From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
To: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
David Sterba <dsterba@suse.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>,
Christoph Hellwig <hch@lst.de>,
Naohiro Aota <naohiro.aota@wdc.com>, Qu Wenruo <wqu@suse.com>,
Damien Le Moal <dlemoal@kernel.org>,
linux-btrfs@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v8 05/11] btrfs: lookup physical address from stripe extent
Date: Mon, 11 Sep 2023 05:52:06 -0700 [thread overview]
Message-ID: <20230911-raid-stripe-tree-v8-5-647676fa852c@wdc.com> (raw)
In-Reply-To: <20230911-raid-stripe-tree-v8-0-647676fa852c@wdc.com>
Look up the physical address in the RAID stripe tree when a read is
attempted on a RAID volume formatted with the RAID stripe tree.
If the requested logical address is not found in the stripe tree, it may
still be in the in-memory ordered stripe tree, so fall back to searching
the ordered stripe tree in this case.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/raid-stripe-tree.c | 159 ++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/raid-stripe-tree.h | 11 +++
fs/btrfs/volumes.c | 37 ++++++++---
3 files changed, 198 insertions(+), 9 deletions(-)
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
index 5b12f40877b5..7ed02e4b79ec 100644
--- a/fs/btrfs/raid-stripe-tree.c
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -324,3 +324,162 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
return ret;
}
+
+/*
+ * Search the extent root responsible for @logical for the key
+ * (@logical, BTRFS_EXTENT_ITEM_KEY, @length), reusing the caller's @path.
+ *
+ * Whatever @path referenced on entry is released before the search; the
+ * result of the search is left in @path for the caller to free.
+ *
+ * Note the sense of the return value: it is the btrfs_search_slot() result
+ * converted to bool, so it is true for any non-zero result (which the caller
+ * treats as "the extent is gone") and false only when the search returned 0.
+ */
+static bool btrfs_check_for_extent(struct btrfs_fs_info *fs_info, u64 logical,
+				   u64 length, struct btrfs_path *path)
+{
+	struct btrfs_root *root = btrfs_extent_root(fs_info, logical);
+	struct btrfs_key extent_key = {
+		.objectid = logical,
+		.type = BTRFS_EXTENT_ITEM_KEY,
+		.offset = length,
+	};
+
+	/* Drop what the caller's path still holds before searching again. */
+	btrfs_release_path(path);
+
+	return btrfs_search_slot(NULL, root, &extent_key, path, 0, 0) != 0;
+}
+/*
+ * Look up the physical address of @logical on the device stripe->dev in the
+ * RAID stripe tree and store it in stripe->physical.
+ *
+ * @fs_info:      filesystem the lookup is done in
+ * @logical:      logical address to translate
+ * @length:       in/out; length of the requested range. If the range runs
+ *                past the end of the stripe extent that contains @logical,
+ *                it is shortened so that it ends with that stripe extent.
+ * @map_type:     block group type; its RAID encoding must match the encoding
+ *                stored in the stripe extent
+ * @stripe_index: only consulted for BTRFS_BLOCK_GROUP_DUP, where the stride
+ *                at this index is the one that is used
+ * @stripe:       stripe->dev is read to select the stride by devid and must
+ *                be set by the caller; stripe->physical is written on success
+ *
+ * The stripe tree is searched for (@logical, BTRFS_RAID_STRIPE_KEY, 0). On an
+ * inexact match the search steps back one slot (unless already at slot 0) and
+ * then walks forward until an item whose [objectid, objectid + offset) range
+ * contains @logical is found, or an item starting beyond the end of the
+ * requested range is reached. The strides of the found item are then walked;
+ * whenever the remaining offset is not smaller than a stride's length it is
+ * reduced by that length, and the stride is skipped if the remainder is still
+ * at least BTRFS_STRIPE_LEN.
+ *
+ * Return: 0 on success, -ENOMEM if no path could be allocated, a negative
+ * value from the initial btrfs_search_slot(), or -ENOENT if no matching
+ * stripe extent, encoding or stride was found. Other non-zero results of
+ * btrfs_next_item() are passed on (positive ones as -ENOENT). For failures
+ * reaching the "out" label with anything but -EIO, the extent tree is checked
+ * first: if btrfs_check_for_extent() reports true, -ENOENT is returned
+ * without logging, otherwise an error message is printed.
+ */
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 *length, u64 map_type,
+ u32 stripe_index,
+ struct btrfs_io_stripe *stripe)
+{
+ struct btrfs_root *stripe_root = btrfs_stripe_tree_root(fs_info);
+ struct btrfs_stripe_extent *stripe_extent;
+ struct btrfs_key stripe_key;
+ struct btrfs_key found_key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ int num_stripes;
+ u8 encoding;
+ u64 offset;
+ u64 found_logical;
+ u64 found_length;
+ u64 end;
+ u64 found_end;
+ int slot;
+ int ret;
+ int i;
+
+ stripe_key.objectid = logical;
+ stripe_key.type = BTRFS_RAID_STRIPE_KEY;
+ stripe_key.offset = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
+ if (ret < 0)
+ goto free_path;
+ if (ret) {
+ if (path->slots[0] != 0)
+ path->slots[0]--;
+ }
+
+ end = logical + *length;
+
+ while (1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+ found_logical = found_key.objectid;
+ found_length = found_key.offset;
+ found_end = found_logical + found_length;
+
+ if (found_logical > end) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (in_range(logical, found_logical, found_length))
+ break;
+
+ ret = btrfs_next_item(stripe_root, path);
+ if (ret)
+ goto out;
+ }
+
+ offset = logical - found_logical;
+
+ /*
+ * If we have a logically contiguous, but physically non-contiguous
+ * range, we need to split the bio. Record the length after which we
+ * must split the bio.
+ */
+ if (end > found_end)
+ *length -= end - found_end;
+
+ num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
+ stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
+ encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
+
+ if (encoding != btrfs_bg_type_to_raid_encoding(map_type)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ for (i = 0; i < num_stripes; i++) {
+ struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
+ u64 devid = btrfs_raid_stride_devid(leaf, stride);
+ u64 len = btrfs_raid_stride_length(leaf, stride);
+ u64 physical = btrfs_raid_stride_physical(leaf, stride);
+
+ if (offset >= len) {
+ offset -= len;
+
+ if (offset >= BTRFS_STRIPE_LEN)
+ continue;
+ }
+
+ if (devid != stripe->dev->devid)
+ continue;
+
+ if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
+ continue;
+
+ stripe->physical = physical + offset;
+
+ ret = 0;
+ goto free_path;
+ }
+
+ /*
+ * If we're here, we haven't found the requested devid in the stripe.
+ */
+ ret = -ENOENT;
+out:
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret && ret != -EIO) {
+ /*
+ * Check if the range we're looking for is actually backed by
+ * an extent. A missing extent can happen, e.g. when scrub is
+ * running on a block-group and the extent it is trying to
+ * scrub gets deleted in the meantime. Although scrub is
+ * setting the block-group to read-only, deletion of extents is
+ * still allowed. If the extent is gone, simply return ENOENT
+ * and be good.
+ *
+ * NOTE(review): btrfs_check_for_extent() releases and reuses
+ * path, while leaf was taken from path->nodes[0] before that.
+ * Confirm that leaf is still safe to hand to
+ * btrfs_print_tree() below.
+ */
+ if (btrfs_check_for_extent(fs_info, logical, *length, path)) {
+ ret = -ENOENT;
+ goto free_path;
+ }
+
+ if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
+ btrfs_print_tree(leaf, 1);
+ btrfs_err(fs_info,
+ "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
+ logical, logical + *length, stripe->dev->devid,
+ btrfs_bg_type_to_raid_name(map_type));
+ }
+free_path:
+ btrfs_free_path(path);
+
+ return ret;
+}
diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
index 7560dc501a65..40aa553ae8aa 100644
--- a/fs/btrfs/raid-stripe-tree.h
+++ b/fs/btrfs/raid-stripe-tree.h
@@ -13,6 +13,10 @@ struct btrfs_io_stripe;
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start,
u64 length);
+int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 *length, u64 map_type,
+ u32 stripe_index,
+ struct btrfs_io_stripe *stripe);
int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_extent *ordered_extent);
@@ -33,4 +37,11 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
return false;
}
+
+/*
+ * Number of struct btrfs_raid_stride entries in a stripe extent item of
+ * @item_size bytes: the bytes following the part of struct
+ * btrfs_stripe_extent in front of ->strides, divided by the size of one
+ * stride.
+ */
+static inline int btrfs_num_raid_stripes(u32 item_size)
+{
+	const size_t strides_bytes = item_size -
+		offsetof(struct btrfs_stripe_extent, strides);
+
+	return strides_bytes / sizeof(struct btrfs_raid_stride);
+}
+
#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0c0fd4eb4848..7c25f5c77788 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@
#include "relocation.h"
#include "scrub.h"
#include "super.h"
+#include "raid-stripe-tree.h"
#define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID10 | \
@@ -6206,12 +6207,22 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
return U64_MAX;
}
-static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
- u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
+static int set_io_stripe(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 *length, struct btrfs_io_stripe *dst,
+ struct map_lookup *map, u32 stripe_index,
+ u64 stripe_offset, u64 stripe_nr)
{
dst->dev = map->stripes[stripe_index].dev;
+
+ if (op == BTRFS_MAP_READ &&
+ btrfs_need_stripe_tree_update(fs_info, map->type))
+ return btrfs_get_raid_extent_offset(fs_info, logical, length,
+ map->type, stripe_index,
+ dst);
+
dst->physical = map->stripes[stripe_index].physical +
stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
+ return 0;
}
/*
@@ -6428,11 +6439,11 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
*/
if (smap && num_alloc_stripes == 1 &&
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
- set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
+ ret = set_io_stripe(fs_info, op, logical, length, smap, map,
+ stripe_index, stripe_offset, stripe_nr);
if (mirror_num_ret)
*mirror_num_ret = mirror_num;
*bioc_ret = NULL;
- ret = 0;
goto out;
}
@@ -6463,21 +6474,29 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
bioc->full_stripe_logical = em->start +
btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
for (i = 0; i < num_stripes; i++)
- set_io_stripe(&bioc->stripes[i], map,
- (i + stripe_nr) % num_stripes,
- stripe_offset, stripe_nr);
+ ret = set_io_stripe(fs_info, op, logical, length,
+ &bioc->stripes[i], map,
+ (i + stripe_nr) % num_stripes,
+ stripe_offset, stripe_nr);
} else {
/*
* For all other non-RAID56 profiles, just copy the target
* stripe into the bioc.
*/
for (i = 0; i < num_stripes; i++) {
- set_io_stripe(&bioc->stripes[i], map, stripe_index,
- stripe_offset, stripe_nr);
+ ret = set_io_stripe(fs_info, op, logical, length,
+ &bioc->stripes[i], map, stripe_index,
+ stripe_offset, stripe_nr);
stripe_index++;
}
}
+ if (ret) {
+ *bioc_ret = NULL;
+ btrfs_put_bioc(bioc);
+ goto out;
+ }
+
if (op != BTRFS_MAP_READ)
max_errors = btrfs_chunk_max_errors(map);
--
2.41.0
next prev parent reply other threads:[~2023-09-11 22:06 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-11 12:52 [PATCH v8 00/11] btrfs: introduce RAID stripe tree Johannes Thumshirn
2023-09-11 12:52 ` [PATCH v8 01/11] btrfs: add raid stripe tree definitions Johannes Thumshirn
2023-09-11 21:00 ` Damien Le Moal
2023-09-12 6:09 ` Johannes Thumshirn
2023-09-12 20:32 ` David Sterba
2023-09-13 6:02 ` Johannes Thumshirn
2023-09-13 14:49 ` David Sterba
2023-09-13 14:57 ` Johannes Thumshirn
2023-09-13 16:06 ` David Sterba
2023-09-11 12:52 ` [PATCH v8 02/11] btrfs: read raid-stripe-tree from disk Johannes Thumshirn
2023-09-14 9:27 ` Qu Wenruo
2023-09-14 9:33 ` Johannes Thumshirn
2023-09-11 12:52 ` [PATCH v8 03/11] btrfs: add support for inserting raid stripe extents Johannes Thumshirn
2023-09-13 16:50 ` David Sterba
2023-09-13 16:57 ` David Sterba
2023-09-14 9:25 ` Qu Wenruo
2023-09-14 9:51 ` Johannes Thumshirn
2023-09-14 10:06 ` Qu Wenruo
2023-09-14 15:35 ` Johannes Thumshirn
2023-09-11 12:52 ` [PATCH v8 04/11] btrfs: delete stripe extent on extent deletion Johannes Thumshirn
2023-09-11 12:52 ` Johannes Thumshirn [this message]
2023-09-14 9:18 ` [PATCH v8 05/11] btrfs: lookup physical address from stripe extent Qu Wenruo
2023-09-14 9:45 ` Johannes Thumshirn
2023-09-14 14:16 ` Johannes Thumshirn
2023-09-11 12:52 ` [PATCH v8 06/11] btrfs: implement RST version of scrub Johannes Thumshirn
2023-09-13 9:51 ` Qu Wenruo
2023-09-13 16:59 ` David Sterba
2023-09-11 12:52 ` [PATCH v8 07/11] btrfs: zoned: allow zoned RAID Johannes Thumshirn
2023-09-12 20:49 ` David Sterba
2023-09-13 5:41 ` Johannes Thumshirn
2023-09-13 14:52 ` David Sterba
2023-09-13 14:59 ` Johannes Thumshirn
2023-09-11 12:52 ` [PATCH v8 08/11] btrfs: add raid stripe tree pretty printer Johannes Thumshirn
2023-09-12 20:42 ` David Sterba
2023-09-13 5:34 ` Johannes Thumshirn
2023-09-11 12:52 ` [PATCH v8 09/11] btrfs: announce presence of raid-stripe-tree in sysfs Johannes Thumshirn
2023-09-11 12:52 ` [PATCH v8 10/11] btrfs: add trace events for RST Johannes Thumshirn
2023-09-12 20:46 ` David Sterba
2023-09-11 12:52 ` [PATCH v8 11/11] btrfs: add raid-stripe-tree to features enabled with debug Johannes Thumshirn
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230911-raid-stripe-tree-v8-5-647676fa852c@wdc.com \
--to=johannes.thumshirn@wdc.com \
--cc=clm@fb.com \
--cc=dlemoal@kernel.org \
--cc=dsterba@suse.com \
--cc=hch@lst.de \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=naohiro.aota@wdc.com \
--cc=wqu@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).