* [PATCH v3 1/9] btrfs: calculate @physical_end using @dev_extent_len directly in scrub_stripe()
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 2/9] btrfs: introduce a helper to locate an extent item Qu Wenruo
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
The variable @physical_end is the exclusive stripe end, currently it's
calculated using @physical + @dev_extent_len / map->stripe_len *
map->stripe_len.
And since at allocation time we ensured dev_extent_len is stripe_len
aligned, the result is the same as @physical + @dev_extent_len.
So this patch will just assign @physical and @physical_end early,
without using @nstripes.
This is especially helpful for any possible out: label user, as now we
only need to initialize @offset before going to out: label.
Since we're here, also make @physical_end constant.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 11089568b287..5ac5434c6227 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3183,10 +3183,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
int slot;
u64 nstripes;
struct extent_buffer *l;
- u64 physical;
+ u64 physical = map->stripes[stripe_index].physical;
u64 logical;
u64 logic_end;
- u64 physical_end;
+ const u64 physical_end = physical + dev_extent_len;
u64 generation;
int mirror_num;
struct btrfs_key key;
@@ -3205,7 +3205,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
int extent_mirror_num;
int stop_loop = 0;
- physical = map->stripes[stripe_index].physical;
offset = 0;
nstripes = div64_u64(dev_extent_len, map->stripe_len);
mirror_num = 1;
@@ -3242,7 +3241,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
path->reada = READA_FORWARD;
logical = chunk_logical + offset;
- physical_end = physical + nstripes * map->stripe_len;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
get_raid56_logic_offset(physical_end, stripe_index,
map, &logic_end, NULL);
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 2/9] btrfs: introduce a helper to locate an extent item
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 1/9] btrfs: calculate @physical_end using @dev_extent_len directly in scrub_stripe() Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 3/9] btrfs: introduce dedicated helper to scrub simple-mirror based range Qu Wenruo
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
The new helper, find_first_extent_item(), will locate an extent item
(either EXTENT_ITEM or METADATA_ITEM) which covers *ANY* byte of the
search range.
This helper will later be used to refactor scrub code.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 108 insertions(+)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 5ac5434c6227..52d75575b3b4 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2882,6 +2882,114 @@ static void scrub_parity_put(struct scrub_parity *sparity)
scrub_parity_check_and_repair(sparity);
}
+/*
+ * Return 0 if the extent item range covers any byte of the range.
+ * Return <0 if the extent item is before @search_start.
+ * Return >0 if the extent item is after @start_start + @search_len.
+ */
+static int compare_extent_item_range(struct btrfs_path *path,
+ u64 search_start, u64 search_len)
+{
+ struct btrfs_fs_info *fs_info = path->nodes[0]->fs_info;
+ u64 len;
+ struct btrfs_key key;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY);
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ len = fs_info->nodesize;
+ else
+ len = key.offset;
+
+ if (key.objectid + len <= search_start)
+ return -1;
+ if (key.objectid >= search_start + search_len)
+ return 1;
+ return 0;
+}
+
+/*
+ * Helper to locate one extent item which covers any byte in range
+ * [@search_start, @search_start + @search_length)
+ *
+ * If the path is not initialized, we will initialize the search by doing
+ * a btrfs_search_slot().
+ * If the path is already initialized, we will use the path as the initial
+ * slot, to avoid duplicated btrfs_search_slot() calls.
+ *
+ * NOTE: If an extent item starts before @search_start, we will still
+ * return the extent item. This is for data extent crossing stripe boundary.
+ *
+ * Return 0 if we found such extent item, and @path will point to the
+ * extent item.
+ * Return >0 if no such extent item can be found, and @path will be released.
+ * Return <0 if hit fatal error, and @path will be released.
+ */
+static int find_first_extent_item(struct btrfs_root *extent_root,
+ struct btrfs_path *path,
+ u64 search_start, u64 search_len)
+{
+ struct btrfs_fs_info *fs_info = extent_root->fs_info;
+ struct btrfs_key key;
+ int ret;
+
+ /* Continue using the existing path */
+ if (path->nodes[0])
+ goto search_forward;
+
+ if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.objectid = search_start;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ ASSERT(ret > 0);
+ /*
+ * Here we intentionally pass 0 as @min_objectid, as there could be
+ * an extent item starting before @search_start.
+ */
+ ret = btrfs_previous_extent_item(extent_root, path, 0);
+ if (ret < 0)
+ return ret;
+ /*
+ * No matter whether we have found an extent item, the next loop will
+ * properly do every check on the key.
+ */
+search_forward:
+ while (true) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid >= search_start + search_len)
+ break;
+ if (key.type != BTRFS_METADATA_ITEM_KEY &&
+ key.type != BTRFS_EXTENT_ITEM_KEY)
+ goto next;
+
+ ret = compare_extent_item_range(path, search_start, search_len);
+ if (ret == 0)
+ return ret;
+ if (ret > 0)
+ break;
+next:
+ path->slots[0]++;
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+ ret = btrfs_next_leaf(extent_root, path);
+ if (ret) {
+ /* Either no more item or fatal error */
+ btrfs_release_path(path);
+ return ret;
+ }
+ }
+ }
+ btrfs_release_path(path);
+ return 1;
+}
+
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *sdev,
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 3/9] btrfs: introduce dedicated helper to scrub simple-mirror based range
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 1/9] btrfs: calculate @physical_end using @dev_extent_len directly in scrub_stripe() Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 2/9] btrfs: introduce a helper to locate an extent item Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 4/9] btrfs: introduce dedicated helper to scrub simple-stripe " Qu Wenruo
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
The new helper, scrub_simple_mirror(), will scrub all extents inside a range
which only has simple mirror based duplication.
This covers every range of SINGLE/DUP/RAID1/RAID1C*, and inside each
data stripe for RAID0/RAID10.
Currently we will use this function to scrub SINGLE/DUP/RAID1/RAID1C*
profiles.
As one can see, the new entrance for those simple-mirror based profiles
can be small enough (with comments, just reach 100 lines).
This function will be the basis for the incoming scrub refactor.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 190 insertions(+)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 52d75575b3b4..37f882fd3079 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2990,6 +2990,26 @@ static int find_first_extent_item(struct btrfs_root *extent_root,
return 1;
}
+static void get_extent_info(struct btrfs_path *path, u64 *extent_start_ret,
+ u64 *size_ret, u64 *flags_ret, u64 *generation_ret)
+{
+ struct btrfs_key key;
+ struct btrfs_extent_item *ei;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ ASSERT(key.type == BTRFS_METADATA_ITEM_KEY ||
+ key.type == BTRFS_EXTENT_ITEM_KEY);
+ *extent_start_ret = key.objectid;
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ *size_ret = path->nodes[0]->fs_info->nodesize;
+ else
+ *size_ret = key.offset;
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_extent_item);
+ *flags_ret = btrfs_extent_flags(path->nodes[0], ei);
+ *generation_ret = btrfs_extent_generation(path->nodes[0], ei);
+}
+
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *sdev,
@@ -3273,6 +3293,152 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
return ret;
}
+static bool does_range_cross_boundary(u64 extent_start, u64 extent_len,
+ u64 boundary_start, u64 boudary_len)
+{
+ return (extent_start < boundary_start &&
+ extent_start + extent_len > boundary_start) ||
+ (extent_start < boundary_start + boudary_len &&
+ extent_start + extent_len > boundary_start + boudary_len);
+}
+
+/*
+ * Scrub one range which can only has simple mirror based profile.
+ * (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in
+ * RAID0/RAID10).
+ *
+ * Since we may need to handle a subset of block group, we need @logical_start
+ * and @logical_length parameter.
+ */
+static int scrub_simple_mirror(struct scrub_ctx *sctx,
+ struct btrfs_root *extent_root,
+ struct btrfs_root *csum_root,
+ struct btrfs_block_group *bg,
+ struct map_lookup *map,
+ u64 logical_start, u64 logical_length,
+ struct btrfs_device *device,
+ u64 physical, int mirror_num)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ const u64 logical_end = logical_start + logical_length;
+ /* An artificial limit, inherit from old scrub behavior */
+ const u32 max_length = SZ_64K;
+ struct btrfs_path path = {};
+ u64 cur_logical = logical_start;
+ int ret;
+
+ /* The range must be inside the bg */
+ ASSERT(logical_start >= bg->start &&
+ logical_end <= bg->start + bg->length);
+
+ path.search_commit_root = 1;
+ path.skip_locking = 1;
+ /* Go through each extent items inside the logical range */
+ while (cur_logical < logical_end) {
+ int cur_mirror = mirror_num;
+ struct btrfs_device *target_dev = device;
+ u64 extent_start;
+ u64 extent_len;
+ u64 extent_flags;
+ u64 extent_gen;
+ u64 scrub_len;
+ u64 cur_physical;
+
+ /* Canceled ? */
+ if (atomic_read(&fs_info->scrub_cancel_req) ||
+ atomic_read(&sctx->cancel_req)) {
+ ret = -ECANCELED;
+ break;
+ }
+ /* Paused ? */
+ if (atomic_read(&fs_info->scrub_pause_req)) {
+ /* Push queued extents */
+ sctx->flush_all_writes = true;
+ scrub_submit(sctx);
+ mutex_lock(&sctx->wr_lock);
+ scrub_wr_submit(sctx);
+ mutex_unlock(&sctx->wr_lock);
+ wait_event(sctx->list_wait,
+ atomic_read(&sctx->bios_in_flight) == 0);
+ sctx->flush_all_writes = false;
+ scrub_blocked_if_needed(fs_info);
+ }
+ /* Block group removed? */
+ spin_lock(&bg->lock);
+ if (bg->removed) {
+ spin_unlock(&bg->lock);
+ ret = 0;
+ break;
+ }
+ spin_unlock(&bg->lock);
+
+ ret = find_first_extent_item(extent_root, &path, cur_logical,
+ logical_end - cur_logical);
+ if (ret > 0) {
+ /* No more extent, just update the accounting */
+ sctx->stat.last_physical = physical + logical_length;
+ ret = 0;
+ break;
+ }
+ if (ret < 0)
+ break;
+ get_extent_info(&path, &extent_start, &extent_len,
+ &extent_flags, &extent_gen);
+ /* Skip hole range which doesn't have any extent */
+ cur_logical = max(extent_start, cur_logical);
+
+ /*
+ * Scrub len has three limits:
+ * - Extent size limit
+ * - Scrub range limit
+ * This is especially imporatant for RAID0/RAID10 to reuse
+ * this function
+ * - Max scrub size limit
+ */
+ scrub_len = min(min(extent_start + extent_len,
+ logical_end), cur_logical + max_length) -
+ cur_logical;
+ cur_physical = cur_logical - logical_start + physical;
+
+ if (sctx->is_dev_replace)
+ scrub_remap_extent(fs_info, cur_logical, scrub_len,
+ &cur_physical, &target_dev, &cur_mirror);
+ if (extent_flags & BTRFS_EXTENT_FLAG_DATA) {
+ ret = btrfs_lookup_csums_range(csum_root, cur_logical,
+ cur_logical + scrub_len - 1,
+ &sctx->csum_list, 1);
+ if (ret)
+ break;
+ }
+ if ((extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
+ does_range_cross_boundary(extent_start, extent_len,
+ logical_start, logical_length)) {
+ btrfs_err(fs_info,
+"scrub: tree block %llu spanning boundaries, ignored. boundary=[%llu, %llu)",
+ extent_start, logical_start, logical_end);
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.uncorrectable_errors++;
+ spin_unlock(&sctx->stat_lock);
+ cur_logical += scrub_len;
+ continue;
+ }
+ ret = scrub_extent(sctx, map, cur_logical, scrub_len, cur_physical,
+ target_dev, extent_flags, extent_gen,
+ cur_mirror, cur_logical - logical_start +
+ physical);
+ scrub_free_csums(sctx);
+ if (ret)
+ break;
+ if (sctx->is_dev_replace)
+ sync_replace_for_zoned(sctx);
+ cur_logical += scrub_len;
+ /* Don't hold CPU for too long time */
+ cond_resched();
+ }
+ btrfs_release_path(&path);
+ return ret;
+}
+
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct btrfs_block_group *bg,
struct map_lookup *map,
@@ -3285,6 +3451,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct btrfs_root *csum_root;
struct btrfs_extent_item *extent;
struct blk_plug plug;
+ const u64 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
const u64 chunk_logical = bg->start;
u64 flags;
int ret;
@@ -3377,6 +3544,29 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
sctx->flush_all_writes = true;
}
+ /*
+ * There used to be a big double loop to handle all profiles using the
+ * same routine, which grows larger and more gross over time.
+ *
+ * So here we handle each profile differently, so simpler profiles
+ * have simpler scrubing function.
+ */
+ if (!(profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_RAID56_MASK))) {
+ /*
+ * Above check rules out all complex profile, the remaining
+ * profiles are SINGLE|DUP|RAID1|RAID1C*, which is simple
+ * mirrored duplication without stripe.
+ *
+ * Only @phsyical and @mirror_num needs to calculated using
+ * @stripe_index.
+ */
+ ret = scrub_simple_mirror(sctx, root, csum_root, bg, map,
+ bg->start, bg->length, scrub_dev,
+ map->stripes[stripe_index].physical,
+ stripe_index + 1);
+ goto out;
+ }
/*
* now find all extents for each stripe and scrub them
*/
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 4/9] btrfs: introduce dedicated helper to scrub simple-stripe based range
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
` (2 preceding siblings ...)
2022-02-18 5:48 ` [PATCH v3 3/9] btrfs: introduce dedicated helper to scrub simple-mirror based range Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 5/9] btrfs: scrub: cleanup the non-RAID56 branches in scrub_stripe() Qu Wenruo
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
The new entrance will iterate through each data stripe which belongs to
the target device.
And since inside each data stripe, RAID0 is just SINGLE, while RAID10 is
just RAID1, we can reuse scrub_simple_mirror() to do the scrub properly.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 100 +++++++++++++++++++++++++++++++++++++++++------
1 file changed, 88 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 37f882fd3079..876b1fd80d29 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3010,6 +3010,15 @@ static void get_extent_info(struct btrfs_path *path, u64 *extent_start_ret,
*generation_ret = btrfs_extent_generation(path->nodes[0], ei);
}
+static bool does_range_cross_boundary(u64 extent_start, u64 extent_len,
+ u64 boundary_start, u64 boudary_len)
+{
+ return (extent_start < boundary_start &&
+ extent_start + extent_len > boundary_start) ||
+ (extent_start < boundary_start + boudary_len &&
+ extent_start + extent_len > boundary_start + boudary_len);
+}
+
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *sdev,
@@ -3293,15 +3302,6 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
return ret;
}
-static bool does_range_cross_boundary(u64 extent_start, u64 extent_len,
- u64 boundary_start, u64 boudary_len)
-{
- return (extent_start < boundary_start &&
- extent_start + extent_len > boundary_start) ||
- (extent_start < boundary_start + boudary_len &&
- extent_start + extent_len > boundary_start + boudary_len);
-}
-
/*
* Scrub one range which can only has simple mirror based profile.
* (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in
@@ -3439,6 +3439,77 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
return ret;
}
+/* Calculate the full stripe length for simple stripe based profiles */
+static u64 simple_stripe_full_stripe_len(struct map_lookup *map)
+{
+ ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID10));
+
+ return map->num_stripes / map->sub_stripes * map->stripe_len;
+}
+
+/* Get the logical bytenr for the stripe */
+static u64 simple_stripe_get_logical(struct map_lookup *map,
+ struct btrfs_block_group *bg,
+ int stripe_index)
+{
+ ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID10));
+ ASSERT(stripe_index < map->num_stripes);
+
+ /*
+ * (stripe_index / sub_stripes) gives how many data stripes we need to
+ * skip.
+ */
+ return (stripe_index / map->sub_stripes) * map->stripe_len + bg->start;
+}
+
+/* Get the mirror number for the stripe */
+static int simple_stripe_mirror_num(struct map_lookup *map, int stripe_index)
+{
+ ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID10));
+ ASSERT(stripe_index < map->num_stripes);
+
+ /* For RAID0, it's fixed to 1, for RAID10 it's 0,1,0,1... */
+ return stripe_index % map->sub_stripes + 1;
+}
+
+static int scrub_simple_stripe(struct scrub_ctx *sctx,
+ struct btrfs_root *extent_root,
+ struct btrfs_root *csum_root,
+ struct btrfs_block_group *bg,
+ struct map_lookup *map,
+ struct btrfs_device *device,
+ int stripe_index)
+{
+ const u64 logical_increment = simple_stripe_full_stripe_len(map);
+ const u64 orig_logical = simple_stripe_get_logical(map, bg, stripe_index);
+ const u64 orig_physical = map->stripes[stripe_index].physical;
+ const int mirror_num = simple_stripe_mirror_num(map, stripe_index);
+ u64 cur_logical = orig_logical;
+ u64 cur_physical = orig_physical;
+ int ret = 0;
+
+ while (cur_logical < bg->start + bg->length) {
+ /*
+ * Inside each stripe, RAID0 is just SINGLE, and RAID10 is
+ * just RAID1, so we can reuse scrub_simple_mirror() to scrub
+ * this stripe.
+ */
+ ret = scrub_simple_mirror(sctx, extent_root, csum_root, bg, map,
+ cur_logical, map->stripe_len, device,
+ cur_physical, mirror_num);
+ if (ret)
+ return ret;
+ /* Skip to next stripe which belongs to the target device */
+ cur_logical += logical_increment;
+ /* For physical offset, we just go to next stripe */
+ cur_physical += map->stripe_len;
+ }
+ return ret;
+}
+
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct btrfs_block_group *bg,
struct map_lookup *map,
@@ -3567,9 +3638,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
stripe_index + 1);
goto out;
}
- /*
- * now find all extents for each stripe and scrub them
- */
+ if (profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
+ ret = scrub_simple_stripe(sctx, root, csum_root, bg, map,
+ scrub_dev, stripe_index);
+ goto out;
+ }
+
+ /* Only RAID56 goes through the old code */
+ ASSERT(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK);
ret = 0;
while (physical < physical_end) {
/*
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 5/9] btrfs: scrub: cleanup the non-RAID56 branches in scrub_stripe()
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
` (3 preceding siblings ...)
2022-02-18 5:48 ` [PATCH v3 4/9] btrfs: introduce dedicated helper to scrub simple-stripe " Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 6/9] btrfs: use scrub_simple_mirror() to handle RAID56 data stripe scrub Qu Wenruo
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
Since we have moved all other profiles handling into their own
functions, now the main body of scrub_stripe() is just handling RAID56
profiles.
There is no need to address other profiles in the main loop of
scrub_stripe(), so we can remove those dead branches.
Since we're here, also slightly change the timing of initialization of
variables like @offset, @increment and @logical.
Especially for @logical, we don't really need to initialize it for
btrfs_extent_root()/btrfs_csum_root(), we can use bg->start for that
purpose.
Now those variables are only initialize for RAID56 branches.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 128 +++++++++++++++++++----------------------------
1 file changed, 51 insertions(+), 77 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 876b1fd80d29..1fd9d836dafb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3527,14 +3527,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 flags;
int ret;
int slot;
- u64 nstripes;
struct extent_buffer *l;
u64 physical = map->stripes[stripe_index].physical;
u64 logical;
u64 logic_end;
const u64 physical_end = physical + dev_extent_len;
u64 generation;
- int mirror_num;
struct btrfs_key key;
u64 increment;
u64 offset;
@@ -3551,28 +3549,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
int extent_mirror_num;
int stop_loop = 0;
- offset = 0;
- nstripes = div64_u64(dev_extent_len, map->stripe_len);
- mirror_num = 1;
- increment = map->stripe_len;
- if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- offset = map->stripe_len * stripe_index;
- increment = map->stripe_len * map->num_stripes;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- int factor = map->num_stripes / map->sub_stripes;
- offset = map->stripe_len * (stripe_index / map->sub_stripes);
- increment = map->stripe_len * factor;
- mirror_num = stripe_index % map->sub_stripes + 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
- mirror_num = stripe_index % map->num_stripes + 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- mirror_num = stripe_index % map->num_stripes + 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- get_raid56_logic_offset(physical, stripe_index, map, &offset,
- NULL);
- increment = map->stripe_len * nr_data_stripes(map);
- }
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -3586,20 +3562,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
path->skip_locking = 1;
path->reada = READA_FORWARD;
- logical = chunk_logical + offset;
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- get_raid56_logic_offset(physical_end, stripe_index,
- map, &logic_end, NULL);
- logic_end += chunk_logical;
- } else {
- logic_end = logical + increment * nstripes;
- }
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
scrub_blocked_if_needed(fs_info);
- root = btrfs_extent_root(fs_info, logical);
- csum_root = btrfs_csum_root(fs_info, logical);
+ root = btrfs_extent_root(fs_info, bg->start);
+ csum_root = btrfs_csum_root(fs_info, bg->start);
/*
* collect all data csums for the stripe to avoid seeking during
@@ -3636,17 +3604,29 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
bg->start, bg->length, scrub_dev,
map->stripes[stripe_index].physical,
stripe_index + 1);
+ offset = 0;
goto out;
}
if (profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
ret = scrub_simple_stripe(sctx, root, csum_root, bg, map,
scrub_dev, stripe_index);
+ offset = map->stripe_len * (stripe_index / map->sub_stripes);
goto out;
}
/* Only RAID56 goes through the old code */
ASSERT(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK);
ret = 0;
+
+ /* Calculate the logical end of the stripe */
+ get_raid56_logic_offset(physical_end, stripe_index,
+ map, &logic_end, NULL);
+ logic_end += chunk_logical;
+
+ /* Initialize @offset in case we need to go to out: label */
+ get_raid56_logic_offset(physical, stripe_index, map, &offset, NULL);
+ increment = map->stripe_len * nr_data_stripes(map);
+
while (physical < physical_end) {
/*
* canceled?
@@ -3672,22 +3652,20 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
scrub_blocked_if_needed(fs_info);
}
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- ret = get_raid56_logic_offset(physical, stripe_index,
- map, &logical,
- &stripe_logical);
- logical += chunk_logical;
- if (ret) {
- /* it is parity strip */
- stripe_logical += chunk_logical;
- stripe_end = stripe_logical + increment;
- ret = scrub_raid56_parity(sctx, map, scrub_dev,
- stripe_logical,
- stripe_end);
- if (ret)
- goto out;
- goto skip;
- }
+ ret = get_raid56_logic_offset(physical, stripe_index,
+ map, &logical,
+ &stripe_logical);
+ logical += chunk_logical;
+ if (ret) {
+ /* it is parity strip */
+ stripe_logical += chunk_logical;
+ stripe_end = stripe_logical + increment;
+ ret = scrub_raid56_parity(sctx, map, scrub_dev,
+ stripe_logical,
+ stripe_end);
+ if (ret)
+ goto out;
+ goto skip;
}
if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
@@ -3805,7 +3783,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
extent_physical = extent_logical - logical + physical;
extent_dev = scrub_dev;
- extent_mirror_num = mirror_num;
+ /* For RAID56 data stripes, mirror_num is fixed to 1 */
+ extent_mirror_num = 1;
if (sctx->is_dev_replace)
scrub_remap_extent(fs_info, extent_logical,
extent_len, &extent_physical,
@@ -3836,33 +3815,28 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
if (extent_logical + extent_len <
key.objectid + bytes) {
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- /*
- * loop until we find next data stripe
- * or we have finished all stripes.
- */
+ /*
+ * loop until we find next data stripe
+ * or we have finished all stripes.
+ */
loop:
- physical += map->stripe_len;
- ret = get_raid56_logic_offset(physical,
- stripe_index, map,
- &logical, &stripe_logical);
- logical += chunk_logical;
-
- if (ret && physical < physical_end) {
- stripe_logical += chunk_logical;
- stripe_end = stripe_logical +
- increment;
- ret = scrub_raid56_parity(sctx,
- map, scrub_dev,
- stripe_logical,
- stripe_end);
- if (ret)
- goto out;
- goto loop;
- }
- } else {
- physical += map->stripe_len;
- logical += increment;
+ physical += map->stripe_len;
+ ret = get_raid56_logic_offset(physical,
+ stripe_index, map,
+ &logical, &stripe_logical);
+ logical += chunk_logical;
+
+ if (ret && physical < physical_end) {
+ stripe_logical += chunk_logical;
+ stripe_end = stripe_logical +
+ increment;
+ ret = scrub_raid56_parity(sctx,
+ map, scrub_dev,
+ stripe_logical,
+ stripe_end);
+ if (ret)
+ goto out;
+ goto loop;
}
if (logical < key.objectid + bytes) {
cond_resched();
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 6/9] btrfs: use scrub_simple_mirror() to handle RAID56 data stripe scrub
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
` (4 preceding siblings ...)
2022-02-18 5:48 ` [PATCH v3 5/9] btrfs: scrub: cleanup the non-RAID56 branches in scrub_stripe() Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 7/9] btrfs: refactor scrub_raid56_parity() Qu Wenruo
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
Although RAID56 has complex repair mechanism, which involves reading the
whole full stripe, but inside one data stripe, it's in fact no
different than SINGLE/RAID1.
The point here is, for data stripe we just check the csum for each
extent we hit.
Only for csum mismatch case, our repair paths divide.
So we can still reuse scrub_simple_mirror() for RAID56 data stripes,
which saves quite some code.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 255 +++++------------------------------------------
1 file changed, 24 insertions(+), 231 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 1fd9d836dafb..13acf07bccae 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3520,33 +3520,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_root *root;
struct btrfs_root *csum_root;
- struct btrfs_extent_item *extent;
struct blk_plug plug;
const u64 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
const u64 chunk_logical = bg->start;
- u64 flags;
int ret;
- int slot;
- struct extent_buffer *l;
u64 physical = map->stripes[stripe_index].physical;
+ const u64 physical_end = physical + dev_extent_len;
u64 logical;
u64 logic_end;
- const u64 physical_end = physical + dev_extent_len;
- u64 generation;
- struct btrfs_key key;
- u64 increment;
- u64 offset;
- u64 extent_logical;
- u64 extent_physical;
- /*
- * Unlike chunk length, extent length should never go beyond
- * BTRFS_MAX_EXTENT_SIZE, thus u32 is enough here.
- */
- u32 extent_len;
+ u64 increment; /* The logical increment after finishing one stripe */
+ u64 offset; /* Offset inside the chunk */
u64 stripe_logical;
u64 stripe_end;
- struct btrfs_device *extent_dev;
- int extent_mirror_num;
int stop_loop = 0;
path = btrfs_alloc_path();
@@ -3554,7 +3539,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
return -ENOMEM;
/*
- * work on commit root. The related disk blocks are static as
+ * Work on commit root. The related disk blocks are static as
* long as COW is applied. This means, it is save to rewrite
* them to repair disk errors without any race conditions
*/
@@ -3570,7 +3555,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
csum_root = btrfs_csum_root(fs_info, bg->start);
/*
- * collect all data csums for the stripe to avoid seeking during
+ * Collect all data csums for the stripe to avoid seeking during
* the scrub. This might currently (crc32) end up to be about 1MB
*/
blk_start_plug(&plug);
@@ -3627,37 +3612,16 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
get_raid56_logic_offset(physical, stripe_index, map, &offset, NULL);
increment = map->stripe_len * nr_data_stripes(map);
+ /*
+ * Due to the rotation, for RAID56 it's better to iterate each stripe
+ * using their physical offset.
+ */
while (physical < physical_end) {
- /*
- * canceled?
- */
- if (atomic_read(&fs_info->scrub_cancel_req) ||
- atomic_read(&sctx->cancel_req)) {
- ret = -ECANCELED;
- goto out;
- }
- /*
- * check to see if we have to pause
- */
- if (atomic_read(&fs_info->scrub_pause_req)) {
- /* push queued extents */
- sctx->flush_all_writes = true;
- scrub_submit(sctx);
- mutex_lock(&sctx->wr_lock);
- scrub_wr_submit(sctx);
- mutex_unlock(&sctx->wr_lock);
- wait_event(sctx->list_wait,
- atomic_read(&sctx->bios_in_flight) == 0);
- sctx->flush_all_writes = false;
- scrub_blocked_if_needed(fs_info);
- }
-
- ret = get_raid56_logic_offset(physical, stripe_index,
- map, &logical,
- &stripe_logical);
+ ret = get_raid56_logic_offset(physical, stripe_index, map,
+ &logical, &stripe_logical);
logical += chunk_logical;
if (ret) {
- /* it is parity strip */
+ /* It is parity strip */
stripe_logical += chunk_logical;
stripe_end = stripe_logical + increment;
ret = scrub_raid56_parity(sctx, map, scrub_dev,
@@ -3665,194 +3629,23 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
stripe_end);
if (ret)
goto out;
- goto skip;
+ goto next;
}
- if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
- key.type = BTRFS_METADATA_ITEM_KEY;
- else
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.objectid = logical;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ /*
+ * Now we're at a data stripe, scrub each extents in the range.
+ *
+ * At this stage, if we ignore the repair part, inside each data
+ * stripe it is no different than SINGLE profile.
+ * We can reuse scrub_simple_mirror() here, as the repair part
+ * is still based on @mirror_num.
+ */
+ ret = scrub_simple_mirror(sctx, root, csum_root, bg, map,
+ logical, map->stripe_len,
+ scrub_dev, physical, 1);
if (ret < 0)
goto out;
-
- if (ret > 0) {
- ret = btrfs_previous_extent_item(root, path, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- /* there's no smaller item, so stick with the
- * larger one */
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, &key,
- path, 0, 0);
- if (ret < 0)
- goto out;
- }
- }
-
- stop_loop = 0;
- while (1) {
- u64 bytes;
-
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
-
- stop_loop = 1;
- break;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.type != BTRFS_EXTENT_ITEM_KEY &&
- key.type != BTRFS_METADATA_ITEM_KEY)
- goto next;
-
- if (key.type == BTRFS_METADATA_ITEM_KEY)
- bytes = fs_info->nodesize;
- else
- bytes = key.offset;
-
- if (key.objectid + bytes <= logical)
- goto next;
-
- if (key.objectid >= logical + map->stripe_len) {
- /* out of this device extent */
- if (key.objectid >= logic_end)
- stop_loop = 1;
- break;
- }
-
- /*
- * If our block group was removed in the meanwhile, just
- * stop scrubbing since there is no point in continuing.
- * Continuing would prevent reusing its device extents
- * for new block groups for a long time.
- */
- spin_lock(&bg->lock);
- if (bg->removed) {
- spin_unlock(&bg->lock);
- ret = 0;
- goto out;
- }
- spin_unlock(&bg->lock);
-
- extent = btrfs_item_ptr(l, slot,
- struct btrfs_extent_item);
- flags = btrfs_extent_flags(l, extent);
- generation = btrfs_extent_generation(l, extent);
-
- if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
- (key.objectid < logical ||
- key.objectid + bytes >
- logical + map->stripe_len)) {
- btrfs_err(fs_info,
- "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
- key.objectid, logical);
- spin_lock(&sctx->stat_lock);
- sctx->stat.uncorrectable_errors++;
- spin_unlock(&sctx->stat_lock);
- goto next;
- }
-
-again:
- extent_logical = key.objectid;
- ASSERT(bytes <= U32_MAX);
- extent_len = bytes;
-
- /*
- * trim extent to this stripe
- */
- if (extent_logical < logical) {
- extent_len -= logical - extent_logical;
- extent_logical = logical;
- }
- if (extent_logical + extent_len >
- logical + map->stripe_len) {
- extent_len = logical + map->stripe_len -
- extent_logical;
- }
-
- extent_physical = extent_logical - logical + physical;
- extent_dev = scrub_dev;
- /* For RAID56 data stripes, mirror_num is fixed to 1 */
- extent_mirror_num = 1;
- if (sctx->is_dev_replace)
- scrub_remap_extent(fs_info, extent_logical,
- extent_len, &extent_physical,
- &extent_dev,
- &extent_mirror_num);
-
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- ret = btrfs_lookup_csums_range(csum_root,
- extent_logical,
- extent_logical + extent_len - 1,
- &sctx->csum_list, 1);
- if (ret)
- goto out;
- }
-
- ret = scrub_extent(sctx, map, extent_logical, extent_len,
- extent_physical, extent_dev, flags,
- generation, extent_mirror_num,
- extent_logical - logical + physical);
-
- scrub_free_csums(sctx);
-
- if (ret)
- goto out;
-
- if (sctx->is_dev_replace)
- sync_replace_for_zoned(sctx);
-
- if (extent_logical + extent_len <
- key.objectid + bytes) {
- /*
- * loop until we find next data stripe
- * or we have finished all stripes.
- */
-loop:
- physical += map->stripe_len;
- ret = get_raid56_logic_offset(physical,
- stripe_index, map,
- &logical, &stripe_logical);
- logical += chunk_logical;
-
- if (ret && physical < physical_end) {
- stripe_logical += chunk_logical;
- stripe_end = stripe_logical +
- increment;
- ret = scrub_raid56_parity(sctx,
- map, scrub_dev,
- stripe_logical,
- stripe_end);
- if (ret)
- goto out;
- goto loop;
- }
- if (logical < key.objectid + bytes) {
- cond_resched();
- goto again;
- }
-
- if (physical >= physical_end) {
- stop_loop = 1;
- break;
- }
- }
next:
- path->slots[0]++;
- }
- btrfs_release_path(path);
-skip:
logical += increment;
physical += map->stripe_len;
spin_lock(&sctx->stat_lock);
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 7/9] btrfs: refactor scrub_raid56_parity()
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
` (5 preceding siblings ...)
2022-02-18 5:48 ` [PATCH v3 6/9] btrfs: use scrub_simple_mirror() to handle RAID56 data stripe scrub Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 8/9] btrfs: use find_first_extent_item() to replace the open-coded extent item search Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 9/9] btrfs: move scrub_remap_extent() call into scrub_extent() with more comments Qu Wenruo
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
Currently scrub_raid56_parity() has a large double loop, handling the
following things at the same time:
- Iterate each data stripe
- Iterate each extent item in one data stripe
Refactor this mess by:
- Introduce a new helper to handle data stripe iteration
The new helper is scrub_raid56_data_stripe_for_parity(), which
only has one while() loop handling the extent items inside the
data stripe.
The code is still mostly the same as the old code.
- Call cond_resched() for each extent
Previously we only call cond_resched() under a complex if () check.
I see no special reason to do that, and for other scrub functions,
like scrub_simple_mirror() we're already doing the same cond_resched()
after scrubbing one extent.
- Add extra comments
To improve the readability
Please note that, this patch is only to address the double loop, there
are incoming patches to do extra cleanup.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 342 ++++++++++++++++++++++-------------------------
1 file changed, 161 insertions(+), 181 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 13acf07bccae..97044957ed85 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3019,6 +3019,161 @@ static bool does_range_cross_boundary(u64 extent_start, u64 extent_len,
extent_start + extent_len > boundary_start + boudary_len);
}
+static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
+ struct scrub_parity *sparity,
+ struct map_lookup *map,
+ struct btrfs_device *sdev,
+ struct btrfs_path *path,
+ u64 logical)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ struct btrfs_root *extent_root = btrfs_extent_root(fs_info, logical);
+ struct btrfs_root *csum_root = btrfs_csum_root(fs_info, logical);
+ struct btrfs_key key;
+ u64 extent_start;
+ u64 extent_size;
+ int ret;
+
+ ASSERT(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK);
+
+ /* Path should not be populated */
+ ASSERT(!path->nodes[0]);
+
+ if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.objectid = logical;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ if (ret > 0) {
+ ret = btrfs_previous_extent_item(extent_root, path, 0);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, extent_root, &key, path,
+ 0, 0);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ while (1) {
+ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_device *extent_dev;
+ struct btrfs_extent_item *ei;
+ struct extent_buffer *l;
+ int slot;
+ u64 mapped_length;
+ u64 extent_flags;
+ u64 extent_gen;
+ u64 extent_physical;
+ u64 extent_mirror_num;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(extent_root, path);
+ if (ret == 0)
+ continue;
+
+ /* No more extent items or error, exit */
+ break;
+ }
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY)
+ goto next;
+
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ extent_size = fs_info->nodesize;
+ else
+ extent_size = key.offset;
+
+ if (key.objectid + extent_size <= logical)
+ goto next;
+
+ /* Beyond this data stripe */
+ if (key.objectid >= logical + map->stripe_len)
+ break;
+
+ ei = btrfs_item_ptr(l, slot, struct btrfs_extent_item);
+ extent_flags = btrfs_extent_flags(l, ei);
+ extent_gen = btrfs_extent_generation(l, ei);
+
+ if ((extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
+ (key.objectid < logical || key.objectid + extent_size >
+ logical + map->stripe_len)) {
+ btrfs_err(fs_info,
+ "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+ key.objectid, logical);
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.uncorrectable_errors++;
+ spin_unlock(&sctx->stat_lock);
+ goto next;
+ }
+
+ extent_start = key.objectid;
+ ASSERT(extent_size <= U32_MAX);
+
+ /* Truncate the range inside the data stripe */
+ if (extent_start < logical) {
+ extent_size -= logical - extent_start;
+ extent_start = logical;
+ }
+ if (extent_start + extent_size > logical + map->stripe_len)
+ extent_size = logical + map->stripe_len - extent_start;
+
+ scrub_parity_mark_sectors_data(sparity, extent_start, extent_size);
+
+ mapped_length = extent_size;
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_start,
+ &mapped_length, &bioc, 0);
+ if (!ret && (!bioc || mapped_length < extent_size))
+ ret = -EIO;
+ if (ret) {
+ btrfs_put_bioc(bioc);
+ scrub_parity_mark_sectors_error(sparity, extent_start,
+ extent_size);
+ break;
+ }
+ extent_physical = bioc->stripes[0].physical;
+ extent_mirror_num = bioc->mirror_num;
+ extent_dev = bioc->stripes[0].dev;
+ btrfs_put_bioc(bioc);
+
+ ret = btrfs_lookup_csums_range(csum_root, extent_start,
+ extent_start + extent_size - 1,
+ &sctx->csum_list, 1);
+ if (ret)
+ break;
+
+ ret = scrub_extent_for_parity(sparity, extent_start,
+ extent_size, extent_physical,
+ extent_dev, extent_flags,
+ extent_gen, extent_mirror_num);
+ scrub_free_csums(sctx);
+
+ if (ret)
+ break;
+
+ cond_resched();
+next:
+ path->slots[0]++;
+ }
+ if (ret < 0)
+ scrub_parity_mark_sectors_error(sparity, extent_start,
+ extent_size);
+ btrfs_release_path(path);
+ return ret;
+}
+
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *sdev,
@@ -3026,28 +3181,12 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
u64 logic_end)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
- struct btrfs_root *root = btrfs_extent_root(fs_info, logic_start);
- struct btrfs_root *csum_root;
- struct btrfs_extent_item *extent;
- struct btrfs_io_context *bioc = NULL;
struct btrfs_path *path;
- u64 flags;
+ u64 cur_logical;
int ret;
- int slot;
- struct extent_buffer *l;
- struct btrfs_key key;
- u64 generation;
- u64 extent_logical;
- u64 extent_physical;
- /* Check the comment in scrub_stripe() for why u32 is enough here */
- u32 extent_len;
- u64 mapped_length;
- struct btrfs_device *extent_dev;
struct scrub_parity *sparity;
int nsectors;
int bitmap_len;
- int extent_mirror_num;
- int stop_loop = 0;
path = btrfs_alloc_path();
if (!path) {
@@ -3085,173 +3224,14 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
ret = 0;
- while (logic_start < logic_end) {
- if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
- key.type = BTRFS_METADATA_ITEM_KEY;
- else
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.objectid = logic_start;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ for (cur_logical = logic_start; cur_logical < logic_end;
+ cur_logical += map->stripe_len) {
+ ret = scrub_raid56_data_stripe_for_parity(sctx, sparity, map,
+ sdev, path, cur_logical);
if (ret < 0)
- goto out;
-
- if (ret > 0) {
- ret = btrfs_previous_extent_item(root, path, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, &key,
- path, 0, 0);
- if (ret < 0)
- goto out;
- }
- }
-
- stop_loop = 0;
- while (1) {
- u64 bytes;
-
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
-
- stop_loop = 1;
- break;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.type != BTRFS_EXTENT_ITEM_KEY &&
- key.type != BTRFS_METADATA_ITEM_KEY)
- goto next;
-
- if (key.type == BTRFS_METADATA_ITEM_KEY)
- bytes = fs_info->nodesize;
- else
- bytes = key.offset;
-
- if (key.objectid + bytes <= logic_start)
- goto next;
-
- if (key.objectid >= logic_end) {
- stop_loop = 1;
- break;
- }
-
- while (key.objectid >= logic_start + map->stripe_len)
- logic_start += map->stripe_len;
-
- extent = btrfs_item_ptr(l, slot,
- struct btrfs_extent_item);
- flags = btrfs_extent_flags(l, extent);
- generation = btrfs_extent_generation(l, extent);
-
- if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
- (key.objectid < logic_start ||
- key.objectid + bytes >
- logic_start + map->stripe_len)) {
- btrfs_err(fs_info,
- "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
- key.objectid, logic_start);
- spin_lock(&sctx->stat_lock);
- sctx->stat.uncorrectable_errors++;
- spin_unlock(&sctx->stat_lock);
- goto next;
- }
-again:
- extent_logical = key.objectid;
- ASSERT(bytes <= U32_MAX);
- extent_len = bytes;
-
- if (extent_logical < logic_start) {
- extent_len -= logic_start - extent_logical;
- extent_logical = logic_start;
- }
-
- if (extent_logical + extent_len >
- logic_start + map->stripe_len)
- extent_len = logic_start + map->stripe_len -
- extent_logical;
-
- scrub_parity_mark_sectors_data(sparity, extent_logical,
- extent_len);
-
- mapped_length = extent_len;
- bioc = NULL;
- ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
- extent_logical, &mapped_length, &bioc,
- 0);
- if (!ret) {
- if (!bioc || mapped_length < extent_len)
- ret = -EIO;
- }
- if (ret) {
- btrfs_put_bioc(bioc);
- goto out;
- }
- extent_physical = bioc->stripes[0].physical;
- extent_mirror_num = bioc->mirror_num;
- extent_dev = bioc->stripes[0].dev;
- btrfs_put_bioc(bioc);
-
- csum_root = btrfs_csum_root(fs_info, extent_logical);
- ret = btrfs_lookup_csums_range(csum_root,
- extent_logical,
- extent_logical + extent_len - 1,
- &sctx->csum_list, 1);
- if (ret)
- goto out;
-
- ret = scrub_extent_for_parity(sparity, extent_logical,
- extent_len,
- extent_physical,
- extent_dev, flags,
- generation,
- extent_mirror_num);
-
- scrub_free_csums(sctx);
-
- if (ret)
- goto out;
-
- if (extent_logical + extent_len <
- key.objectid + bytes) {
- logic_start += map->stripe_len;
-
- if (logic_start >= logic_end) {
- stop_loop = 1;
- break;
- }
-
- if (logic_start < key.objectid + bytes) {
- cond_resched();
- goto again;
- }
- }
-next:
- path->slots[0]++;
- }
-
- btrfs_release_path(path);
-
- if (stop_loop)
break;
-
- logic_start += map->stripe_len;
- }
-out:
- if (ret < 0) {
- ASSERT(logic_end - logic_start <= U32_MAX);
- scrub_parity_mark_sectors_error(sparity, logic_start,
- logic_end - logic_start);
}
+
scrub_parity_put(sparity);
scrub_submit(sctx);
mutex_lock(&sctx->wr_lock);
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 8/9] btrfs: use find_first_extent_item() to replace the open-coded extent item search
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
` (6 preceding siblings ...)
2022-02-18 5:48 ` [PATCH v3 7/9] btrfs: refactor scrub_raid56_parity() Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
2022-02-18 5:48 ` [PATCH v3 9/9] btrfs: move scrub_remap_extent() call into scrub_extent() with more comments Qu Wenruo
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
Since we have find_first_extent_item() to iterate the extent items of a
certain range, there is no need to use the open-coded version.
Replace the final scrub call site with find_first_extent_item().
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 99 ++++++++++++------------------------------------
1 file changed, 25 insertions(+), 74 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 97044957ed85..ddf337ac412d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3029,106 +3029,58 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, logical);
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, logical);
- struct btrfs_key key;
u64 extent_start;
u64 extent_size;
+ u64 cur_logical = logical;
int ret;
ASSERT(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK);
-
/* Path should not be populated */
ASSERT(!path->nodes[0]);
- if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
- key.type = BTRFS_METADATA_ITEM_KEY;
- else
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.objectid = logical;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
- if (ret < 0)
- return ret;
-
- if (ret > 0) {
- ret = btrfs_previous_extent_item(extent_root, path, 0);
- if (ret < 0)
- return ret;
- if (ret > 0) {
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, extent_root, &key, path,
- 0, 0);
- if (ret < 0)
- return ret;
- }
- }
-
- while (1) {
+ while (cur_logical < logical + map->stripe_len) {
struct btrfs_io_context *bioc = NULL;
struct btrfs_device *extent_dev;
- struct btrfs_extent_item *ei;
- struct extent_buffer *l;
- int slot;
u64 mapped_length;
u64 extent_flags;
u64 extent_gen;
u64 extent_physical;
u64 extent_mirror_num;
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(extent_root, path);
- if (ret == 0)
- continue;
-
- /* No more extent items or error, exit */
+ ret = find_first_extent_item(extent_root, path, cur_logical,
+ logical + map->stripe_len - cur_logical);
+ /* No more extent item in this data stripe */
+ if (ret > 0) {
+ ret = 0;
break;
}
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.type != BTRFS_EXTENT_ITEM_KEY &&
- key.type != BTRFS_METADATA_ITEM_KEY)
- goto next;
-
- if (key.type == BTRFS_METADATA_ITEM_KEY)
- extent_size = fs_info->nodesize;
- else
- extent_size = key.offset;
-
- if (key.objectid + extent_size <= logical)
- goto next;
-
- /* Beyond this data stripe */
- if (key.objectid >= logical + map->stripe_len)
+ if (ret < 0)
break;
+ get_extent_info(path, &extent_start, &extent_size,
+ &extent_flags, &extent_gen);
- ei = btrfs_item_ptr(l, slot, struct btrfs_extent_item);
- extent_flags = btrfs_extent_flags(l, ei);
- extent_gen = btrfs_extent_generation(l, ei);
-
+ /* Metadata should not cross stripe boundaries */
if ((extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
- (key.objectid < logical || key.objectid + extent_size >
- logical + map->stripe_len)) {
+ does_range_cross_boundary(extent_start, extent_size,
+ logical, map->stripe_len)) {
btrfs_err(fs_info,
- "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
- key.objectid, logical);
+ "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+ extent_start, logical);
spin_lock(&sctx->stat_lock);
sctx->stat.uncorrectable_errors++;
spin_unlock(&sctx->stat_lock);
- goto next;
+ cur_logical += extent_size;
+ continue;
}
- extent_start = key.objectid;
- ASSERT(extent_size <= U32_MAX);
+ /* Skip hole range which doesn't have any extent */
+ cur_logical = max(extent_start, cur_logical);
- /* Truncate the range inside the data stripe */
- if (extent_start < logical) {
- extent_size -= logical - extent_start;
- extent_start = logical;
- }
- if (extent_start + extent_size > logical + map->stripe_len)
- extent_size = logical + map->stripe_len - extent_start;
+ /* Truncate the range inside this data stripe */
+ extent_size = min(extent_start + extent_size,
+ logical + map->stripe_len) - cur_logical;
+ extent_start = cur_logical;
+ ASSERT(extent_size <= U32_MAX);
scrub_parity_mark_sectors_data(sparity, extent_start, extent_size);
@@ -3164,8 +3116,7 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
break;
cond_resched();
-next:
- path->slots[0]++;
+ cur_logical += extent_size;
}
if (ret < 0)
scrub_parity_mark_sectors_error(sparity, extent_start,
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v3 9/9] btrfs: move scrub_remap_extent() call into scrub_extent() with more comments
2022-02-18 5:48 [PATCH v3 0/9] btrfs: refactor scrub entrances for each profile Qu Wenruo
` (7 preceding siblings ...)
2022-02-18 5:48 ` [PATCH v3 8/9] btrfs: use find_first_extent_item() to replace the open-coded extent item search Qu Wenruo
@ 2022-02-18 5:48 ` Qu Wenruo
8 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2022-02-18 5:48 UTC (permalink / raw)
To: linux-btrfs
[SUSPICIOUS CODE]
When refactoring scrub code, I notice a very strange behavior around
scrub_remap_extent():
if (sctx->is_dev_replace)
scrub_remap_extent(fs_info, cur_logical, scrub_len,
&cur_physical, &target_dev, &cur_mirror);
As replace target is a 1:1 copy of the source device, thus physical
offset inside the target should be the same as physical inside source,
thus this remap call makes no sense to me.
[REAL FUNCTIONALITY]
After more investigation, the function name scrub_remape_extent()
doesn't tell anything of the truth, nor does its if () condition.
The real story behind this function is that, for scrub_pages() we never
expect missing device, even for replacing missing device.
What scrub_remap_extent() is really doing is to find a live mirror, and
make later scrub_pages() to read data from the good copy, other than
from the missing device and increase error counters unnecessarily.
[IMPROVEMENT]
We have no need to bother scrub_remap_extent() in scrub_simple_mirror()
at all, we only need to call it before we call scrub_pages().
And rename the function to scrub_find_live_copy(), add extra comments on
them.
By this we can remove one parameter from scrub_extent(), and reduce the
unnecessary calls to scrub_remape_extent() for regular replace.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/scrub.c | 62 +++++++++++++++++++++++++++---------------------
1 file changed, 35 insertions(+), 27 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ddf337ac412d..13cfa39f83b9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -233,11 +233,11 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
-static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
- u64 extent_logical, u32 extent_len,
- u64 *extent_physical,
- struct btrfs_device **extent_dev,
- int *extent_mirror_num);
+static void scrub_find_good_copy(struct btrfs_fs_info *fs_info,
+ u64 extent_logical, u32 extent_len,
+ u64 *extent_physical,
+ struct btrfs_device **extent_dev,
+ int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
@@ -2213,7 +2213,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
* We shouldn't be scrubbing a missing device. Even for dev
* replace, we should only get here for RAID 5/6. We either
* managed to mount something with no mirrors remaining or
- * there's a bug in scrub_remap_extent()/btrfs_map_block().
+ * there's a bug in scrub_find_good_copy()/btrfs_map_block().
*/
goto bioc_out;
}
@@ -2532,8 +2532,11 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
u64 logical, u32 len,
u64 physical, struct btrfs_device *dev, u64 flags,
- u64 gen, int mirror_num, u64 physical_for_dev_replace)
+ u64 gen, int mirror_num)
{
+ struct btrfs_device *src_dev = dev;
+ u64 src_physical = physical;
+ int src_mirror = mirror_num;
int ret;
u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize;
@@ -2561,6 +2564,18 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
WARN_ON(1);
}
+ /*
+ * For dev-replace case, we can have @dev being a missing device.
+ * Regular scrub will avoid its execution on missing device at all,
+ * as that would trigger tons of read error.
+ *
+ * Reading from missing device will cause read error counts to
+ * increase unnecessarily.
+ * So here we change the read source to a good mirror.
+ */
+ if (sctx->is_dev_replace && !dev->bdev)
+ scrub_find_good_copy(sctx->fs_info, logical, len, &src_physical,
+ &src_dev, &src_mirror);
while (len) {
u32 l = min(len, blocksize);
int have_csum = 0;
@@ -2571,15 +2586,15 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
if (have_csum == 0)
++sctx->stat.no_csum;
}
- ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
- mirror_num, have_csum ? csum : NULL,
- physical_for_dev_replace);
+ ret = scrub_pages(sctx, logical, l, src_physical, src_dev,
+ flags, gen, src_mirror,
+ have_csum ? csum : NULL, physical);
if (ret)
return ret;
len -= l;
logical += l;
physical += l;
- physical_for_dev_replace += l;
+ src_physical += l;
}
return 0;
}
@@ -3266,14 +3281,11 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
path.skip_locking = 1;
/* Go through each extent items inside the logical range */
while (cur_logical < logical_end) {
- int cur_mirror = mirror_num;
- struct btrfs_device *target_dev = device;
u64 extent_start;
u64 extent_len;
u64 extent_flags;
u64 extent_gen;
u64 scrub_len;
- u64 cur_physical;
/* Canceled ? */
if (atomic_read(&fs_info->scrub_cancel_req) ||
@@ -3329,11 +3341,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
scrub_len = min(min(extent_start + extent_len,
logical_end), cur_logical + max_length) -
cur_logical;
- cur_physical = cur_logical - logical_start + physical;
- if (sctx->is_dev_replace)
- scrub_remap_extent(fs_info, cur_logical, scrub_len,
- &cur_physical, &target_dev, &cur_mirror);
if (extent_flags & BTRFS_EXTENT_FLAG_DATA) {
ret = btrfs_lookup_csums_range(csum_root, cur_logical,
cur_logical + scrub_len - 1,
@@ -3353,10 +3361,10 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
cur_logical += scrub_len;
continue;
}
- ret = scrub_extent(sctx, map, cur_logical, scrub_len, cur_physical,
- target_dev, extent_flags, extent_gen,
- cur_mirror, cur_logical - logical_start +
- physical);
+ ret = scrub_extent(sctx, map, cur_logical, scrub_len,
+ cur_logical - logical_start + physical,
+ device, extent_flags, extent_gen,
+ mirror_num);
scrub_free_csums(sctx);
if (ret)
break;
@@ -4341,11 +4349,11 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
}
-static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
- u64 extent_logical, u32 extent_len,
- u64 *extent_physical,
- struct btrfs_device **extent_dev,
- int *extent_mirror_num)
+static void scrub_find_good_copy(struct btrfs_fs_info *fs_info,
+ u64 extent_logical, u32 extent_len,
+ u64 *extent_physical,
+ struct btrfs_device **extent_dev,
+ int *extent_mirror_num)
{
u64 mapped_length;
struct btrfs_io_context *bioc = NULL;
--
2.35.1
^ permalink raw reply related [flat|nested] 10+ messages in thread