linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents
@ 2018-05-29  7:45 Qu Wenruo
  2018-05-29  8:59 ` Su Yue
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Qu Wenruo @ 2018-05-29  7:45 UTC (permalink / raw)
  To: linux-btrfs

As the lzo corruption reported by James Harvey, for data extents without
checksum, neither btrfs check nor kernel scrub could detect anything
wrong.

However if our profile supports duplication, we still have a chance to
detect such corruption, as in that case the mirrors will mismatch with
each other.

So enhance --check-data-csum option to do such check, and this could
help us to detect such corruption in an autonomous test case.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 Documentation/btrfs-check.asciidoc |   2 +
 check/main.c                       | 161 ++++++++++++++++++++++++++++-
 2 files changed, 162 insertions(+), 1 deletion(-)

diff --git a/Documentation/btrfs-check.asciidoc b/Documentation/btrfs-check.asciidoc
index b963eae5cdce..34173acbc523 100644
--- a/Documentation/btrfs-check.asciidoc
+++ b/Documentation/btrfs-check.asciidoc
@@ -51,6 +51,8 @@ verify checksums of data blocks
 +
 This expects that the filesystem is otherwise OK, and is basically and offline
 'scrub' but does not repair data from spare copies.
+Also, for data extent without checksum (NODATASUM), it will compare data
+between mirrors to detect any possible mismatch.
 
 --chunk-root <bytenr>::
 use the given offset 'bytenr' for the chunk tree root
diff --git a/check/main.c b/check/main.c
index 68da994f7ae0..2ea965fc2fed 100644
--- a/check/main.c
+++ b/check/main.c
@@ -5602,6 +5602,148 @@ out:
 	return ret;
 }
 
+/*
+ * Return 0 if both mirrors match
+ * Return >0 if mirrors mismatch
+ * Return <0 for fatal error
+ */
+static int compare_extent_mirrors(struct btrfs_fs_info *fs_info, u64 bytenr,
+				  u64 len)
+{
+	u64 cur;
+	const int buf_size = SZ_128K;
+	char buf1[buf_size];
+	char buf2[buf_size];
+	int ret;
+
+	if (btrfs_num_copies(fs_info, bytenr, len) != 2)
+		return -EINVAL;
+
+	for (cur = bytenr; cur < bytenr + len; cur += buf_size) {
+		u64 cur_len = min_t(u64, buf_size, bytenr + len - cur);
+
+		ret = read_data_from_disk(fs_info, buf1, cur, cur_len, 1);
+		if (ret < 0)
+			return ret;
+		ret = read_data_from_disk(fs_info, buf2, cur, cur_len, 2);
+		if (ret < 0)
+			return ret;
+		if (memcmp(buf1, buf2, cur_len))
+			return 1;
+	}
+	return 0;
+}
+
+static int check_bg_mirrors_for_nodatasum(struct btrfs_fs_info *fs_info,
+					  struct btrfs_block_group_cache *bg)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_key key;
+	struct btrfs_path path;
+	int errors = 0;
+	int ret;
+
+	key.objectid = bg->key.objectid;
+	key.type = 0;
+	key.offset = 0;
+	btrfs_init_path(&path);
+
+	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+	if (ret < 0)
+		return ret;
+	if (ret == 0) {
+		/* Such key should not exist, something goes wrong */
+		ret = -EUCLEAN;
+		goto error;
+	}
+
+	while (1) {
+		u64 csum_found;
+
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		if (key.type != BTRFS_EXTENT_ITEM_KEY)
+			goto next;
+		ret = count_csum_range(fs_info, key.objectid, key.offset,
+				       &csum_found);
+		if (ret < 0)
+			goto error;
+		/*
+		 * Here we don't care if csum is covering the whole extent,
+		 * as if it's true it's half csumed, file extent check will
+		 * report, we only care if it's without csum at all
+		 */
+		if (csum_found)
+			goto next;
+		ret = compare_extent_mirrors(fs_info, key.objectid, key.offset);
+		if (ret < 0) {
+			error(
+	"failed to compare data extent mirrors for bytenr %llu len %llu: %s",
+			      key.objectid, key.offset, strerror(-ret));
+			goto error;
+		}
+		if (ret > 0) {
+			error(
+	"found data mismatch for NODATASUM extent, at bytenr %llu len %llu",
+			      key.objectid, key.offset);
+			errors++;
+		}
+next:
+		ret = btrfs_next_extent_item(extent_root, &path,
+					bg->key.objectid + bg->key.offset - 1);
+		if (ret < 0)
+			goto error;
+		if (ret > 0)
+			break;
+	}
+	btrfs_release_path(&path);
+	return errors;
+
+error:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Check for any data extent without csum, and then compare its data with
+ * any existing mirror to detect any possible corruption.
+ *
+ * Return 0 for all good.
+ * Return <0 for fatal error. (Like failed to read tree blocks)
+ * Return >0 for how many mismatched extents found.
+ */
+static int check_mirrors_for_nodatasum(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_block_group_cache *bg;
+	int ret;
+	int errors = 0;
+
+	bg = btrfs_lookup_first_block_group(fs_info, 0);
+	if (!bg)
+		return -EUCLEAN;
+
+	while (bg) {
+		/* Here we only care about data block groups */
+		if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
+			goto next;
+		/*
+		 * And it must be mirrored profile
+		 * NOTE: For RAID5/6 it's not yet supported yet.
+		 */
+		if (!((BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP |
+		       BTRFS_BLOCK_GROUP_RAID10) & bg->flags))
+			goto next;
+		ret = check_bg_mirrors_for_nodatasum(fs_info, bg);
+		/* Fatal error, exist right now */
+		if (ret < 0)
+			return ret;
+		errors += ret;
+next:
+		bg = btrfs_lookup_first_block_group(fs_info,
+					bg->key.objectid + bg->key.offset);
+	}
+	return errors;
+}
+
 static int check_csums(struct btrfs_root *root)
 {
 	struct btrfs_path path;
@@ -5697,8 +5839,25 @@ skip_csum_check:
 		num_bytes += data_len;
 		path.slots[0]++;
 	}
-
 	btrfs_release_path(&path);
+
+	/*
+	 * Do extra check on mirror consistence for NODATASUM case if
+	 * --check-datasum is specified.
+	 * For NODATASUM case, we can still check if both copies match.
+	 */
+	if (verify_csum) {
+		ret = check_mirrors_for_nodatasum(root->fs_info);
+		if (ret < 0) {
+			error(
+		"failed to check consistence for data without checksum: %s",
+			      strerror(-ret));
+			errors++;
+		}
+		if (ret > 0)
+			errors += ret;
+	}
+
 	return errors;
 }
 
-- 
2.17.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents
  2018-05-29  7:45 [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents Qu Wenruo
@ 2018-05-29  8:59 ` Su Yue
  2018-05-29  9:28 ` Lu Fengqi
  2018-05-29 19:26 ` james harvey
  2 siblings, 0 replies; 4+ messages in thread
From: Su Yue @ 2018-05-29  8:59 UTC (permalink / raw)
  To: Qu Wenruo, linux-btrfs

[-- Attachment #1: Type: text/plain, Size: 6552 bytes --]



On 05/29/2018 03:45 PM, Qu Wenruo wrote:
> As the lzo corruption reported by James Harvey, for data extents without
> checksum, neither btrfs check nor kernel scrub could detect anything
> wrong.
> 
> However if our profile supports duplication, we still have a chance to
> detect such corruption, as in that case the mirrors will mismatch with
> each other.
> 
> So enhance --check-data-csum option to do such check, and this could
> help us to detect such corruption in an autonomous test case.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>

Looks good to me.
I'd rather like naming style without "for" like
check_bg_mirrors_nodatasum though.

Anyway,
Reviewed-by: Su Yue <suy.fnst@cn.fujitsu.com>
> ---
>  Documentation/btrfs-check.asciidoc |   2 +
>  check/main.c                       | 161 ++++++++++++++++++++++++++++-
>  2 files changed, 162 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/btrfs-check.asciidoc b/Documentation/btrfs-check.asciidoc
> index b963eae5cdce..34173acbc523 100644
> --- a/Documentation/btrfs-check.asciidoc
> +++ b/Documentation/btrfs-check.asciidoc
> @@ -51,6 +51,8 @@ verify checksums of data blocks
>  +
>  This expects that the filesystem is otherwise OK, and is basically and offline
>  'scrub' but does not repair data from spare copies.
> +Also, for data extent without checksum (NODATASUM), it will compare data
> +between mirrors to detect any possible mismatch.
>  
>  --chunk-root <bytenr>::
>  use the given offset 'bytenr' for the chunk tree root
> diff --git a/check/main.c b/check/main.c
> index 68da994f7ae0..2ea965fc2fed 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -5602,6 +5602,148 @@ out:
>  	return ret;
>  }
>  
> +/*
> + * Return 0 if both mirrors match
> + * Return >0 if mirrors mismatch
> + * Return <0 for fatal error
> + */
> +static int compare_extent_mirrors(struct btrfs_fs_info *fs_info, u64 bytenr,
> +				  u64 len)
> +{
> +	u64 cur;
> +	const int buf_size = SZ_128K;
> +	char buf1[buf_size];
> +	char buf2[buf_size];
> +	int ret;
> +
> +	if (btrfs_num_copies(fs_info, bytenr, len) != 2)
> +		return -EINVAL;
> +
> +	for (cur = bytenr; cur < bytenr + len; cur += buf_size) {
> +		u64 cur_len = min_t(u64, buf_size, bytenr + len - cur);
> +
> +		ret = read_data_from_disk(fs_info, buf1, cur, cur_len, 1);
> +		if (ret < 0)
> +			return ret;
> +		ret = read_data_from_disk(fs_info, buf2, cur, cur_len, 2);
> +		if (ret < 0)
> +			return ret;
> +		if (memcmp(buf1, buf2, cur_len))
> +			return 1;
> +	}
> +	return 0;
> +}
> +
> +static int check_bg_mirrors_for_nodatasum(struct btrfs_fs_info *fs_info,
> +					  struct btrfs_block_group_cache *bg)
> +{
> +	struct btrfs_root *extent_root = fs_info->extent_root;
> +	struct btrfs_key key;
> +	struct btrfs_path path;
> +	int errors = 0;
> +	int ret;
> +
> +	key.objectid = bg->key.objectid;
> +	key.type = 0;
> +	key.offset = 0;
> +	btrfs_init_path(&path);
> +
> +	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
> +	if (ret < 0)
> +		return ret;
> +	if (ret == 0) {
> +		/* Such key should not exist, something goes wrong */
> +		ret = -EUCLEAN;
> +		goto error;
> +	}
> +
> +	while (1) {
> +		u64 csum_found;
> +
> +		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
> +		if (key.type != BTRFS_EXTENT_ITEM_KEY)
> +			goto next;
> +		ret = count_csum_range(fs_info, key.objectid, key.offset,
> +				       &csum_found);
> +		if (ret < 0)
> +			goto error;
> +		/*
> +		 * Here we don't care if csum is covering the whole extent,
> +		 * as if it's true it's half csumed, file extent check will
> +		 * report, we only care if it's without csum at all
> +		 */
> +		if (csum_found)
> +			goto next;
> +		ret = compare_extent_mirrors(fs_info, key.objectid, key.offset);
> +		if (ret < 0) {
> +			error(
> +	"failed to compare data extent mirrors for bytenr %llu len %llu: %s",
> +			      key.objectid, key.offset, strerror(-ret));
> +			goto error;
> +		}
> +		if (ret > 0) {
> +			error(
> +	"found data mismatch for NODATASUM extent, at bytenr %llu len %llu",
> +			      key.objectid, key.offset);
> +			errors++;
> +		}
> +next:
> +		ret = btrfs_next_extent_item(extent_root, &path,
> +					bg->key.objectid + bg->key.offset - 1);
> +		if (ret < 0)
> +			goto error;
> +		if (ret > 0)
> +			break;
> +	}
> +	btrfs_release_path(&path);
> +	return errors;
> +
> +error:
> +	btrfs_release_path(&path);
> +	return ret;
> +}
> +
> +/*
> + * Check for any data extent without csum, and then compare its data with
> + * any existing mirror to detect any possible corruption.
> + *
> + * Return 0 for all good.
> + * Return <0 for fatal error. (Like failed to read tree blocks)
> + * Return >0 for how many mismatched extents found.
> + */
> +static int check_mirrors_for_nodatasum(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_block_group_cache *bg;
> +	int ret;
> +	int errors = 0;
> +
> +	bg = btrfs_lookup_first_block_group(fs_info, 0);
> +	if (!bg)
> +		return -EUCLEAN;
> +
> +	while (bg) {
> +		/* Here we only care about data block groups */
> +		if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
> +			goto next;
> +		/*
> +		 * And it must be mirrored profile
> +		 * NOTE: For RAID5/6 it's not yet supported yet.
> +		 */
> +		if (!((BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP |
> +		       BTRFS_BLOCK_GROUP_RAID10) & bg->flags))
> +			goto next;
> +		ret = check_bg_mirrors_for_nodatasum(fs_info, bg);
> +		/* Fatal error, exist right now */
> +		if (ret < 0)
> +			return ret;
> +		errors += ret;
> +next:
> +		bg = btrfs_lookup_first_block_group(fs_info,
> +					bg->key.objectid + bg->key.offset);
> +	}
> +	return errors;
> +}
> +
>  static int check_csums(struct btrfs_root *root)
>  {
>  	struct btrfs_path path;
> @@ -5697,8 +5839,25 @@ skip_csum_check:
>  		num_bytes += data_len;
>  		path.slots[0]++;
>  	}
> -
>  	btrfs_release_path(&path);
> +
> +	/*
> +	 * Do extra check on mirror consistence for NODATASUM case if
> +	 * --check-datasum is specified.
> +	 * For NODATASUM case, we can still check if both copies match.
> +	 */
> +	if (verify_csum) {
> +		ret = check_mirrors_for_nodatasum(root->fs_info);
> +		if (ret < 0) {
> +			error(
> +		"failed to check consistence for data without checksum: %s",
> +			      strerror(-ret));
> +			errors++;
> +		}
> +		if (ret > 0)
> +			errors += ret;
> +	}
> +
>  	return errors;
>  }
>  
> 



[-- Attachment #2: pEpkey.asc --]
[-- Type: application/pgp-keys, Size: 1791 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents
  2018-05-29  7:45 [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents Qu Wenruo
  2018-05-29  8:59 ` Su Yue
@ 2018-05-29  9:28 ` Lu Fengqi
  2018-05-29 19:26 ` james harvey
  2 siblings, 0 replies; 4+ messages in thread
From: Lu Fengqi @ 2018-05-29  9:28 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs

On Tue, May 29, 2018 at 03:45:27PM +0800, Qu Wenruo wrote:
>As the lzo corruption reported by James Harvey, for data extents without
>checksum, neither btrfs check nor kernel scrub could detect anything
>wrong.
>
>However if our profile supports duplication, we still have a chance to
>detect such corruption, as in that case the mirrors will mismatch with
>each other.
>
>So enhance --check-data-csum option to do such check, and this could
>help us to detect such corruption in an autonomous test case.
>
>Signed-off-by: Qu Wenruo <wqu@suse.com>

Overall looks good to me.

Reviewed-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>

But a small nitpick inlined below:

>---
> Documentation/btrfs-check.asciidoc |   2 +
> check/main.c                       | 161 ++++++++++++++++++++++++++++-
> 2 files changed, 162 insertions(+), 1 deletion(-)
>
>diff --git a/Documentation/btrfs-check.asciidoc b/Documentation/btrfs-check.asciidoc
>index b963eae5cdce..34173acbc523 100644
>--- a/Documentation/btrfs-check.asciidoc
>+++ b/Documentation/btrfs-check.asciidoc
>@@ -51,6 +51,8 @@ verify checksums of data blocks
> +
> This expects that the filesystem is otherwise OK, and is basically and offline
> 'scrub' but does not repair data from spare copies.
>+Also, for data extent without checksum (NODATASUM), it will compare data
>+between mirrors to detect any possible mismatch.
> 
> --chunk-root <bytenr>::
> use the given offset 'bytenr' for the chunk tree root
>diff --git a/check/main.c b/check/main.c
>index 68da994f7ae0..2ea965fc2fed 100644
>--- a/check/main.c
>+++ b/check/main.c
>@@ -5602,6 +5602,148 @@ out:
> 	return ret;
> }
> 
>+/*
>+ * Return 0 if both mirrors match
>+ * Return >0 if mirrors mismatch
>+ * Return <0 for fatal error
>+ */
>+static int compare_extent_mirrors(struct btrfs_fs_info *fs_info, u64 bytenr,
>+				  u64 len)
>+{
>+	u64 cur;
>+	const int buf_size = SZ_128K;
>+	char buf1[buf_size];
>+	char buf2[buf_size];
>+	int ret;
>+
>+	if (btrfs_num_copies(fs_info, bytenr, len) != 2)
>+		return -EINVAL;
>+
>+	for (cur = bytenr; cur < bytenr + len; cur += buf_size) {
>+		u64 cur_len = min_t(u64, buf_size, bytenr + len - cur);
>+
>+		ret = read_data_from_disk(fs_info, buf1, cur, cur_len, 1);
>+		if (ret < 0)
>+			return ret;
>+		ret = read_data_from_disk(fs_info, buf2, cur, cur_len, 2);
>+		if (ret < 0)
>+			return ret;
>+		if (memcmp(buf1, buf2, cur_len))
>+			return 1;
>+	}
>+	return 0;
>+}
>+
>+static int check_bg_mirrors_for_nodatasum(struct btrfs_fs_info *fs_info,
>+					  struct btrfs_block_group_cache *bg)
>+{
>+	struct btrfs_root *extent_root = fs_info->extent_root;
>+	struct btrfs_key key;
>+	struct btrfs_path path;
>+	int errors = 0;
>+	int ret;
>+
>+	key.objectid = bg->key.objectid;
>+	key.type = 0;
>+	key.offset = 0;
>+	btrfs_init_path(&path);
>+
>+	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
>+	if (ret < 0)
>+		return ret;
>+	if (ret == 0) {
>+		/* Such key should not exist, something goes wrong */
>+		ret = -EUCLEAN;
>+		goto error;
>+	}
>+
>+	while (1) {
>+		u64 csum_found;
>+
>+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
>+		if (key.type != BTRFS_EXTENT_ITEM_KEY)
>+			goto next;
>+		ret = count_csum_range(fs_info, key.objectid, key.offset,
>+				       &csum_found);
>+		if (ret < 0)
>+			goto error;
>+		/*
>+		 * Here we don't care if csum is covering the whole extent,
>+		 * as if it's true it's half csumed, file extent check will
>+		 * report, we only care if it's without csum at all
>+		 */
>+		if (csum_found)
>+			goto next;
>+		ret = compare_extent_mirrors(fs_info, key.objectid, key.offset);
>+		if (ret < 0) {
>+			error(
>+	"failed to compare data extent mirrors for bytenr %llu len %llu: %s",
>+			      key.objectid, key.offset, strerror(-ret));
>+			goto error;
>+		}
>+		if (ret > 0) {
>+			error(
>+	"found data mismatch for NODATASUM extent, at bytenr %llu len %llu",
>+			      key.objectid, key.offset);
>+			errors++;
>+		}
>+next:
>+		ret = btrfs_next_extent_item(extent_root, &path,
>+					bg->key.objectid + bg->key.offset - 1);
>+		if (ret < 0)
>+			goto error;
>+		if (ret > 0)
>+			break;
>+	}
>+	btrfs_release_path(&path);
>+	return errors;
>+
>+error:
>+	btrfs_release_path(&path);
>+	return ret;
>+}
>+
>+/*
>+ * Check for any data extent without csum, and then compare its data with
>+ * any existing mirror to detect any possible corruption.
>+ *
>+ * Return 0 for all good.
>+ * Return <0 for fatal error. (Like failed to read tree blocks)
>+ * Return >0 for how many mismatched extents found.
>+ */
>+static int check_mirrors_for_nodatasum(struct btrfs_fs_info *fs_info)
>+{
>+	struct btrfs_block_group_cache *bg;
>+	int ret;
>+	int errors = 0;
>+
>+	bg = btrfs_lookup_first_block_group(fs_info, 0);
>+	if (!bg)
>+		return -EUCLEAN;
>+
>+	while (bg) {
>+		/* Here we only care about data block groups */
>+		if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
>+			goto next;
>+		/*
>+		 * And it must be mirrored profile
>+		 * NOTE: For RAID5/6 it's not yet supported yet.
>+		 */
>+		if (!((BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP |
>+		       BTRFS_BLOCK_GROUP_RAID10) & bg->flags))
>+			goto next;
>+		ret = check_bg_mirrors_for_nodatasum(fs_info, bg);
>+		/* Fatal error, exist right now */
>+		if (ret < 0)
>+			return ret;
>+		errors += ret;
>+next:
>+		bg = btrfs_lookup_first_block_group(fs_info,
>+					bg->key.objectid + bg->key.offset);
>+	}
>+	return errors;
>+}
>+
> static int check_csums(struct btrfs_root *root)
> {
> 	struct btrfs_path path;
>@@ -5697,8 +5839,25 @@ skip_csum_check:
> 		num_bytes += data_len;
> 		path.slots[0]++;
> 	}
>-
> 	btrfs_release_path(&path);
>+
>+	/*
>+	 * Do extra check on mirror consistence for NODATASUM case if
>+	 * --check-datasum is specified.

[nit]
s/check-datasum/check-data-csum

-- 
Thanks,
Lu

>+	 * For NODATASUM case, we can still check if both copies match.
>+	 */
>+	if (verify_csum) {
>+		ret = check_mirrors_for_nodatasum(root->fs_info);
>+		if (ret < 0) {
>+			error(
>+		"failed to check consistence for data without checksum: %s",
>+			      strerror(-ret));
>+			errors++;
>+		}
>+		if (ret > 0)
>+			errors += ret;
>+	}
>+
> 	return errors;
> }
> 
>-- 
>2.17.0
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents
  2018-05-29  7:45 [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents Qu Wenruo
  2018-05-29  8:59 ` Su Yue
  2018-05-29  9:28 ` Lu Fengqi
@ 2018-05-29 19:26 ` james harvey
  2 siblings, 0 replies; 4+ messages in thread
From: james harvey @ 2018-05-29 19:26 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: Btrfs BTRFS

On Tue, May 29, 2018 at 3:45 AM, Qu Wenruo <wqu@suse.com> wrote:
> As the lzo corruption reported by James Harvey, for data extents without
> checksum, neither btrfs check nor kernel scrub could detect anything
> wrong.
>
> However if our profile supports duplication, we still have a chance to
> detect such corruption, as in that case the mirrors will mismatch with
> each other.
>
> So enhance --check-data-csum option to do such check, and this could
> help us to detect such corruption in an autonomous test case.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>

I think it's good to have check --check-data-csum do such check.

I also think there should be a check option that only checks the
mirrored copies, so it could be ran without taking the time to check
everything that scrub looks at.  (If the feature isn't also added to
scrub as I argue for below, one could run this option every time they
scrub.)

IMO, it is important to also add this to scrub.  First reason is to
allow online scanning, since check is recommended not to run with
--force on a quiescent/read-only fs.  Second, I think users expect
scrub to ensure mirrored data integrity, and this goes along with
that, of course without automatic correction.  I think without doing
so, users who periodically run scrub would be surprised if they had
data corruption, that it hadn't been being checked all along.

It's recommended many places that users setup scrub to run
automatically on a periodic basis, but I'm not sure if I've seen
anywhere recommend setting up check to do so as well, especially as
doing so on a mounted filesystem is discouraged and potentially will
crash.

I haven't looked at this specifically, but I'm thinking this would be
better in the btrfs-progs side of scrub after it runs the kernel side
real scrub.

I have to head out for a while.  I haven't looked over or tried the
code yet, but will later.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2018-05-29 19:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-05-29  7:45 [PATCH] btrfs-progs: check: Also compare data between mirrors to detect corruption for NODATASUM extents Qu Wenruo
2018-05-29  8:59 ` Su Yue
2018-05-29  9:28 ` Lu Fengqi
2018-05-29 19:26 ` james harvey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).