* [Qemu-devel] [PATCH 1/5] qcow2: Add corrupt bit
2013-08-26 13:04 [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks Max Reitz
@ 2013-08-26 13:04 ` Max Reitz
2013-08-27 9:54 ` Kevin Wolf
2013-08-26 13:04 ` [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks Max Reitz
` (3 subsequent siblings)
4 siblings, 1 reply; 18+ messages in thread
From: Max Reitz @ 2013-08-26 13:04 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz
This adds an incompatible bit indicating corruption to qcow2. Any image
with this bit set may not be written to unless for repairing (and
subsequently clearing the bit if the repair has been successful).
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/qcow2.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
block/qcow2.h | 7 ++++++-
docs/specs/qcow2.txt | 7 ++++++-
include/block/block.h | 2 ++
qemu-img.c | 2 +-
tests/qemu-iotests/031.out | 12 ++++++------
tests/qemu-iotests/036.out | 2 +-
7 files changed, 66 insertions(+), 10 deletions(-)
diff --git a/block/qcow2.c b/block/qcow2.c
index 3376901..1d0d7ca 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -272,6 +272,37 @@ static int qcow2_mark_clean(BlockDriverState *bs)
return 0;
}
+/*
+ * Marks the image as corrupt.
+ */
+int qcow2_mark_corrupt(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
+ return qcow2_update_header(bs);
+}
+
+/*
+ * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
+ * before if necessary.
+ */
+int qcow2_mark_consistent(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
+ int ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
+ return qcow2_update_header(bs);
+ }
+ return 0;
+}
+
static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
BdrvCheckMode fix)
{
@@ -402,6 +433,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
goto fail;
}
+ if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
+ /* Corrupt images may not be written to unless they are being repaired */
+ if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_REPAIR)) {
+ ret = -EACCES;
+ goto fail;
+ }
+ }
+
/* Check support for various header values */
if (header.refcount_order != 4) {
report_unsupported(bs, "%d bit reference counts",
@@ -1130,6 +1169,11 @@ int qcow2_update_header(BlockDriverState *bs)
.name = "dirty bit",
},
{
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
+ .name = "corrupt bit",
+ },
+ {
.type = QCOW2_FEAT_TYPE_COMPATIBLE,
.bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
.name = "lazy refcounts",
diff --git a/block/qcow2.h b/block/qcow2.h
index dba9771..4297487 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -119,9 +119,12 @@ enum {
/* Incompatible feature bits */
enum {
QCOW2_INCOMPAT_DIRTY_BITNR = 0,
+ QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+ QCOW2_INCOMPAT_CORRUPT = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
- QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
+ QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY
+ | QCOW2_INCOMPAT_CORRUPT,
};
/* Compatible feature bits */
@@ -361,6 +364,8 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
int qcow2_mark_dirty(BlockDriverState *bs);
+int qcow2_mark_corrupt(BlockDriverState *bs);
+int qcow2_mark_consistent(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
/* qcow2-refcount.c functions */
diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index 36a559d..33eca36 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -80,7 +80,12 @@ in the description of a field.
tables to repair refcounts before accessing the
image.
- Bits 1-63: Reserved (set to 0)
+ Bit 1: Corrupt bit. If this bit is set then any data
+ structure may be corrupt and the image must not
+ be written to (unless for regaining
+ consistency).
+
+ Bits 2-63: Reserved (set to 0)
80 - 87: compatible_features
Bitmask of compatible features. An implementation can
diff --git a/include/block/block.h b/include/block/block.h
index 742fce5..7145bf0 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -74,6 +74,8 @@ typedef struct BlockDevOps {
#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+#define BDRV_O_REPAIR 0x8000 /* assure any writes are intended only for
+ repairs (important for corrupt images) */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
diff --git a/qemu-img.c b/qemu-img.c
index b9a848d..28c6376 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -558,7 +558,7 @@ static int img_check(int argc, char **argv)
fmt = optarg;
break;
case 'r':
- flags |= BDRV_O_RDWR;
+ flags |= BDRV_O_RDWR | BDRV_O_REPAIR;
if (!strcmp(optarg, "leaks")) {
fix = BDRV_FIX_LEAKS;
diff --git a/tests/qemu-iotests/031.out b/tests/qemu-iotests/031.out
index 796c993..a943344 100644
--- a/tests/qemu-iotests/031.out
+++ b/tests/qemu-iotests/031.out
@@ -54,7 +54,7 @@ header_length 72
Header extension:
magic 0x6803f857
-length 96
+length 144
data <binary>
Header extension:
@@ -68,7 +68,7 @@ No errors were found on the image.
magic 0x514649fb
version 2
-backing_file_offset 0xf8
+backing_file_offset 0x128
backing_file_size 0x17
cluster_bits 16
size 67108864
@@ -92,7 +92,7 @@ data 'host_device'
Header extension:
magic 0x6803f857
-length 96
+length 144
data <binary>
Header extension:
@@ -155,7 +155,7 @@ header_length 104
Header extension:
magic 0x6803f857
-length 96
+length 144
data <binary>
Header extension:
@@ -169,7 +169,7 @@ No errors were found on the image.
magic 0x514649fb
version 3
-backing_file_offset 0x118
+backing_file_offset 0x148
backing_file_size 0x17
cluster_bits 16
size 67108864
@@ -193,7 +193,7 @@ data 'host_device'
Header extension:
magic 0x6803f857
-length 96
+length 144
data <binary>
Header extension:
diff --git a/tests/qemu-iotests/036.out b/tests/qemu-iotests/036.out
index 063ca22..55a3e6e 100644
--- a/tests/qemu-iotests/036.out
+++ b/tests/qemu-iotests/036.out
@@ -46,7 +46,7 @@ header_length 104
Header extension:
magic 0x6803f857
-length 96
+length 144
data <binary>
*** done
--
1.8.3.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 1/5] qcow2: Add corrupt bit
2013-08-26 13:04 ` [Qemu-devel] [PATCH 1/5] qcow2: Add corrupt bit Max Reitz
@ 2013-08-27 9:54 ` Kevin Wolf
2013-08-27 10:00 ` Max Reitz
0 siblings, 1 reply; 18+ messages in thread
From: Kevin Wolf @ 2013-08-27 9:54 UTC (permalink / raw)
To: Max Reitz; +Cc: qemu-devel, Stefan Hajnoczi
Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> This adds an incompatible bit indicating corruption to qcow2. Any image
> with this bit set may not be written to unless for repairing (and
> subsequently clearing the bit if the repair has been successful).
>
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
> block/qcow2.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
> block/qcow2.h | 7 ++++++-
> docs/specs/qcow2.txt | 7 ++++++-
> include/block/block.h | 2 ++
> qemu-img.c | 2 +-
> tests/qemu-iotests/031.out | 12 ++++++------
> tests/qemu-iotests/036.out | 2 +-
> 7 files changed, 66 insertions(+), 10 deletions(-)
>
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 3376901..1d0d7ca 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -272,6 +272,37 @@ static int qcow2_mark_clean(BlockDriverState *bs)
> return 0;
> }
>
> +/*
> + * Marks the image as corrupt.
> + */
> +int qcow2_mark_corrupt(BlockDriverState *bs)
> +{
> + BDRVQcowState *s = bs->opaque;
> +
> + s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
> + return qcow2_update_header(bs);
> +}
> +
> +/*
> + * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
> + * before if necessary.
> + */
> +int qcow2_mark_consistent(BlockDriverState *bs)
> +{
> + BDRVQcowState *s = bs->opaque;
> +
> + if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
> + int ret = bdrv_flush(bs);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
> + return qcow2_update_header(bs);
> + }
> + return 0;
> +}
> +
> static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
> BdrvCheckMode fix)
> {
> @@ -402,6 +433,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
> goto fail;
> }
>
> + if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
> + /* Corrupt images may not be written to unless they are being repaired */
> + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_REPAIR)) {
Isn't BDRV_O_REPAIR equivalent to BDRV_O_CHECK && BDRV_O_RDWR, or is
there an advantage in using a new bit?
Looks good otherwise.
Kevin
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 1/5] qcow2: Add corrupt bit
2013-08-27 9:54 ` Kevin Wolf
@ 2013-08-27 10:00 ` Max Reitz
0 siblings, 0 replies; 18+ messages in thread
From: Max Reitz @ 2013-08-27 10:00 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-devel, Stefan Hajnoczi
Am 27.08.2013 11:54, schrieb Kevin Wolf:
> Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
>> This adds an incompatible bit indicating corruption to qcow2. Any image
>> with this bit set may not be written to unless for repairing (and
>> subsequently clearing the bit if the repair has been successful).
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
[snip]
>> @@ -402,6 +433,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
>> goto fail;
>> }
>>
>> + if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
>> + /* Corrupt images may not be written to unless they are being repaired */
>> + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_REPAIR)) {
> Isn't BDRV_O_REPAIR equivalent to BDRV_O_CHECK && BDRV_O_RDWR, or is
> there an advantage in using a new bit?
>
> Looks good otherwise.
>
> Kevin
Oh, yes, you're right. I overlooked that flag.
Max
^ permalink raw reply [flat|nested] 18+ messages in thread
* [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks
2013-08-26 13:04 [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks Max Reitz
2013-08-26 13:04 ` [Qemu-devel] [PATCH 1/5] qcow2: Add corrupt bit Max Reitz
@ 2013-08-26 13:04 ` Max Reitz
2013-08-27 10:17 ` Kevin Wolf
2013-08-26 13:04 ` [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata " Max Reitz
` (2 subsequent siblings)
4 siblings, 1 reply; 18+ messages in thread
From: Max Reitz @ 2013-08-26 13:04 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz
Two new functions are added; the first one checks a given range in the
image file for overlaps with metadata (main header, L1 tables, L2
tables, refcount table and blocks).
The second one should be used immediately before writing to the image
file as it calls the first function and, upon collision, marks the
image as corrupt and makes the BDS unusable, thereby preventing
further access.
Both functions take a bitmask argument specifying the structures which
should be checked for overlaps, making it possible to also check
metadata writes against colliding with other structures.
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/qcow2-refcount.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++
block/qcow2.h | 28 ++++++++++
2 files changed, 170 insertions(+)
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 1244693..c8141c8 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -25,6 +25,7 @@
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
+#include "qemu/range.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@@ -1372,3 +1373,144 @@ fail:
return ret;
}
+/*
+ * Checks if the given offset into the image file is actually free to use by
+ * looking for overlaps with important metadata sections (L1/L2 tables etc.),
+ * i.e. a sanity check without relying on the refcount tables.
+ *
+ * The chk parameter specifies exactly what checks to perform.
+ *
+ * Returns:
+ * - 0 if writing to this offset will not affect the mentioned metadata
+ * - a positive QCow2MetadataOverlap value indicating one overlapping section
+ * - a negative value (-errno) indicating an error while performing a check,
+ * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
+ */
+int qcow2_check_metadata_overlap(BlockDriverState *bs, QCow2MetadataOverlap chk,
+ int64_t offset, int64_t size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, j;
+
+ if (!size) {
+ return 0;
+ }
+
+ if (chk & QCOW2_OL_MAIN_HEADER) {
+ if (offset < s->cluster_size) {
+ return QCOW2_OL_MAIN_HEADER;
+ }
+ }
+
+ if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
+ if (ranges_overlap(offset, size, s->l1_table_offset,
+ s->l1_size * sizeof(uint64_t))) {
+ return QCOW2_OL_ACTIVE_L1;
+ }
+ }
+
+ if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
+ if (ranges_overlap(offset, size, s->refcount_table_offset,
+ s->refcount_table_size * sizeof(uint64_t))) {
+ return QCOW2_OL_REFCOUNT_TABLE;
+ }
+ }
+
+ if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
+ if (ranges_overlap(offset, size, s->snapshots_offset,
+ s->snapshots_size)) {
+ return QCOW2_OL_SNAPSHOT_TABLE;
+ }
+ }
+
+ if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
+ for (i = 0; i < s->nb_snapshots; i++) {
+ if (s->snapshots[i].l1_size &&
+ ranges_overlap(offset, size, s->snapshots[i].l1_table_offset,
+ s->snapshots[i].l1_size * sizeof(uint64_t))) {
+ return QCOW2_OL_INACTIVE_L1;
+ }
+ }
+ }
+
+ if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
+ for (i = 0; i < s->l1_size; i++) {
+ if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
+ ranges_overlap(offset, size, s->l1_table[i] & L1E_OFFSET_MASK,
+ s->cluster_size)) {
+ return QCOW2_OL_ACTIVE_L2;
+ }
+ }
+ }
+
+ if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
+ for (i = 0; i < s->refcount_table_size; i++) {
+ if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
+ ranges_overlap(offset, size, s->refcount_table[i] &
+ REFT_OFFSET_MASK, s->cluster_size)) {
+ return QCOW2_OL_REFCOUNT_BLOCK;
+ }
+ }
+ }
+
+ if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
+ for (i = 0; i < s->nb_snapshots; i++) {
+ uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
+ uint32_t l1_sz = s->snapshots[i].l1_size;
+ uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t));
+ int ret;
+
+ ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1,
+ l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
+
+ if (ret < 0) {
+ g_free(l1);
+ return ret;
+ }
+
+ for (j = 0; j < l1_sz; j++) {
+ if ((l1[j] & L1E_OFFSET_MASK) &&
+ ranges_overlap(offset, size, l1[j] & L1E_OFFSET_MASK,
+ s->cluster_size)) {
+ g_free(l1);
+ return QCOW2_OL_INACTIVE_L2;
+ }
+ }
+
+ g_free(l1);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * First performs a check for metadata overlaps (through
+ * qcow2_check_metadata_overlap); if that fails with a negative value (error
+ * while performing a check), it will print a message but otherwise ignore that
+ * error. If an impending overlap is detected, the BDS will be made unusable and
+ * the qcow2 file marked corrupt.
+ *
+ * Returns 0 if there were no overlaps (or an error occured while checking for
+ * overlaps) or a positive QCow2MetadataOverlap value on overlap (then, the BDS
+ * will be unusable and the qcow2 file marked corrupt).
+ */
+int qcow2_pre_write_overlap_check(BlockDriverState *bs, QCow2MetadataOverlap chk,
+ int64_t offset, int64_t size)
+{
+ int ret = qcow2_check_metadata_overlap(bs, chk, offset, size);
+
+ if (ret < 0) {
+ fprintf(stderr, "qcow2: Error while checking for metadata overlaps: "
+ "%s\n", strerror(-ret));
+ return ret;
+ } else if (ret > 0) {
+ fprintf(stderr, "qcow2: Preventing invalid write on metadata; "
+ "image marked as corrupt.\n");
+ qcow2_mark_corrupt(bs);
+ bs->drv = NULL; /* make BDS unusable */
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/block/qcow2.h b/block/qcow2.h
index 4297487..8a55da0 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -289,6 +289,29 @@ enum {
QCOW2_CLUSTER_ZERO
};
+typedef enum QCow2MetadataOverlap {
+ QCOW2_OL_NONE = 0,
+ QCOW2_OL_MAIN_HEADER = (1 << 0),
+ QCOW2_OL_ACTIVE_L1 = (1 << 1),
+ QCOW2_OL_ACTIVE_L2 = (1 << 2),
+ QCOW2_OL_REFCOUNT_TABLE = (1 << 3),
+ QCOW2_OL_REFCOUNT_BLOCK = (1 << 4),
+ QCOW2_OL_SNAPSHOT_TABLE = (1 << 5),
+ QCOW2_OL_INACTIVE_L1 = (1 << 6),
+ /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
+ * reads. */
+ QCOW2_OL_INACTIVE_L2 = (1 << 7),
+} QCow2MetadataOverlap;
+
+/* Perform all overlap checks which don't require disk access */
+#define QCOW2_OL_CACHED \
+ (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_ACTIVE_L2 | \
+ QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_REFCOUNT_BLOCK | \
+ QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_INACTIVE_L1)
+
+/* The default checks to perform */
+#define QCOW2_OL_DEFAULT QCOW2_OL_CACHED
+
#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
@@ -390,6 +413,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
void qcow2_process_discards(BlockDriverState *bs, int ret);
+int qcow2_check_metadata_overlap(BlockDriverState *bs, QCow2MetadataOverlap chk,
+ int64_t offset, int64_t size);
+int qcow2_pre_write_overlap_check(BlockDriverState *bs, QCow2MetadataOverlap chk,
+ int64_t offset, int64_t size);
+
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks
2013-08-26 13:04 ` [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks Max Reitz
@ 2013-08-27 10:17 ` Kevin Wolf
2013-08-27 11:06 ` Max Reitz
0 siblings, 1 reply; 18+ messages in thread
From: Kevin Wolf @ 2013-08-27 10:17 UTC (permalink / raw)
To: Max Reitz; +Cc: qemu-devel, Stefan Hajnoczi
Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> Two new functions are added; the first one checks a given range in the
> image file for overlaps with metadata (main header, L1 tables, L2
> tables, refcount table and blocks).
>
> The second one should be used immediately before writing to the image
> file as it calls the first function and, upon collision, marks the
> image as corrupt and makes the BDS unusable, thereby preventing
> further access.
>
> Both functions take a bitmask argument specifying the structures which
> should be checked for overlaps, making it possible to also check
> metadata writes against colliding with other structures.
>
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
> block/qcow2-refcount.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++
> block/qcow2.h | 28 ++++++++++
> 2 files changed, 170 insertions(+)
>
> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
> index 1244693..c8141c8 100644
> --- a/block/qcow2-refcount.c
> +++ b/block/qcow2-refcount.c
> @@ -25,6 +25,7 @@
> #include "qemu-common.h"
> #include "block/block_int.h"
> #include "block/qcow2.h"
> +#include "qemu/range.h"
>
> static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
> static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
> @@ -1372,3 +1373,144 @@ fail:
> return ret;
> }
>
> +/*
> + * Checks if the given offset into the image file is actually free to use by
> + * looking for overlaps with important metadata sections (L1/L2 tables etc.),
> + * i.e. a sanity check without relying on the refcount tables.
> + *
> + * The chk parameter specifies exactly what checks to perform.
> + *
> + * Returns:
> + * - 0 if writing to this offset will not affect the mentioned metadata
> + * - a positive QCow2MetadataOverlap value indicating one overlapping section
> + * - a negative value (-errno) indicating an error while performing a check,
> + * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
> + */
> +int qcow2_check_metadata_overlap(BlockDriverState *bs, QCow2MetadataOverlap chk,
chk is really just an int, because you don't pass a single enum value but
a bit mask consisting of multiple enum values ored together.
> + int64_t offset, int64_t size)
> +{
> + BDRVQcowState *s = bs->opaque;
> + int i, j;
> +
> + if (!size) {
> + return 0;
> + }
> +
> + if (chk & QCOW2_OL_MAIN_HEADER) {
> + if (offset < s->cluster_size) {
> + return QCOW2_OL_MAIN_HEADER;
> + }
> + }
> +
> + if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
> + if (ranges_overlap(offset, size, s->l1_table_offset,
> + s->l1_size * sizeof(uint64_t))) {
The size could be rounded up to the next cluster boundary (same thing
for other metadata types).
> + return QCOW2_OL_ACTIVE_L1;
> + }
> + }
> +
> + if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
> + if (ranges_overlap(offset, size, s->refcount_table_offset,
> + s->refcount_table_size * sizeof(uint64_t))) {
> + return QCOW2_OL_REFCOUNT_TABLE;
> + }
> + }
> +
> + if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
> + if (ranges_overlap(offset, size, s->snapshots_offset,
> + s->snapshots_size)) {
> + return QCOW2_OL_SNAPSHOT_TABLE;
> + }
> + }
> +
> + if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
> + for (i = 0; i < s->nb_snapshots; i++) {
> + if (s->snapshots[i].l1_size &&
> + ranges_overlap(offset, size, s->snapshots[i].l1_table_offset,
> + s->snapshots[i].l1_size * sizeof(uint64_t))) {
> + return QCOW2_OL_INACTIVE_L1;
> + }
> + }
> + }
> +
> + if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
> + for (i = 0; i < s->l1_size; i++) {
> + if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
> + ranges_overlap(offset, size, s->l1_table[i] & L1E_OFFSET_MASK,
> + s->cluster_size)) {
> + return QCOW2_OL_ACTIVE_L2;
> + }
> + }
> + }
> +
> + if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
> + for (i = 0; i < s->refcount_table_size; i++) {
> + if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
> + ranges_overlap(offset, size, s->refcount_table[i] &
> + REFT_OFFSET_MASK, s->cluster_size)) {
> + return QCOW2_OL_REFCOUNT_BLOCK;
> + }
> + }
> + }
> +
> + if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
> + for (i = 0; i < s->nb_snapshots; i++) {
> + uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
> + uint32_t l1_sz = s->snapshots[i].l1_size;
> + uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t));
> + int ret;
> +
> + ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1,
> + l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
> +
> + if (ret < 0) {
> + g_free(l1);
> + return ret;
> + }
> +
> + for (j = 0; j < l1_sz; j++) {
> + if ((l1[j] & L1E_OFFSET_MASK) &&
> + ranges_overlap(offset, size, l1[j] & L1E_OFFSET_MASK,
> + s->cluster_size)) {
> + g_free(l1);
> + return QCOW2_OL_INACTIVE_L2;
> + }
> + }
> +
> + g_free(l1);
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * First performs a check for metadata overlaps (through
> + * qcow2_check_metadata_overlap); if that fails with a negative value (error
> + * while performing a check), it will print a message but otherwise ignore that
> + * error. If an impending overlap is detected, the BDS will be made unusable and
> + * the qcow2 file marked corrupt.
> + *
> + * Returns 0 if there were no overlaps (or an error occured while checking for
> + * overlaps) or a positive QCow2MetadataOverlap value on overlap (then, the BDS
> + * will be unusable and the qcow2 file marked corrupt).
> + */
> +int qcow2_pre_write_overlap_check(BlockDriverState *bs, QCow2MetadataOverlap chk,
> + int64_t offset, int64_t size)
> +{
> + int ret = qcow2_check_metadata_overlap(bs, chk, offset, size);
> +
> + if (ret < 0) {
> + fprintf(stderr, "qcow2: Error while checking for metadata overlaps: "
> + "%s\n", strerror(-ret));
Leftover debug code?
> + return ret;
> + } else if (ret > 0) {
> + fprintf(stderr, "qcow2: Preventing invalid write on metadata; "
> + "image marked as corrupt.\n");
This one makes actually sense to keep even for production as it is a
condition that we want to make sure to appear in log files.
Another thing to consider would be to send out a QMP event when this
happens.
> + qcow2_mark_corrupt(bs);
> + bs->drv = NULL; /* make BDS unusable */
> + return ret;
> + }
> +
> + return 0;
> +}
Kevin
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks
2013-08-27 10:17 ` Kevin Wolf
@ 2013-08-27 11:06 ` Max Reitz
2013-08-27 11:16 ` Kevin Wolf
0 siblings, 1 reply; 18+ messages in thread
From: Max Reitz @ 2013-08-27 11:06 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-devel, Stefan Hajnoczi
Am 27.08.2013 12:17, schrieb Kevin Wolf:
> Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
>> Two new functions are added; the first one checks a given range in the
>> image file for overlaps with metadata (main header, L1 tables, L2
>> tables, refcount table and blocks).
>>
>> The second one should be used immediately before writing to the image
>> file as it calls the first function and, upon collision, marks the
>> image as corrupt and makes the BDS unusable, thereby preventing
>> further access.
>>
>> Both functions take a bitmask argument specifying the structures which
>> should be checked for overlaps, making it possible to also check
>> metadata writes against colliding with other structures.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>> block/qcow2-refcount.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++
>> block/qcow2.h | 28 ++++++++++
>> 2 files changed, 170 insertions(+)
>>
>> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
>> index 1244693..c8141c8 100644
>> --- a/block/qcow2-refcount.c
>> +++ b/block/qcow2-refcount.c
>> @@ -25,6 +25,7 @@
>> #include "qemu-common.h"
>> #include "block/block_int.h"
>> #include "block/qcow2.h"
>> +#include "qemu/range.h"
>>
>> static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
>> static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
>> @@ -1372,3 +1373,144 @@ fail:
>> return ret;
>> }
>>
>> +/*
>> + * Checks if the given offset into the image file is actually free to use by
>> + * looking for overlaps with important metadata sections (L1/L2 tables etc.),
>> + * i.e. a sanity check without relying on the refcount tables.
>> + *
>> + * The chk parameter specifies exactly what checks to perform.
>> + *
>> + * Returns:
>> + * - 0 if writing to this offset will not affect the mentioned metadata
>> + * - a positive QCow2MetadataOverlap value indicating one overlapping section
>> + * - a negative value (-errno) indicating an error while performing a check,
>> + * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
>> + */
>> +int qcow2_check_metadata_overlap(BlockDriverState *bs, QCow2MetadataOverlap chk,
> chk is really just an int, because you don't pass a single enum value but
> a bit mask consisting of multiple enum values ored together.
Okay, I justed wanted to make clear what values make up that mask, but
yes, you're right, more often than not chk will not assume a value from
the enum itself.
>> + int64_t offset, int64_t size)
>> +{
>> + BDRVQcowState *s = bs->opaque;
>> + int i, j;
>> +
>> + if (!size) {
>> + return 0;
>> + }
>> +
>> + if (chk & QCOW2_OL_MAIN_HEADER) {
>> + if (offset < s->cluster_size) {
>> + return QCOW2_OL_MAIN_HEADER;
>> + }
>> + }
>> +
>> + if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
>> + if (ranges_overlap(offset, size, s->l1_table_offset,
>> + s->l1_size * sizeof(uint64_t))) {
> The size could be rounded up to the next cluster boundary (same thing
> for other metadata types).
Would this actually change anything?
>> + return QCOW2_OL_ACTIVE_L1;
>> + }
>> + }
>> +
>> + if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
>> + if (ranges_overlap(offset, size, s->refcount_table_offset,
>> + s->refcount_table_size * sizeof(uint64_t))) {
>> + return QCOW2_OL_REFCOUNT_TABLE;
>> + }
>> + }
>> +
>> + if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
>> + if (ranges_overlap(offset, size, s->snapshots_offset,
>> + s->snapshots_size)) {
>> + return QCOW2_OL_SNAPSHOT_TABLE;
>> + }
>> + }
>> +
>> + if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
>> + for (i = 0; i < s->nb_snapshots; i++) {
>> + if (s->snapshots[i].l1_size &&
>> + ranges_overlap(offset, size, s->snapshots[i].l1_table_offset,
>> + s->snapshots[i].l1_size * sizeof(uint64_t))) {
>> + return QCOW2_OL_INACTIVE_L1;
>> + }
>> + }
>> + }
>> +
>> + if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
>> + for (i = 0; i < s->l1_size; i++) {
>> + if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
>> + ranges_overlap(offset, size, s->l1_table[i] & L1E_OFFSET_MASK,
>> + s->cluster_size)) {
>> + return QCOW2_OL_ACTIVE_L2;
>> + }
>> + }
>> + }
>> +
>> + if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
>> + for (i = 0; i < s->refcount_table_size; i++) {
>> + if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
>> + ranges_overlap(offset, size, s->refcount_table[i] &
>> + REFT_OFFSET_MASK, s->cluster_size)) {
>> + return QCOW2_OL_REFCOUNT_BLOCK;
>> + }
>> + }
>> + }
>> +
>> + if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
>> + for (i = 0; i < s->nb_snapshots; i++) {
>> + uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
>> + uint32_t l1_sz = s->snapshots[i].l1_size;
>> + uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t));
>> + int ret;
>> +
>> + ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1,
>> + l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE);
>> +
>> + if (ret < 0) {
>> + g_free(l1);
>> + return ret;
>> + }
>> +
>> + for (j = 0; j < l1_sz; j++) {
>> + if ((l1[j] & L1E_OFFSET_MASK) &&
>> + ranges_overlap(offset, size, l1[j] & L1E_OFFSET_MASK,
>> + s->cluster_size)) {
>> + g_free(l1);
>> + return QCOW2_OL_INACTIVE_L2;
>> + }
>> + }
>> +
>> + g_free(l1);
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * First performs a check for metadata overlaps (through
>> + * qcow2_check_metadata_overlap); if that fails with a negative value (error
>> + * while performing a check), it will print a message but otherwise ignore that
>> + * error. If an impending overlap is detected, the BDS will be made unusable and
>> + * the qcow2 file marked corrupt.
>> + *
>> + * Returns 0 if there were no overlaps (or an error occured while checking for
>> + * overlaps) or a positive QCow2MetadataOverlap value on overlap (then, the BDS
>> + * will be unusable and the qcow2 file marked corrupt).
>> + */
>> +int qcow2_pre_write_overlap_check(BlockDriverState *bs, QCow2MetadataOverlap chk,
>> + int64_t offset, int64_t size)
>> +{
>> + int ret = qcow2_check_metadata_overlap(bs, chk, offset, size);
>> +
>> + if (ret < 0) {
>> + fprintf(stderr, "qcow2: Error while checking for metadata overlaps: "
>> + "%s\n", strerror(-ret));
> Leftover debug code?
Oh, yes, kind of. At one point in time this code didn't pass the error
code but simply ignored it. Therefore it made sense to at least give an
information to the user. When changing this behavior, I thought it
wouldn't be too bad to leave that message in.
>> + return ret;
>> + } else if (ret > 0) {
>> + fprintf(stderr, "qcow2: Preventing invalid write on metadata; "
>> + "image marked as corrupt.\n");
> This one makes actually sense to keep even for production as it is a
> condition that we want to make sure to appear in log files.
>
> Another thing to consider would be to send out a QMP event when this
> happens.
Okay.
>> + qcow2_mark_corrupt(bs);
>> + bs->drv = NULL; /* make BDS unusable */
>> + return ret;
>> + }
>> +
>> + return 0;
>> +}
> Kevin
Thanks for reviewing and your comments,
Max
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks
2013-08-27 11:06 ` Max Reitz
@ 2013-08-27 11:16 ` Kevin Wolf
0 siblings, 0 replies; 18+ messages in thread
From: Kevin Wolf @ 2013-08-27 11:16 UTC (permalink / raw)
To: Max Reitz; +Cc: qemu-devel, Stefan Hajnoczi
Am 27.08.2013 um 13:06 hat Max Reitz geschrieben:
> Am 27.08.2013 12:17, schrieb Kevin Wolf:
> >Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> >>Two new functions are added; the first one checks a given range in the
> >>image file for overlaps with metadata (main header, L1 tables, L2
> >>tables, refcount table and blocks).
> >>
> >>The second one should be used immediately before writing to the image
> >>file as it calls the first function and, upon collision, marks the
> >>image as corrupt and makes the BDS unusable, thereby preventing
> >>further access.
> >>
> >>Both functions take a bitmask argument specifying the structures which
> >>should be checked for overlaps, making it possible to also check
> >>metadata writes against colliding with other structures.
> >>
> >>Signed-off-by: Max Reitz <mreitz@redhat.com>
> >>---
> >> block/qcow2-refcount.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++
> >> block/qcow2.h | 28 ++++++++++
> >> 2 files changed, 170 insertions(+)
> >>+ int64_t offset, int64_t size)
> >>+{
> >>+ BDRVQcowState *s = bs->opaque;
> >>+ int i, j;
> >>+
> >>+ if (!size) {
> >>+ return 0;
> >>+ }
> >>+
> >>+ if (chk & QCOW2_OL_MAIN_HEADER) {
> >>+ if (offset < s->cluster_size) {
> >>+ return QCOW2_OL_MAIN_HEADER;
> >>+ }
> >>+ }
> >>+
> >>+ if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
> >>+ if (ranges_overlap(offset, size, s->l1_table_offset,
> >>+ s->l1_size * sizeof(uint64_t))) {
> >The size could be rounded up to the next cluster boundary (same thing
> >for other metadata types).
> Would this actually change anything?
Not sure. With correct images, it wouldn't, because both the old and the
new offset would be aligned to a cluster boundary, so if there is an
overlap, it would be at the start. But after all, we're dealing with
broken images here.
I don't have a strong opinion on it, take it as a suggestion that you
can implement for additional safety. But if you don't want to, I don't
think it's a horrible gap in the checks.
Kevin
^ permalink raw reply [flat|nested] 18+ messages in thread
* [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata overlap checks
2013-08-26 13:04 [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks Max Reitz
2013-08-26 13:04 ` [Qemu-devel] [PATCH 1/5] qcow2: Add corrupt bit Max Reitz
2013-08-26 13:04 ` [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks Max Reitz
@ 2013-08-26 13:04 ` Max Reitz
2013-08-27 11:32 ` Kevin Wolf
2013-08-26 13:04 ` [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check Max Reitz
2013-08-26 13:04 ` [Qemu-devel] [PATCH 5/5] qemu-iotests: Overlapping cluster allocations Max Reitz
4 siblings, 1 reply; 18+ messages in thread
From: Max Reitz @ 2013-08-26 13:04 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz
The pre-write overlap check function is now called before most of the
qcow2 writes (aborting it on collision or other error).
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/qcow2-cache.c | 17 +++++++++++++++++
block/qcow2-cluster.c | 23 +++++++++++++++++++++++
block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
block/qcow2.c | 38 +++++++++++++++++++++++++++++++++++++-
4 files changed, 101 insertions(+), 1 deletion(-)
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 2f3114e..da65297 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -115,6 +115,23 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
}
if (c == s->refcount_block_cache) {
+ ret = qcow2_pre_write_overlap_check(bs,
+ QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_BLOCK,
+ c->entries[i].offset, s->cluster_size);
+ } else if (c == s->l2_table_cache) {
+ ret = qcow2_pre_write_overlap_check(bs,
+ QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L2,
+ c->entries[i].offset, s->cluster_size);
+ } else {
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ c->entries[i].offset, s->cluster_size);
+ }
+
+ if (ret) {
+ return (ret < 0) ? ret : -EIO;
+ }
+
+ if (c == s->refcount_block_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
} else if (c == s->l2_table_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cca76d4..be35983 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -80,6 +80,15 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
goto fail;
}
+ /* the L1 position has not yet been updated, so these clusters must
+ * indeed be completely free */
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ new_l1_table_offset, new_l1_size2);
+ if (ret) {
+ ret = (ret < 0) ? ret : -EIO;
+ goto fail;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
@@ -149,6 +158,13 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index)
buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
}
+ ret = qcow2_pre_write_overlap_check(bs,
+ QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
+ s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
+ if (ret) {
+ return (ret < 0) ? ret : -EIO;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
buf, sizeof(buf));
@@ -368,6 +384,13 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
&s->aes_encrypt_key);
}
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);
+ if (ret) {
+ ret = (ret < 0) ? ret : -EIO;
+ goto out;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
if (ret < 0) {
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0caac90..6f69ecc 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -189,6 +189,15 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
return ret;
}
+ /* The snapshot list position has not yet been updated, so these clusters
+ * must indeed be completely free */
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ offset, s->nb_snapshots * sizeof(h));
+ if (ret) {
+ return (ret < 0) ? ret : -EIO;
+ }
+
+
/* Write all snapshots to the new list */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
@@ -363,6 +372,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ sn->l1_table_offset, s->l1_size * sizeof(uint64_t));
+ if (ret) {
+ ret = (ret < 0) ? ret : -EIO;
+ goto fail;
+ }
+
ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
@@ -475,6 +491,14 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
+ ret = qcow2_pre_write_overlap_check(bs,
+ QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
+ s->l1_table_offset, cur_l1_bytes);
+ if (ret) {
+ ret = (ret < 0) ? ret : -EIO;
+ goto fail;
+ }
+
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
diff --git a/block/qcow2.c b/block/qcow2.c
index 1d0d7ca..95497c6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -621,6 +621,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
g_free(s->l1_table);
+ /* else pre-write overlap checks in cache_destroy may crash */
+ s->l1_table = NULL;
if (s->l2_table_cache) {
qcow2_cache_destroy(bs, s->l2_table_cache);
}
@@ -920,6 +922,14 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
cur_nr_sectors * 512);
}
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ ((cluster_offset >> 9) + index_in_cluster) << 9,
+ cur_nr_sectors << 9);
+ if (ret) {
+ ret = (ret < 0) ? ret : -EIO;
+ goto fail;
+ }
+
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
@@ -986,6 +996,8 @@ static void qcow2_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
g_free(s->l1_table);
+ /* else pre-write overlap checks in cache_destroy may crash */
+ s->l1_table = NULL;
qcow2_cache_flush(bs, s->l2_table_cache);
qcow2_cache_flush(bs, s->refcount_block_cache);
@@ -1663,6 +1675,14 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
+
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ sector_num << 9, s->cluster_sectors << 9);
+ if (ret) {
+ ret = (ret < 0) ? ret : -EIO;
+ goto fail;
+ }
+
ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
@@ -1675,6 +1695,14 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
goto fail;
}
cluster_offset &= s->cluster_offset_mask;
+
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ cluster_offset, out_len);
+ if (ret) {
+ ret = (ret < 0) ? ret : -EIO;
+ goto fail;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
if (ret < 0) {
@@ -1752,10 +1780,18 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
BDRVQcowState *s = bs->opaque;
int growable = bs->growable;
int ret;
+ int64_t offset = qcow2_vm_state_offset(s) + pos;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
bs->growable = 1;
- ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
+
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
+ qiov->size);
+ if (ret) {
+ return (ret < 0) ? ret : -EIO;
+ }
+
+ ret = bdrv_pwritev(bs, offset, qiov);
bs->growable = growable;
return ret;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata overlap checks
2013-08-26 13:04 ` [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata " Max Reitz
@ 2013-08-27 11:32 ` Kevin Wolf
2013-08-27 11:41 ` Max Reitz
0 siblings, 1 reply; 18+ messages in thread
From: Kevin Wolf @ 2013-08-27 11:32 UTC (permalink / raw)
To: Max Reitz; +Cc: qemu-devel, Stefan Hajnoczi
Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> The pre-write overlap check function is now called before most of the
> qcow2 writes (aborting it on collision or other error).
>
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
> block/qcow2-cache.c | 17 +++++++++++++++++
> block/qcow2-cluster.c | 23 +++++++++++++++++++++++
> block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
> block/qcow2.c | 38 +++++++++++++++++++++++++++++++++++++-
> 4 files changed, 101 insertions(+), 1 deletion(-)
> @@ -368,6 +384,13 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
> &s->aes_encrypt_key);
> }
>
> + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> + ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);
Looks a bit overcomplicated, I'd like something like this better:
cluster_offset + n_start * BDRV_SECTOR_SIZE
> + if (ret) {
> + ret = (ret < 0) ? ret : -EIO;
I wonder whether the -EIO logic should be moved into
qcow2_pre_write_overlap_check(). Currently each single caller seems to
have this check.
> + goto out;
> + }
> +
> BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
> ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
> if (ret < 0) {
> diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
> index 0caac90..6f69ecc 100644
> --- a/block/qcow2-snapshot.c
> +++ b/block/qcow2-snapshot.c
> @@ -189,6 +189,15 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
> return ret;
> }
>
> + /* The snapshot list position has not yet been updated, so these clusters
> + * must indeed be completely free */
> + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> + offset, s->nb_snapshots * sizeof(h));
> + if (ret) {
> + return (ret < 0) ? ret : -EIO;
> + }
This doesn't check the full size. snapshots_size should have the right
value.
> +
> +
> /* Write all snapshots to the new list */
> for(i = 0; i < s->nb_snapshots; i++) {
> sn = s->snapshots + i;
> @@ -363,6 +372,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
> l1_table[i] = cpu_to_be64(s->l1_table[i]);
> }
>
> + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> + sn->l1_table_offset, s->l1_size * sizeof(uint64_t));
> + if (ret) {
> + ret = (ret < 0) ? ret : -EIO;
> + goto fail;
> + }
> +
> ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
> s->l1_size * sizeof(uint64_t));
> if (ret < 0) {
> @@ -475,6 +491,14 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
> goto fail;
> }
>
> + ret = qcow2_pre_write_overlap_check(bs,
> + QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
> + s->l1_table_offset, cur_l1_bytes);
> + if (ret) {
> + ret = (ret < 0) ? ret : -EIO;
> + goto fail;
> + }
> +
> ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
> cur_l1_bytes);
> if (ret < 0) {
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 1d0d7ca..95497c6 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -621,6 +621,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
> qcow2_free_snapshots(bs);
> qcow2_refcount_close(bs);
> g_free(s->l1_table);
> + /* else pre-write overlap checks in cache_destroy may crash */
> + s->l1_table = NULL;
> if (s->l2_table_cache) {
> qcow2_cache_destroy(bs, s->l2_table_cache);
> }
> @@ -920,6 +922,14 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
> cur_nr_sectors * 512);
> }
>
> + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> + ((cluster_offset >> 9) + index_in_cluster) << 9,
Same thing as above.
> + cur_nr_sectors << 9);
> + if (ret) {
> + ret = (ret < 0) ? ret : -EIO;
> + goto fail;
> + }
> +
> qemu_co_mutex_unlock(&s->lock);
> BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
> trace_qcow2_writev_data(qemu_coroutine_self(),
Kevin
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata overlap checks
2013-08-27 11:32 ` Kevin Wolf
@ 2013-08-27 11:41 ` Max Reitz
2013-08-27 11:51 ` Kevin Wolf
0 siblings, 1 reply; 18+ messages in thread
From: Max Reitz @ 2013-08-27 11:41 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-devel, Stefan Hajnoczi
Am 27.08.2013 13:32, schrieb Kevin Wolf:
> Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
>> The pre-write overlap check function is now called before most of the
>> qcow2 writes (aborting it on collision or other error).
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>> block/qcow2-cache.c | 17 +++++++++++++++++
>> block/qcow2-cluster.c | 23 +++++++++++++++++++++++
>> block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
>> block/qcow2.c | 38 +++++++++++++++++++++++++++++++++++++-
>> 4 files changed, 101 insertions(+), 1 deletion(-)
>> @@ -368,6 +384,13 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
>> &s->aes_encrypt_key);
>> }
>>
>> + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
>> + ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);
> Looks a bit overcomplicated, I'd like something like this better:
>
> cluster_offset + n_start * BDRV_SECTOR_SIZE
Yes, but this wouldn't correspond with the write call if (cluster_offset
& ((1 << 9) - 1)) != 0. ;-)
Basically, I just wanted it to match exactly the write command.
>
>> + if (ret) {
>> + ret = (ret < 0) ? ret : -EIO;
> I wonder whether the -EIO logic should be moved into
> qcow2_pre_write_overlap_check(). Currently each single caller seems to
> have this check.
Seems reasonable. I didn't want to prevent the caller from receiving
information about the exact overlap, but that could be achieved through
an optional result pointer as well, I think.
>
>> + goto out;
>> + }
>> +
>> BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
>> ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
>> if (ret < 0) {
>> diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
>> index 0caac90..6f69ecc 100644
>> --- a/block/qcow2-snapshot.c
>> +++ b/block/qcow2-snapshot.c
>> @@ -189,6 +189,15 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
>> return ret;
>> }
>>
>> + /* The snapshot list position has not yet been updated, so these clusters
>> + * must indeed be completely free */
>> + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
>> + offset, s->nb_snapshots * sizeof(h));
>> + if (ret) {
>> + return (ret < 0) ? ret : -EIO;
>> + }
> This doesn't check the full size. snapshots_size should have the right
> value.
Yes, you're right.
Max
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata overlap checks
2013-08-27 11:41 ` Max Reitz
@ 2013-08-27 11:51 ` Kevin Wolf
0 siblings, 0 replies; 18+ messages in thread
From: Kevin Wolf @ 2013-08-27 11:51 UTC (permalink / raw)
To: Max Reitz; +Cc: qemu-devel, Stefan Hajnoczi
Am 27.08.2013 um 13:41 hat Max Reitz geschrieben:
> Am 27.08.2013 13:32, schrieb Kevin Wolf:
> >Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> >>The pre-write overlap check function is now called before most of the
> >>qcow2 writes (aborting it on collision or other error).
> >>
> >>Signed-off-by: Max Reitz <mreitz@redhat.com>
> >>---
> >> block/qcow2-cache.c | 17 +++++++++++++++++
> >> block/qcow2-cluster.c | 23 +++++++++++++++++++++++
> >> block/qcow2-snapshot.c | 24 ++++++++++++++++++++++++
> >> block/qcow2.c | 38 +++++++++++++++++++++++++++++++++++++-
> >> 4 files changed, 101 insertions(+), 1 deletion(-)
> >>@@ -368,6 +384,13 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
> >> &s->aes_encrypt_key);
> >> }
> >>+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
> >>+ ((cluster_offset >> 9) + n_start) << 9, n * BDRV_SECTOR_SIZE);
> >Looks a bit overcomplicated, I'd like something like this better:
> >
> >cluster_offset + n_start * BDRV_SECTOR_SIZE
> Yes, but this wouldn't correspond with the write call if
> (cluster_offset & ((1 << 9) - 1)) != 0. ;-)
And then you have a problem anyway. It's something that I'd be happy to
assert() at any time, i.e. if it isn't true, it's a bug.
> Basically, I just wanted it to match exactly the write command.
I can see your point. Well, matter of taste, I guess.
> >
> >>+ if (ret) {
> >>+ ret = (ret < 0) ? ret : -EIO;
> >I wonder whether the -EIO logic should be moved into
> >qcow2_pre_write_overlap_check(). Currently each single caller seems to
> >have this check.
> Seems reasonable. I didn't want to prevent the caller from receiving
> information about the exact overlap, but that could be achieved
> through an optional result pointer as well, I think.
Don't complicate an interface for a potential caller that doesn't exist
yet. If one comes up, it will change the interface as it needs.
Kevin
^ permalink raw reply [flat|nested] 18+ messages in thread
* [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check
2013-08-26 13:04 [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks Max Reitz
` (2 preceding siblings ...)
2013-08-26 13:04 ` [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata " Max Reitz
@ 2013-08-26 13:04 ` Max Reitz
2013-08-26 13:15 ` Max Reitz
2013-08-27 12:23 ` Kevin Wolf
2013-08-26 13:04 ` [Qemu-devel] [PATCH 5/5] qemu-iotests: Overlapping cluster allocations Max Reitz
4 siblings, 2 replies; 18+ messages in thread
From: Max Reitz @ 2013-08-26 13:04 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz
Adds a new function checking for overlapping cluster allocations.
Furthermore, qcow2_check now marks the image as consistent if no
corruptions have been found.
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/qcow2-cluster.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++++++
block/qcow2.c | 15 +-
block/qcow2.h | 2 +
3 files changed, 429 insertions(+), 2 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index be35983..ea7d334 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1499,3 +1499,417 @@ fail:
return ret;
}
+
+static const char *const overlap_strings[8] = {
+ "main header",
+ "active L1 table",
+ "active L2 table",
+ "refcount table",
+ "refcount block",
+ "snapshot table",
+ "inactive L1 table",
+ "inactive L2 table"
+};
+
+/*
+ * Checks the table and cluster allocations for inconsistencies.
+ */
+int qcow2_check_allocations(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+ int i, j;
+ uint8_t *bm; /* bitmap */
+ /* bitmap byte shift (bytes per bitmap-bit = 1 << bm_shift) */
+ int bm_shift = s->cluster_bits;
+ uint64_t bm_size;
+
+ /* Checking every data cluster through qcow2_check_metadata_overlap takes a
+ * considerable amount of time; a bitmap would help out very much, although
+ * it quickly assumes ridiculous sizes if not limited; therefore we're
+ * grouping several clusters together to not exceed the size of the L1 table
+ * in RAM (which seems a reasonable limit). */
+ while ((bm_size = bs->total_sectors >> (bm_shift++ - BDRV_SECTOR_BITS)) >
+ s->l1_size * sizeof(uint64_t));
+
+ bm = g_malloc0(++bm_size);
+
+ bm[0] = 1; /* main header */
+ /* active L1 */
+ for (i = 0; i < s->l1_size * sizeof(uint64_t) >> bm_shift; i++) {
+ uintptr_t bit = i + (s->l1_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR L1 table offset is after EOF\n");
+ result->corruptions++;
+ /* this is such a fundamental error that it will probably crash
+ * qcow2_check_metadata_overlap if this function doesn't return
+ * already here (qcow2_check_metadata_overlap is for keeping
+ * consistency, therefore it always assumes a pretty consistent
+ * image) */
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->l1_table) {
+ /* active L2 */
+ for (i = 0; i < s->l1_size; i++) {
+ uintptr_t bit = (s->l1_table[i] & L1E_OFFSET_MASK) >> bm_shift;
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR L2 table %i offset is after EOF\n", i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ /* the test for j != 0 is unneccessary, since that bit is set
+ * anyway (because of the main header) */
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+
+ /* snapshot table */
+ for (i = 0; i < s->snapshots_size >> bm_shift; i++) {
+ uintptr_t bit = i + (s->snapshots_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR snapshot table offset is after EOF\n");
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->snapshots) {
+ /* inactive L1 */
+ for (i = 0; i < s->nb_snapshots; i++) {
+ for (j = 0; j < s->snapshots[i].l1_size * sizeof(uint64_t) >>
+ bm_shift; j++) {
+ uintptr_t bit =
+ j + (s->snapshots[i].l1_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR inactive L1 table %i offset is "
+ "after EOF\n", i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+ }
+
+ /* refcount table */
+ for (i = 0; i < s->refcount_table_size * sizeof(uint64_t) >> bm_shift; i++)
+ {
+ uintptr_t bit = i + (s->refcount_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR refcount table offset is after EOF\n");
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->refcount_table) {
+ int initial_corruptions = result->corruptions;
+
+ /* Do the refcount blocks last; that way, we can directly use the bitmap
+ * to check for overlaps */
+ for (i = 0; i < s->refcount_table_size; i++) {
+ uint64_t rb_offset = s->refcount_table[i] & REFT_OFFSET_MASK;
+ uintptr_t bit = rb_offset >> bm_shift;
+
+ if (!rb_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (rb_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR refcount block %i is not cluster "
+ "aligned in image\n", i);
+ result->corruptions++;
+ continue;
+ }
+
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR refcount block %i offset is after EOF\n",
+ i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+
+ if (bm[bit / 8] & (1 << (bit % 8))) {
+ ret = qcow2_check_metadata_overlap(bs,
+ QCOW2_OL_CACHED & ~QCOW2_OL_REFCOUNT_BLOCK, rb_offset,
+ s->cluster_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR refcount block %i overlaps with %s\n", i,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check refcount block %i overlap: "
+ "%s\n", i, strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+ }
+ }
+
+ if (result->corruptions != initial_corruptions) {
+ ret = 0;
+ goto fail;
+ }
+
+ /* Now, enter the refcount blocks into the bitmap */
+ for (i = 0; i < s->refcount_table_size; i++) {
+ uint64_t rb_offset = s->refcount_table[i] & REFT_OFFSET_MASK;
+ uintptr_t bit = rb_offset >> bm_shift;
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_SNAPSHOT_TABLE,
+ s->l1_table_offset, s->l1_size * sizeof(uint64_t));
+ if (ret > 0) {
+ fprintf(stderr, "ERROR L1 offset intersects %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check L1 overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ if (s->l1_table) {
+ for (i = 0; i < s->l1_size; i++) {
+ uint64_t l2_offset = s->l1_table[i] & L1E_OFFSET_MASK;
+ uint64_t *l2;
+
+ if (!l2_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (l2_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR L2 %i is not cluster aligned; L1 table "
+ "entry corrupted\n", i);
+ result->corruptions++;
+ continue;
+ }
+
+ ret = qcow2_check_metadata_overlap(bs,
+ QCOW2_OL_CACHED & ~QCOW2_OL_ACTIVE_L2, l2_offset,
+ s->cluster_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR L2 %i overlaps with %s\n", i,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ continue;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check L2 %i overlap: %s\n", i,
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ ret = l2_load(bs, l2_offset, &l2);
+ if ((ret < 0) || !l2) {
+ fprintf(stderr, "ERROR could not load L2 %i: %s\n", i,
+ strerror(-ret));
+ result->check_errors++;
+ if (ret < 0) {
+ goto fail;
+ } else {
+ continue;
+ }
+ }
+
+ for (j = 0; j < s->l2_size; j++) {
+ uint64_t cl_offset, cl_size;
+ unsigned long long cl_index;
+ uint64_t l2_entry = be64_to_cpu(l2[j]);
+
+ if ((s->qcow_version >= 3) && (l2_entry & 1)) {
+ /* cluster reads as all zeroes */
+ continue;
+ }
+
+ if (l2_entry & (1ULL << 62)) { /* compressed */
+ int xp1 = 62 - (s->cluster_bits - 9);
+ cl_offset = l2_entry & ((1 << xp1) - 1);
+ cl_size = ((l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK) >>
+ xp1) * BDRV_SECTOR_SIZE;
+ } else { /* uncompressed */
+ cl_offset = l2_entry & L2E_OFFSET_MASK;
+ cl_size = s->cluster_size;
+ }
+
+ if (!cl_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ cl_index = ((unsigned long long)i << s->l2_bits) | j;
+
+ if (!(l2_entry & (1ULL << 62)) &&
+ cl_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR data cluster 0x%llx is not cluster "
+ "aligned in image\n", cl_index);
+ result->corruptions++;
+ continue;
+ }
+
+ if (!(bm[(cl_offset >> bm_shift) / 8] &
+ (1 << ((cl_offset >> bm_shift) % 8)))) {
+ continue;
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_CACHED, cl_offset,
+ cl_size);
+ if (ret > 0) {
+ /* TODO: fix compressed clusters */
+ if ((fix & BDRV_FIX_ERRORS) && !(l2_entry & (1ULL << 62))) {
+ void *content = g_malloc(s->cluster_size);
+ uint64_t sector = cl_index <<
+ (s->cluster_bits - BDRV_SECTOR_BITS);
+
+ fprintf(stderr, "Reallocating data cluster 0x%llx "
+ "(currently overlapping with %s)\n", cl_index,
+ overlap_strings[ffs(ret) - 1]);
+
+ ret = bdrv_read(bs, sector, content,
+ s->cluster_sectors);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR could not read cluster: "
+ "%s\n", strerror(-ret));
+ result->corruptions++;
+ g_free(content);
+ qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2);
+ goto fail;
+ }
+
+ /* unallocate cluster (but leaving the refcount for
+ * now) */
+ l2[j] = 0;
+
+ /* FIXME: the image obviously is inconsistent, so this
+ * write access may trigger new inconsistencies
+ * (although the refcount check, which is supposed to be
+ * performed before these checks, should take care of
+ * this) */
+ ret = bdrv_write(bs, sector, content,
+ s->cluster_sectors);
+ if (ret < 0) {
+ /* at least leave it as it was */
+ l2[j] = cpu_to_be64(l2_entry);
+
+ fprintf(stderr, "ERROR could not write cluster: "
+ "%s\n", strerror(-ret));
+ result->corruptions++;
+ g_free(content);
+ qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2);
+ goto fail;
+ }
+
+ g_free(content);
+ result->corruptions_fixed++;
+
+ /* Now decrement the refcount (although an invalid
+ * refcount might have been the problem in the first
+ * place, the refcount check (which is running before
+ * this) will have fixed that) */
+ qcow2_free_any_clusters(bs, l2_entry, 1,
+ QCOW2_DISCARD_ALWAYS);
+ } else {
+ fprintf(stderr, "ERROR data cluster 0x%llx overlaps "
+ "with %s\n", cl_index,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ }
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check data cluster 0x%llx "
+ "overlap: %s\n", cl_index, strerror(-ret));
+ result->check_errors++;
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2);
+ goto fail;
+ }
+ }
+
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2);
+ }
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_ACTIVE_L1 | QCOW2_OL_SNAPSHOT_TABLE,
+ s->refcount_table_offset,
+ s->refcount_table_size * sizeof(uint64_t));
+ if (ret > 0) {
+ fprintf(stderr, "ERROR refcount table overlaps with %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check refcount table overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ if (!s->refcount_table) {
+ fprintf(stderr, "ERROR could not check refcount block overlap: "
+ "refcount table not loaded\n");
+ result->check_errors++;
+ ret = -EIO;
+ goto fail;
+ }
+
+ /* refcount blocks have already been checked (when creating the bitmap) */
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE, s->snapshots_offset,
+ s->snapshots_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR snapshot table overlaps with %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check snapshot table overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ /* We now know for sure that nothing overlaps with the inactive L1 tables -
+ * therefore, we can skip explicitly checking them. */
+
+ ret = 0;
+
+fail:
+ g_free(bm);
+
+ if (fix & BDRV_FIX_ERRORS) {
+ /* qemu-img check will double check if some corruptions could be fixed
+ * - without reopening the underlying image file. Since the refcount
+ * check doesn't use the L2 cache for some reason, it must be flushed
+ * to disk before the second check can commence (else it will report
+ * inexistent errors); and if we're already at it, the refcount cache
+ * may be flushed just as well */
+ qcow2_cache_flush(bs, s->l2_table_cache);
+ qcow2_cache_flush(bs, s->refcount_block_cache);
+ }
+
+ return ret;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 95497c6..0b0f0ac 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,8 +311,19 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
return ret;
}
- if (fix && result->check_errors == 0 && result->corruptions == 0) {
- return qcow2_mark_clean(bs);
+ ret = qcow2_check_allocations(bs, result, fix);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (result->check_errors == 0 && result->corruptions == 0) {
+ if (fix) {
+ ret = qcow2_mark_clean(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ return qcow2_mark_consistent(bs);
}
return ret;
}
diff --git a/block/qcow2.h b/block/qcow2.h
index 8a55da0..3772ab6 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -410,6 +410,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
+int qcow2_check_allocations(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
void qcow2_process_discards(BlockDriverState *bs, int ret);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check
2013-08-26 13:04 ` [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check Max Reitz
@ 2013-08-26 13:15 ` Max Reitz
2013-08-27 12:23 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Max Reitz @ 2013-08-26 13:15 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi
Hi,
On Mon, Aug 26, 2013 at 15:04 +0200, Max Reitz wrote:
> Adds a new function checking for overlapping cluster allocations.
> Furthermore, qcow2_check now marks the image as consistent if no
> corruptions have been found.
>
> Signed-off-by: Max Reitz <mreitz@redhat.com>
Such overlappings are often (if not always) found by the refcount checks
as well. By increasing the refcount of the clusters affected and
therefore enforcing COW, the collision is basically fixed as well
without requiring this new function.
Thus I'm not sure whether this new function is actually necessary since
the existing refcount check seems to cover all its functionality already.
But, well, I thought it couldn't hurt proposing this function at least. ;-)
Kind regards,
Max
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check
2013-08-26 13:04 ` [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check Max Reitz
2013-08-26 13:15 ` Max Reitz
@ 2013-08-27 12:23 ` Kevin Wolf
1 sibling, 0 replies; 18+ messages in thread
From: Kevin Wolf @ 2013-08-27 12:23 UTC (permalink / raw)
To: Max Reitz; +Cc: qemu-devel, Stefan Hajnoczi
Am 26.08.2013 um 15:04 hat Max Reitz geschrieben:
> Adds a new function checking for overlapping cluster allocations.
> Furthermore, qcow2_check now marks the image as consistent if no
> corruptions have been found.
>
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
> block/qcow2-cluster.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++++++
> block/qcow2.c | 15 +-
> block/qcow2.h | 2 +
> 3 files changed, 429 insertions(+), 2 deletions(-)
>
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index be35983..ea7d334 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1499,3 +1499,417 @@ fail:
>
> return ret;
> }
> +
> +static const char *const overlap_strings[8] = {
> + "main header",
> + "active L1 table",
> + "active L2 table",
> + "refcount table",
> + "refcount block",
> + "snapshot table",
> + "inactive L1 table",
> + "inactive L2 table"
> +};
> +
> +/*
> + * Checks the table and cluster allocations for inconsistencies.
> + */
> +int qcow2_check_allocations(BlockDriverState *bs, BdrvCheckResult *result,
> + BdrvCheckMode fix)
For the record: We have decided to leave this out as the required
functionality is already provided by the refcount checks.
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 95497c6..0b0f0ac 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -311,8 +311,19 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
> return ret;
> }
>
> - if (fix && result->check_errors == 0 && result->corruptions == 0) {
> - return qcow2_mark_clean(bs);
> + ret = qcow2_check_allocations(bs, result, fix);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + if (result->check_errors == 0 && result->corruptions == 0) {
> + if (fix) {
> + ret = qcow2_mark_clean(bs);
> + if (ret < 0) {
> + return ret;
> + }
> + }
> + return qcow2_mark_consistent(bs);
The qcow2_mark_consistent() call should probably be inside the if block,
otherwise you'll be trying to update the header of read-only image.
The reason why you didn't notice this is that qemu-img is broken since
commit 8599ea4c and doesn't report errors on negative return values any
more...
Kevin
^ permalink raw reply [flat|nested] 18+ messages in thread
* [Qemu-devel] [PATCH 5/5] qemu-iotests: Overlapping cluster allocations
2013-08-26 13:04 [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks Max Reitz
` (3 preceding siblings ...)
2013-08-26 13:04 ` [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check Max Reitz
@ 2013-08-26 13:04 ` Max Reitz
2013-08-27 12:37 ` Kevin Wolf
4 siblings, 1 reply; 18+ messages in thread
From: Max Reitz @ 2013-08-26 13:04 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, Max Reitz
A new test on corrupted images with overlapping cluster allocations.
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
tests/qemu-iotests/060 | 107 +++++++++++++++++++++++++++++++++++++++++++++
tests/qemu-iotests/060.out | 43 ++++++++++++++++++
tests/qemu-iotests/group | 1 +
3 files changed, 151 insertions(+)
create mode 100755 tests/qemu-iotests/060
create mode 100644 tests/qemu-iotests/060.out
diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
new file mode 100755
index 0000000..2dd6711
--- /dev/null
+++ b/tests/qemu-iotests/060
@@ -0,0 +1,107 @@
+#!/bin/bash
+#
+# Test case for image corruption (overlapping data structures) in qcow2
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+# This tests qocw2-specific low-level functionality
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+rt_offset=65536 # 0x10000 (XXX: just an assumption)
+rb_offset=131072 # 0x20000 (XXX: just an assumption)
+l1_offset=196608 # 0x30000 (XXX: just an assumption)
+l2_offset=262144 # 0x40000 (XXX: just an assumption)
+
+IMGOPTS="compat=1.1"
+
+echo
+echo "=== Testing L2 reference into L1 ==="
+echo
+_make_test_img 64M
+# Link first L1 entry (first L2 table) onto itself
+# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
+# later write will result in a COW operation, effectively ruining this attempt
+# on image corruption)
+poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
+_check_test_img
+
+# The corrupt bit should not be set anyway
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+
+# Try to write something, thereby forcing the corrupt bit to be set
+$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io
+
+# The corrupt bit must now be set
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+
+# We could now try to fix the image, but this would probably fail (how should an
+# L2 table linked onto the L1 table be fixed?)
+
+echo
+echo "=== Testing cluster data reference into refcount block ==="
+echo
+_make_test_img 64M
+# Allocate L2 table
+truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
+poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
+# Mark cluster as used
+poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
+# Redirect new data cluster onto refcount block
+poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
+_check_test_img
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+
+cp $TEST_IMG foo.qcow2
+
+# Try to fix it
+_check_test_img -r all
+
+# The corrupt bit should be cleared
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+
+# Look if it's really really fixed
+$QEMU_IO -c "write -P 0x2a 0 512" "$TEST_IMG" | _filter_qemu_io
+./qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
new file mode 100644
index 0000000..10feced
--- /dev/null
+++ b/tests/qemu-iotests/060.out
@@ -0,0 +1,43 @@
+QA output created by 060
+
+=== Testing L2 reference into L1 ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+ERROR cluster 3 refcount=1 reference=3
+ERROR L2 0 overlaps with active L1 table
+
+2 errors were found on the image.
+Data may be corrupted, or further writes to the image may corrupt it.
+incompatible_features 0x0
+qcow2: Preventing invalid write on metadata; image marked as corrupt.
+write failed: Input/output error
+incompatible_features 0x2
+
+=== Testing cluster data reference into refcount block ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+ERROR refcount block 0 refcount=2
+ERROR cluster 2 refcount=1 reference=2
+ERROR data cluster 0x0 overlaps with refcount block
+
+3 errors were found on the image.
+Data may be corrupted, or further writes to the image may corrupt it.
+incompatible_features 0x0
+qcow2: Preventing invalid write on metadata; image marked as corrupt.
+write failed: Input/output error
+incompatible_features 0x2
+ERROR refcount block 0 refcount=2
+Repairing cluster 2 refcount=1 reference=2
+Reallocating data cluster 0x0 (currently overlapping with refcount block)
+The following inconsistencies were found and repaired:
+
+ 0 leaked clusters
+ 2 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+incompatible_features 0x0
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+incompatible_features 0x0
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 43c05d6..0845eb5 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -64,3 +64,4 @@
055 rw auto
056 rw auto backing
059 rw auto
+060 rw auto
--
1.8.3.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check
2013-08-26 10:18 [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks Max Reitz
@ 2013-08-26 10:18 ` Max Reitz
0 siblings, 0 replies; 18+ messages in thread
From: Max Reitz @ 2013-08-26 10:18 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, Max Reitz
Adds a new function checking for overlapping cluster allocations.
Furthermore, qcow2_check now marks the image as consistent if no
corruptions have been found.
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
block/qcow2-cluster.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++++++
block/qcow2.c | 15 +-
block/qcow2.h | 2 +
3 files changed, 429 insertions(+), 2 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index be35983..ea7d334 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1499,3 +1499,417 @@ fail:
return ret;
}
+
+static const char *const overlap_strings[8] = {
+ "main header",
+ "active L1 table",
+ "active L2 table",
+ "refcount table",
+ "refcount block",
+ "snapshot table",
+ "inactive L1 table",
+ "inactive L2 table"
+};
+
+/*
+ * Checks the table and cluster allocations for inconsistencies.
+ */
+int qcow2_check_allocations(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+ int i, j;
+ uint8_t *bm; /* bitmap */
+ /* bitmap byte shift (bytes per bitmap-bit = 1 << bm_shift) */
+ int bm_shift = s->cluster_bits;
+ uint64_t bm_size;
+
+ /* Checking every data cluster through qcow2_check_metadata_overlap takes a
+ * considerable amount of time; a bitmap would help out very much, although
+ * it quickly assumes ridiculous sizes if not limited; therefore we're
+ * grouping several clusters together to not exceed the size of the L1 table
+ * in RAM (which seems a reasonable limit). */
+ while ((bm_size = bs->total_sectors >> (bm_shift++ - BDRV_SECTOR_BITS)) >
+ s->l1_size * sizeof(uint64_t));
+
+ bm = g_malloc0(++bm_size);
+
+ bm[0] = 1; /* main header */
+ /* active L1 */
+ for (i = 0; i < s->l1_size * sizeof(uint64_t) >> bm_shift; i++) {
+ uintptr_t bit = i + (s->l1_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR L1 table offset is after EOF\n");
+ result->corruptions++;
+ /* this is such a fundamental error that it will probably crash
+ * qcow2_check_metadata_overlap if this function doesn't return
+ * already here (qcow2_check_metadata_overlap is for keeping
+ * consistency, therefore it always assumes a pretty consistent
+ * image) */
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->l1_table) {
+ /* active L2 */
+ for (i = 0; i < s->l1_size; i++) {
+ uintptr_t bit = (s->l1_table[i] & L1E_OFFSET_MASK) >> bm_shift;
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR L2 table %i offset is after EOF\n", i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ /* the test for j != 0 is unneccessary, since that bit is set
+ * anyway (because of the main header) */
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+
+ /* snapshot table */
+ for (i = 0; i < s->snapshots_size >> bm_shift; i++) {
+ uintptr_t bit = i + (s->snapshots_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR snapshot table offset is after EOF\n");
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->snapshots) {
+ /* inactive L1 */
+ for (i = 0; i < s->nb_snapshots; i++) {
+ for (j = 0; j < s->snapshots[i].l1_size * sizeof(uint64_t) >>
+ bm_shift; j++) {
+ uintptr_t bit =
+ j + (s->snapshots[i].l1_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR inactive L1 table %i offset is "
+ "after EOF\n", i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+ }
+
+ /* refcount table */
+ for (i = 0; i < s->refcount_table_size * sizeof(uint64_t) >> bm_shift; i++)
+ {
+ uintptr_t bit = i + (s->refcount_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR refcount table offset is after EOF\n");
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->refcount_table) {
+ int initial_corruptions = result->corruptions;
+
+ /* Do the refcount blocks last; that way, we can directly use the bitmap
+ * to check for overlaps */
+ for (i = 0; i < s->refcount_table_size; i++) {
+ uint64_t rb_offset = s->refcount_table[i] & REFT_OFFSET_MASK;
+ uintptr_t bit = rb_offset >> bm_shift;
+
+ if (!rb_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (rb_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR refcount block %i is not cluster "
+ "aligned in image\n", i);
+ result->corruptions++;
+ continue;
+ }
+
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR refcount block %i offset is after EOF\n",
+ i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+
+ if (bm[bit / 8] & (1 << (bit % 8))) {
+ ret = qcow2_check_metadata_overlap(bs,
+ QCOW2_OL_CACHED & ~QCOW2_OL_REFCOUNT_BLOCK, rb_offset,
+ s->cluster_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR refcount block %i overlaps with %s\n", i,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check refcount block %i overlap: "
+ "%s\n", i, strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+ }
+ }
+
+ if (result->corruptions != initial_corruptions) {
+ ret = 0;
+ goto fail;
+ }
+
+ /* Now, enter the refcount blocks into the bitmap */
+ for (i = 0; i < s->refcount_table_size; i++) {
+ uint64_t rb_offset = s->refcount_table[i] & REFT_OFFSET_MASK;
+ uintptr_t bit = rb_offset >> bm_shift;
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_SNAPSHOT_TABLE,
+ s->l1_table_offset, s->l1_size * sizeof(uint64_t));
+ if (ret > 0) {
+ fprintf(stderr, "ERROR L1 offset intersects %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check L1 overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ if (s->l1_table) {
+ for (i = 0; i < s->l1_size; i++) {
+ uint64_t l2_offset = s->l1_table[i] & L1E_OFFSET_MASK;
+ uint64_t *l2;
+
+ if (!l2_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (l2_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR L2 %i is not cluster aligned; L1 table "
+ "entry corrupted\n", i);
+ result->corruptions++;
+ continue;
+ }
+
+ ret = qcow2_check_metadata_overlap(bs,
+ QCOW2_OL_CACHED & ~QCOW2_OL_ACTIVE_L2, l2_offset,
+ s->cluster_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR L2 %i overlaps with %s\n", i,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ continue;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check L2 %i overlap: %s\n", i,
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ ret = l2_load(bs, l2_offset, &l2);
+ if ((ret < 0) || !l2) {
+ fprintf(stderr, "ERROR could not load L2 %i: %s\n", i,
+ strerror(-ret));
+ result->check_errors++;
+ if (ret < 0) {
+ goto fail;
+ } else {
+ continue;
+ }
+ }
+
+ for (j = 0; j < s->l2_size; j++) {
+ uint64_t cl_offset, cl_size;
+ unsigned long long cl_index;
+ uint64_t l2_entry = be64_to_cpu(l2[j]);
+
+ if ((s->qcow_version >= 3) && (l2_entry & 1)) {
+ /* cluster reads as all zeroes */
+ continue;
+ }
+
+ if (l2_entry & (1ULL << 62)) { /* compressed */
+ int xp1 = 62 - (s->cluster_bits - 9);
+ cl_offset = l2_entry & ((1 << xp1) - 1);
+ cl_size = ((l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK) >>
+ xp1) * BDRV_SECTOR_SIZE;
+ } else { /* uncompressed */
+ cl_offset = l2_entry & L2E_OFFSET_MASK;
+ cl_size = s->cluster_size;
+ }
+
+ if (!cl_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ cl_index = ((unsigned long long)i << s->l2_bits) | j;
+
+ if (!(l2_entry & (1ULL << 62)) &&
+ cl_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR data cluster 0x%llx is not cluster "
+ "aligned in image\n", cl_index);
+ result->corruptions++;
+ continue;
+ }
+
+ if (!(bm[(cl_offset >> bm_shift) / 8] &
+ (1 << ((cl_offset >> bm_shift) % 8)))) {
+ continue;
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_CACHED, cl_offset,
+ cl_size);
+ if (ret > 0) {
+ /* TODO: fix compressed clusters */
+ if ((fix & BDRV_FIX_ERRORS) && !(l2_entry & (1ULL << 62))) {
+ void *content = g_malloc(s->cluster_size);
+ uint64_t sector = cl_index <<
+ (s->cluster_bits - BDRV_SECTOR_BITS);
+
+ fprintf(stderr, "Reallocating data cluster 0x%llx "
+ "(currently overlapping with %s)\n", cl_index,
+ overlap_strings[ffs(ret) - 1]);
+
+ ret = bdrv_read(bs, sector, content,
+ s->cluster_sectors);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR could not read cluster: "
+ "%s\n", strerror(-ret));
+ result->corruptions++;
+ g_free(content);
+ qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2);
+ goto fail;
+ }
+
+ /* unallocate cluster (but leaving the refcount for
+ * now) */
+ l2[j] = 0;
+
+ /* FIXME: the image obviously is inconsistent, so this
+ * write access may trigger new inconsistencies
+ * (although the refcount check, which is supposed to be
+ * performed before these checks, should take care of
+ * this) */
+ ret = bdrv_write(bs, sector, content,
+ s->cluster_sectors);
+ if (ret < 0) {
+ /* at least leave it as it was */
+ l2[j] = cpu_to_be64(l2_entry);
+
+ fprintf(stderr, "ERROR could not write cluster: "
+ "%s\n", strerror(-ret));
+ result->corruptions++;
+ g_free(content);
+ qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2);
+ goto fail;
+ }
+
+ g_free(content);
+ result->corruptions_fixed++;
+
+ /* Now decrement the refcount (although an invalid
+ * refcount might have been the problem in the first
+ * place, the refcount check (which is running before
+ * this) will have fixed that) */
+ qcow2_free_any_clusters(bs, l2_entry, 1,
+ QCOW2_DISCARD_ALWAYS);
+ } else {
+ fprintf(stderr, "ERROR data cluster 0x%llx overlaps "
+ "with %s\n", cl_index,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ }
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check data cluster 0x%llx "
+ "overlap: %s\n", cl_index, strerror(-ret));
+ result->check_errors++;
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2);
+ goto fail;
+ }
+ }
+
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2);
+ }
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_ACTIVE_L1 | QCOW2_OL_SNAPSHOT_TABLE,
+ s->refcount_table_offset,
+ s->refcount_table_size * sizeof(uint64_t));
+ if (ret > 0) {
+ fprintf(stderr, "ERROR refcount table overlaps with %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check refcount table overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ if (!s->refcount_table) {
+ fprintf(stderr, "ERROR could not check refcount block overlap: "
+ "refcount table not loaded\n");
+ result->check_errors++;
+ ret = -EIO;
+ goto fail;
+ }
+
+ /* refcount blocks have already been checked (when creating the bitmap) */
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE, s->snapshots_offset,
+ s->snapshots_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR snapshot table overlaps with %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check snapshot table overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ /* We now know for sure that nothing overlaps with the inactive L1 tables -
+ * therefore, we can skip explicitly checking them. */
+
+ ret = 0;
+
+fail:
+ g_free(bm);
+
+ if (fix & BDRV_FIX_ERRORS) {
+ /* qemu-img check will double check if some corruptions could be fixed
+ * - without reopening the underlying image file. Since the refcount
+ * check doesn't use the L2 cache for some reason, it must be flushed
+ * to disk before the second check can commence (else it will report
+ * inexistent errors); and if we're already at it, the refcount cache
+ * may be flushed just as well */
+ qcow2_cache_flush(bs, s->l2_table_cache);
+ qcow2_cache_flush(bs, s->refcount_block_cache);
+ }
+
+ return ret;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 95497c6..0b0f0ac 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,8 +311,19 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
return ret;
}
- if (fix && result->check_errors == 0 && result->corruptions == 0) {
- return qcow2_mark_clean(bs);
+ ret = qcow2_check_allocations(bs, result, fix);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (result->check_errors == 0 && result->corruptions == 0) {
+ if (fix) {
+ ret = qcow2_mark_clean(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ return qcow2_mark_consistent(bs);
}
return ret;
}
diff --git a/block/qcow2.h b/block/qcow2.h
index 8a55da0..3772ab6 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -410,6 +410,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
+int qcow2_check_allocations(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
void qcow2_process_discards(BlockDriverState *bs, int ret);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 18+ messages in thread