* [Qemu-devel] [PATCH v3] blkverify: Add block driver for verifying I/O
@ 2010-09-04 15:30 Stefan Hajnoczi
2010-09-04 15:34 ` Stefan Hajnoczi
0 siblings, 1 reply; 5+ messages in thread
From: Stefan Hajnoczi @ 2010-09-04 15:30 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi
The blkverify block driver makes investigating image format data
corruption much easier. A raw image initialized with the same contents
as the test image (e.g. qcow2 file) must be provided. The raw image
mirrors read/write operations and is used to verify that data read from
the test image is correct.
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
v3:
* Fix compile error in blkverify_aio_cancel()
v2:
* Implement aio_cancel() by waiting for pending requests
* Fix conflict in Makefile.objs
Makefile.objs | 2 +-
block/blkverify.c | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++
docs/blkverify.txt | 69 +++++++++++
3 files changed, 394 insertions(+), 1 deletions(-)
create mode 100644 block/blkverify.c
create mode 100644 docs/blkverify.txt
diff --git a/Makefile.objs b/Makefile.objs
index 4a1eaa1..b640ec8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -14,7 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_CURL) += curl.o
diff --git a/block/blkverify.c b/block/blkverify.c
new file mode 100644
index 0000000..127c670
--- /dev/null
+++ b/block/blkverify.c
@@ -0,0 +1,324 @@
+/*
+ * Block protocol for block driver correctness testing
+ *
+ * Copyright (C) 2010 IBM, Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include "block_int.h"
+
+typedef struct {
+ BlockDriverState *test_file;
+} BDRVBlkverifyState;
+
+typedef struct BlkverifyAIOCB BlkverifyAIOCB;
+struct BlkverifyAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+
+ /* Request metadata */
+ bool is_write;
+ int64_t sector_num;
+ int nb_sectors;
+
+ int ret; /* first completed request's result */
+ unsigned int done; /* completion counter */
+
+ QEMUIOVector *qiov; /* user I/O vector */
+ QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
+ void *buf; /* buffer for raw file I/O */
+
+ void (*verify)(BlkverifyAIOCB *acb);
+};
+
+static void blkverify_aio_cancel_cb(void *opaque, int ret)
+{
+ *(bool *)opaque = true;
+}
+
+static void blkverify_aio_cancel(BlockDriverAIOCB *acb)
+{
+ bool done = false;
+
+ /* Allow the request to complete so the raw file and test file stay in
+ * sync. Hijack the callback (since the request is cancelled anyway) to
+ * wait for completion.
+ */
+ acb->common.cb = blkverify_aio_cancel_cb;
+ acb->common.opaque = &done;
+ while (!done) {
+ qemu_aio_wait();
+ }
+}
+
+static AIOPool blkverify_aio_pool = {
+ .aiocb_size = sizeof(BlkverifyAIOCB),
+ .cancel = blkverify_aio_cancel,
+};
+
+static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "blkverify: %s sector_num=%ld nb_sectors=%d ",
+ acb->is_write ? "write" : "read", acb->sector_num,
+ acb->nb_sectors);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(1);
+}
+
+/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
+static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ int ret;
+ char *raw, *c;
+
+ /* Parse the blkverify: prefix */
+ if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
+ return -EINVAL;
+ }
+ filename += strlen("blkverify:");
+
+ /* Parse the raw image filename */
+ c = strchr(filename, ':');
+ if (c == NULL) {
+ return -EINVAL;
+ }
+
+ raw = strdup(filename);
+ raw[c - filename] = '\0';
+ ret = bdrv_file_open(&bs->file, raw, flags);
+ free(raw);
+ if (ret < 0) {
+ return ret;
+ }
+ filename = c + 1;
+
+ /* Open the test file */
+ s->test_file = bdrv_new("");
+ ret = bdrv_open(s->test_file, filename, flags, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void blkverify_close(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_delete(s->test_file);
+ s->test_file = NULL;
+}
+
+static void blkverify_flush(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ bdrv_flush(s->test_file);
+}
+
+static int64_t blkverify_getlength(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ return bdrv_getlength(s->test_file);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * @a: I/O vector
+ * @b: I/O vector
+ * @ret: Offset to first mismatching byte or -1 if match
+ */
+static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+ int i;
+ ssize_t offset = 0;
+
+ assert(a->niov == b->niov);
+ for (i = 0; i < a->niov; i++) {
+ size_t len = 0;
+ uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+ uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+ assert(a->iov[i].iov_len == b->iov[i].iov_len);
+ while (len < a->iov[i].iov_len && *p++ == *q++) {
+ len++;
+ }
+
+ offset += len;
+
+ if (len != a->iov[i].iov_len) {
+ return offset;
+ }
+ }
+ return -1;
+}
+
+/**
+ * Copy contents of I/O vector
+ */
+static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
+ void *buf)
+{
+ int i;
+
+ for (i = 0; i < src->niov; i++) {
+ qemu_iovec_add(dest, buf, src->iov[i].iov_len);
+ buf += src->iov[i].iov_len;
+ }
+}
+
+static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
+
+ acb->bh = NULL;
+ acb->is_write = is_write;
+ acb->sector_num = sector_num;
+ acb->nb_sectors = nb_sectors;
+ acb->ret = -EINPROGRESS;
+ acb->done = 0;
+ acb->qiov = qiov;
+ acb->buf = NULL;
+ acb->verify = NULL;
+ return acb;
+}
+
+static void blkverify_aio_bh(void *opaque)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ qemu_bh_delete(acb->bh);
+ if (acb->buf) {
+ qemu_iovec_destroy(&acb->raw_qiov);
+ qemu_vfree(acb->buf);
+ }
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_aio_release(acb);
+}
+
+static void blkverify_aio_cb(void *opaque, int ret)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ switch (++acb->done) {
+ case 1:
+ acb->ret = ret;
+ break;
+
+ case 2:
+ if (acb->ret != ret) {
+ blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
+ }
+
+ if (acb->verify) {
+ acb->verify(acb);
+ }
+
+ acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ break;
+ }
+}
+
+static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+{
+ ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+ if (offset != -1) {
+ blkverify_err(acb, "contents mismatch in sector %ld",
+ acb->sector_num + (offset / BDRV_SECTOR_SIZE));
+ }
+}
+
+static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ acb->verify = blkverify_verify_readv;
+ acb->buf = qemu_blockalign(bs->file, qiov->size);
+ qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
+ blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+
+ if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ return bdrv_aio_flush(s->test_file, cb, opaque);
+}
+
+static BlockDriver bdrv_blkverify = {
+ .format_name = "blkverify",
+ .protocol_name = "blkverify",
+
+ .instance_size = sizeof(BDRVBlkverifyState),
+
+ .bdrv_getlength = blkverify_getlength,
+
+ .bdrv_file_open = blkverify_open,
+ .bdrv_close = blkverify_close,
+ .bdrv_flush = blkverify_flush,
+
+ .bdrv_aio_readv = blkverify_aio_readv,
+ .bdrv_aio_writev = blkverify_aio_writev,
+ .bdrv_aio_flush = blkverify_aio_flush,
+};
+
+static void bdrv_blkverify_init(void)
+{
+ bdrv_register(&bdrv_blkverify);
+}
+
+block_init(bdrv_blkverify_init);
diff --git a/docs/blkverify.txt b/docs/blkverify.txt
new file mode 100644
index 0000000..d556dc4
--- /dev/null
+++ b/docs/blkverify.txt
@@ -0,0 +1,69 @@
+= Block driver correctness testing with blkverify =
+
+== Introduction ==
+
+This document describes how to use the blkverify protocol to test that a block
+driver is operating correctly.
+
+It is difficult to test and debug block drivers against real guests. Often
+processes inside the guest will crash because corrupt sectors were read as part
+of the executable. Other times obscure errors are raised by a program inside
+the guest. These issues are extremely hard to trace back to bugs in the block
+driver.
+
+Blkverify solves this problem by catching data corruption inside QEMU the first
+time bad data is read and reporting the disk sector that is corrupted.
+
+== How it works ==
+
+The blkverify protocol has two child block devices, the "test" device and the
+"raw" device. Read/write operations are mirrored to both devices so their
+state should always be in sync.
+
+The "raw" device is a raw image, a flat file, that has identical starting
+contents to the "test" image. The idea is that the "raw" device will handle
+read/write operations correctly and not corrupt data. It can be used as a
+reference for comparison against the "test" device.
+
+After a mirrored read operation completes, blkverify will compare the data and
+raise an error if it is not identical. This makes it possible to catch the
+first instance where corrupt data is read.
+
+== Example ==
+
+Imagine raw.img has 0xcd repeated throughout its first sector:
+
+ $ ./qemu-io -c 'read -v 0 512' raw.img
+ 00000000: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ 00000010: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ [...]
+ 000001e0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ 000001f0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ read 512/512 bytes at offset 0
+ 512.000000 bytes, 1 ops; 0.0000 sec (97.656 MiB/sec and 200000.0000 ops/sec)
+
+And test.img is corrupt, its first sector is zeroed when it shouldn't be:
+
+ $ ./qemu-io -c 'read -v 0 512' test.img
+ 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ [...]
+ 000001e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 000001f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ read 512/512 bytes at offset 0
+ 512.000000 bytes, 1 ops; 0.0000 sec (81.380 MiB/sec and 166666.6667 ops/sec)
+
+This error is caught by blkverify:
+
+ $ ./qemu-io -c 'read 0 512' blkverify:a.img:b.img
+ blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
+
+A more realistic scenario is verifying the installation of a guest OS:
+
+ $ ./qemu-img create raw.img 16G
+ $ ./qemu-img create -f qcow2 test.qcow2 16G
+ $ x86_64-softmmu/qemu-system-x86_64 -cdrom debian.iso \
+ -drive file=blkverify:raw.img:test.qcow2
+
+If the installation is aborted when blkverify detects corruption, use qemu-io
+to explore the contents of the disk image at the sector in question.
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [Qemu-devel] [PATCH v3] blkverify: Add block driver for verifying I/O
2010-09-04 15:30 [Qemu-devel] [PATCH v3] blkverify: Add block driver for verifying I/O Stefan Hajnoczi
@ 2010-09-04 15:34 ` Stefan Hajnoczi
2010-09-20 14:48 ` [Qemu-devel] " Kevin Wolf
0 siblings, 1 reply; 5+ messages in thread
From: Stefan Hajnoczi @ 2010-09-04 15:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi
The blkverify block driver makes investigating image format data
corruption much easier. A raw image initialized with the same contents
as the test image (e.g. qcow2 file) must be provided. The raw image
mirrors read/write operations and is used to verify that data read from
the test image is correct.
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
This is embarassing, I sent out the v2 patch instead of the v3 patch the first
time!
v3:
* Fix compile error in blkverify_aio_cancel()
v2:
* Implement aio_cancel() by waiting for pending requests
* Fix conflict in Makefile.objs
Makefile.objs | 2 +-
block/blkverify.c | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++
docs/blkverify.txt | 69 +++++++++++
3 files changed, 394 insertions(+), 1 deletions(-)
create mode 100644 block/blkverify.c
create mode 100644 docs/blkverify.txt
diff --git a/Makefile.objs b/Makefile.objs
index 4a1eaa1..b640ec8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -14,7 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_CURL) += curl.o
diff --git a/block/blkverify.c b/block/blkverify.c
new file mode 100644
index 0000000..97717a6
--- /dev/null
+++ b/block/blkverify.c
@@ -0,0 +1,324 @@
+/*
+ * Block protocol for block driver correctness testing
+ *
+ * Copyright (C) 2010 IBM, Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include "block_int.h"
+
+typedef struct {
+ BlockDriverState *test_file;
+} BDRVBlkverifyState;
+
+typedef struct BlkverifyAIOCB BlkverifyAIOCB;
+struct BlkverifyAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+
+ /* Request metadata */
+ bool is_write;
+ int64_t sector_num;
+ int nb_sectors;
+
+ int ret; /* first completed request's result */
+ unsigned int done; /* completion counter */
+
+ QEMUIOVector *qiov; /* user I/O vector */
+ QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
+ void *buf; /* buffer for raw file I/O */
+
+ void (*verify)(BlkverifyAIOCB *acb);
+};
+
+static void blkverify_aio_cancel_cb(void *opaque, int ret)
+{
+ *(bool *)opaque = true;
+}
+
+static void blkverify_aio_cancel(BlockDriverAIOCB *acb)
+{
+ bool done = false;
+
+ /* Allow the request to complete so the raw file and test file stay in
+ * sync. Hijack the callback (since the request is cancelled anyway) to
+ * wait for completion.
+ */
+ acb->cb = blkverify_aio_cancel_cb;
+ acb->opaque = &done;
+ while (!done) {
+ qemu_aio_wait();
+ }
+}
+
+static AIOPool blkverify_aio_pool = {
+ .aiocb_size = sizeof(BlkverifyAIOCB),
+ .cancel = blkverify_aio_cancel,
+};
+
+static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "blkverify: %s sector_num=%ld nb_sectors=%d ",
+ acb->is_write ? "write" : "read", acb->sector_num,
+ acb->nb_sectors);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(1);
+}
+
+/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
+static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ int ret;
+ char *raw, *c;
+
+ /* Parse the blkverify: prefix */
+ if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
+ return -EINVAL;
+ }
+ filename += strlen("blkverify:");
+
+ /* Parse the raw image filename */
+ c = strchr(filename, ':');
+ if (c == NULL) {
+ return -EINVAL;
+ }
+
+ raw = strdup(filename);
+ raw[c - filename] = '\0';
+ ret = bdrv_file_open(&bs->file, raw, flags);
+ free(raw);
+ if (ret < 0) {
+ return ret;
+ }
+ filename = c + 1;
+
+ /* Open the test file */
+ s->test_file = bdrv_new("");
+ ret = bdrv_open(s->test_file, filename, flags, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void blkverify_close(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_delete(s->test_file);
+ s->test_file = NULL;
+}
+
+static void blkverify_flush(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ bdrv_flush(s->test_file);
+}
+
+static int64_t blkverify_getlength(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ return bdrv_getlength(s->test_file);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * @a: I/O vector
+ * @b: I/O vector
+ * @ret: Offset to first mismatching byte or -1 if match
+ */
+static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+ int i;
+ ssize_t offset = 0;
+
+ assert(a->niov == b->niov);
+ for (i = 0; i < a->niov; i++) {
+ size_t len = 0;
+ uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+ uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+ assert(a->iov[i].iov_len == b->iov[i].iov_len);
+ while (len < a->iov[i].iov_len && *p++ == *q++) {
+ len++;
+ }
+
+ offset += len;
+
+ if (len != a->iov[i].iov_len) {
+ return offset;
+ }
+ }
+ return -1;
+}
+
+/**
+ * Copy contents of I/O vector
+ */
+static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
+ void *buf)
+{
+ int i;
+
+ for (i = 0; i < src->niov; i++) {
+ qemu_iovec_add(dest, buf, src->iov[i].iov_len);
+ buf += src->iov[i].iov_len;
+ }
+}
+
+static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
+ int64_t sector_num, QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
+
+ acb->bh = NULL;
+ acb->is_write = is_write;
+ acb->sector_num = sector_num;
+ acb->nb_sectors = nb_sectors;
+ acb->ret = -EINPROGRESS;
+ acb->done = 0;
+ acb->qiov = qiov;
+ acb->buf = NULL;
+ acb->verify = NULL;
+ return acb;
+}
+
+static void blkverify_aio_bh(void *opaque)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ qemu_bh_delete(acb->bh);
+ if (acb->buf) {
+ qemu_iovec_destroy(&acb->raw_qiov);
+ qemu_vfree(acb->buf);
+ }
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_aio_release(acb);
+}
+
+static void blkverify_aio_cb(void *opaque, int ret)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ switch (++acb->done) {
+ case 1:
+ acb->ret = ret;
+ break;
+
+ case 2:
+ if (acb->ret != ret) {
+ blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
+ }
+
+ if (acb->verify) {
+ acb->verify(acb);
+ }
+
+ acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ break;
+ }
+}
+
+static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+{
+ ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+ if (offset != -1) {
+ blkverify_err(acb, "contents mismatch in sector %ld",
+ acb->sector_num + (offset / BDRV_SECTOR_SIZE));
+ }
+}
+
+static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ acb->verify = blkverify_verify_readv;
+ acb->buf = qemu_blockalign(bs->file, qiov->size);
+ qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
+ blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+
+ if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ return bdrv_aio_flush(s->test_file, cb, opaque);
+}
+
+static BlockDriver bdrv_blkverify = {
+ .format_name = "blkverify",
+ .protocol_name = "blkverify",
+
+ .instance_size = sizeof(BDRVBlkverifyState),
+
+ .bdrv_getlength = blkverify_getlength,
+
+ .bdrv_file_open = blkverify_open,
+ .bdrv_close = blkverify_close,
+ .bdrv_flush = blkverify_flush,
+
+ .bdrv_aio_readv = blkverify_aio_readv,
+ .bdrv_aio_writev = blkverify_aio_writev,
+ .bdrv_aio_flush = blkverify_aio_flush,
+};
+
+static void bdrv_blkverify_init(void)
+{
+ bdrv_register(&bdrv_blkverify);
+}
+
+block_init(bdrv_blkverify_init);
diff --git a/docs/blkverify.txt b/docs/blkverify.txt
new file mode 100644
index 0000000..d556dc4
--- /dev/null
+++ b/docs/blkverify.txt
@@ -0,0 +1,69 @@
+= Block driver correctness testing with blkverify =
+
+== Introduction ==
+
+This document describes how to use the blkverify protocol to test that a block
+driver is operating correctly.
+
+It is difficult to test and debug block drivers against real guests. Often
+processes inside the guest will crash because corrupt sectors were read as part
+of the executable. Other times obscure errors are raised by a program inside
+the guest. These issues are extremely hard to trace back to bugs in the block
+driver.
+
+Blkverify solves this problem by catching data corruption inside QEMU the first
+time bad data is read and reporting the disk sector that is corrupted.
+
+== How it works ==
+
+The blkverify protocol has two child block devices, the "test" device and the
+"raw" device. Read/write operations are mirrored to both devices so their
+state should always be in sync.
+
+The "raw" device is a raw image, a flat file, that has identical starting
+contents to the "test" image. The idea is that the "raw" device will handle
+read/write operations correctly and not corrupt data. It can be used as a
+reference for comparison against the "test" device.
+
+After a mirrored read operation completes, blkverify will compare the data and
+raise an error if it is not identical. This makes it possible to catch the
+first instance where corrupt data is read.
+
+== Example ==
+
+Imagine raw.img has 0xcd repeated throughout its first sector:
+
+ $ ./qemu-io -c 'read -v 0 512' raw.img
+ 00000000: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ 00000010: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ [...]
+ 000001e0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ 000001f0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
+ read 512/512 bytes at offset 0
+ 512.000000 bytes, 1 ops; 0.0000 sec (97.656 MiB/sec and 200000.0000 ops/sec)
+
+And test.img is corrupt, its first sector is zeroed when it shouldn't be:
+
+ $ ./qemu-io -c 'read -v 0 512' test.img
+ 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ [...]
+ 000001e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 000001f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ read 512/512 bytes at offset 0
+ 512.000000 bytes, 1 ops; 0.0000 sec (81.380 MiB/sec and 166666.6667 ops/sec)
+
+This error is caught by blkverify:
+
+ $ ./qemu-io -c 'read 0 512' blkverify:a.img:b.img
+ blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
+
+A more realistic scenario is verifying the installation of a guest OS:
+
+ $ ./qemu-img create raw.img 16G
+ $ ./qemu-img create -f qcow2 test.qcow2 16G
+ $ x86_64-softmmu/qemu-system-x86_64 -cdrom debian.iso \
+ -drive file=blkverify:raw.img:test.qcow2
+
+If the installation is aborted when blkverify detects corruption, use qemu-io
+to explore the contents of the disk image at the sector in question.
--
1.7.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [Qemu-devel] Re: [PATCH v3] blkverify: Add block driver for verifying I/O
2010-09-04 15:34 ` Stefan Hajnoczi
@ 2010-09-20 14:48 ` Kevin Wolf
2010-09-20 15:22 ` Stefan Hajnoczi
0 siblings, 1 reply; 5+ messages in thread
From: Kevin Wolf @ 2010-09-20 14:48 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: qemu-devel
Am 04.09.2010 17:34, schrieb Stefan Hajnoczi:
> The blkverify block driver makes investigating image format data
> corruption much easier. A raw image initialized with the same contents
> as the test image (e.g. qcow2 file) must be provided. The raw image
> mirrors read/write operations and is used to verify that data read from
> the test image is correct.
>
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
> This is embarassing, I sent out the v2 patch instead of the v3 patch the first
> time!
>
> v3:
> * Fix compile error in blkverify_aio_cancel()
>
> v2:
> * Implement aio_cancel() by waiting for pending requests
> * Fix conflict in Makefile.objs
>
> Makefile.objs | 2 +-
> block/blkverify.c | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> docs/blkverify.txt | 69 +++++++++++
> 3 files changed, 394 insertions(+), 1 deletions(-)
> create mode 100644 block/blkverify.c
> create mode 100644 docs/blkverify.txt
>
> diff --git a/Makefile.objs b/Makefile.objs
> index 4a1eaa1..b640ec8 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -14,7 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>
> block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
> block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
> -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o
> +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
> block-nested-$(CONFIG_WIN32) += raw-win32.o
> block-nested-$(CONFIG_POSIX) += raw-posix.o
> block-nested-$(CONFIG_CURL) += curl.o
> diff --git a/block/blkverify.c b/block/blkverify.c
> new file mode 100644
> index 0000000..97717a6
> --- /dev/null
> +++ b/block/blkverify.c
> @@ -0,0 +1,324 @@
> +/*
> + * Block protocol for block driver correctness testing
> + *
> + * Copyright (C) 2010 IBM, Corp.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include <stdarg.h>
> +#include "block_int.h"
> +
> +typedef struct {
> + BlockDriverState *test_file;
> +} BDRVBlkverifyState;
> +
> +typedef struct BlkverifyAIOCB BlkverifyAIOCB;
> +struct BlkverifyAIOCB {
> + BlockDriverAIOCB common;
> + QEMUBH *bh;
> +
> + /* Request metadata */
> + bool is_write;
> + int64_t sector_num;
> + int nb_sectors;
> +
> + int ret; /* first completed request's result */
> + unsigned int done; /* completion counter */
> +
> + QEMUIOVector *qiov; /* user I/O vector */
> + QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
> + void *buf; /* buffer for raw file I/O */
> +
> + void (*verify)(BlkverifyAIOCB *acb);
> +};
> +
> +static void blkverify_aio_cancel_cb(void *opaque, int ret)
> +{
> + *(bool *)opaque = true;
> +}
> +
> +static void blkverify_aio_cancel(BlockDriverAIOCB *acb)
> +{
> + bool done = false;
> +
> + /* Allow the request to complete so the raw file and test file stay in
> + * sync. Hijack the callback (since the request is cancelled anyway) to
> + * wait for completion.
> + */
The approach of completing the request sounds okay, but I think you need
to call the original callback if you let it complete.
> + acb->cb = blkverify_aio_cancel_cb;
> + acb->opaque = &done;
> + while (!done) {
> + qemu_aio_wait();
> + }
qemu_aio_release(acb)?
> +}
> +
> +static AIOPool blkverify_aio_pool = {
> + .aiocb_size = sizeof(BlkverifyAIOCB),
> + .cancel = blkverify_aio_cancel,
> +};
> +
> +static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
> +{
> + va_list ap;
> +
> + va_start(ap, fmt);
> + fprintf(stderr, "blkverify: %s sector_num=%ld nb_sectors=%d ",
> + acb->is_write ? "write" : "read", acb->sector_num,
> + acb->nb_sectors);
> + vfprintf(stderr, fmt, ap);
> + fprintf(stderr, "\n");
> + va_end(ap);
> + exit(1);
> +}
> +
> +/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
> +static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
> +{
> + BDRVBlkverifyState *s = bs->opaque;
> + int ret;
> + char *raw, *c;
> +
> + /* Parse the blkverify: prefix */
> + if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
> + return -EINVAL;
> + }
> + filename += strlen("blkverify:");
> +
> + /* Parse the raw image filename */
> + c = strchr(filename, ':');
> + if (c == NULL) {
> + return -EINVAL;
> + }
> +
> + raw = strdup(filename);
> + raw[c - filename] = '\0';
> + ret = bdrv_file_open(&bs->file, raw, flags);
> + free(raw);
> + if (ret < 0) {
> + return ret;
> + }
> + filename = c + 1;
> +
> + /* Open the test file */
> + s->test_file = bdrv_new("");
> + ret = bdrv_open(s->test_file, filename, flags, NULL);
> + if (ret < 0) {
> + return ret;
This leaks bs->file.
Kevin
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] Re: [PATCH v3] blkverify: Add block driver for verifying I/O
2010-09-20 14:48 ` [Qemu-devel] " Kevin Wolf
@ 2010-09-20 15:22 ` Stefan Hajnoczi
2010-09-20 15:43 ` Kevin Wolf
0 siblings, 1 reply; 5+ messages in thread
From: Stefan Hajnoczi @ 2010-09-20 15:22 UTC (permalink / raw)
To: Kevin Wolf; +Cc: Stefan Hajnoczi, qemu-devel
Thanks for your comments Kevin!
I'd like to merge the overlapping I/O patch I sent today since
blkverify is not in mainline yet. Are you okay with that or should I
keep them separate?
On Mon, Sep 20, 2010 at 3:48 PM, Kevin Wolf <kwolf@redhat.com> wrote:
> Am 04.09.2010 17:34, schrieb Stefan Hajnoczi:
>> +static void blkverify_aio_cancel(BlockDriverAIOCB *acb)
>> +{
>> + bool done = false;
>> +
>> + /* Allow the request to complete so the raw file and test file stay in
>> + * sync. Hijack the callback (since the request is cancelled anyway) to
>> + * wait for completion.
>> + */
>
> The approach of completing the request sounds okay, but I think you need
> to call the original callback if you let it complete.
Neither posix-aio-compat.c nor block/qcow2.c invoke the callback. Am
I missing something?
>> + acb->cb = blkverify_aio_cancel_cb;
>> + acb->opaque = &done;
>> + while (!done) {
>> + qemu_aio_wait();
>> + }
>
> qemu_aio_release(acb)?
Will fix.
>> + /* Open the test file */
>> + s->test_file = bdrv_new("");
>> + ret = bdrv_open(s->test_file, filename, flags, NULL);
>> + if (ret < 0) {
>> + return ret;
>
> This leaks bs->file.
s->test_file needs bdrv_delete(). block.c:bdrv_open_common() calls
bdrv_delete(bs->file) on failure to close and delete bs->file, but we
don't need to rely on that. I will fix both.
Stefan
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] Re: [PATCH v3] blkverify: Add block driver for verifying I/O
2010-09-20 15:22 ` Stefan Hajnoczi
@ 2010-09-20 15:43 ` Kevin Wolf
0 siblings, 0 replies; 5+ messages in thread
From: Kevin Wolf @ 2010-09-20 15:43 UTC (permalink / raw)
To: Stefan Hajnoczi; +Cc: Stefan Hajnoczi, qemu-devel
Am 20.09.2010 17:22, schrieb Stefan Hajnoczi:
> Thanks for your comments Kevin!
>
> I'd like to merge the overlapping I/O patch I sent today since
> blkverify is not in mainline yet. Are you okay with that or should I
> keep them separate?
If that works better for you, I don't mind merging them.
> On Mon, Sep 20, 2010 at 3:48 PM, Kevin Wolf <kwolf@redhat.com> wrote:
>> Am 04.09.2010 17:34, schrieb Stefan Hajnoczi:
>>> +static void blkverify_aio_cancel(BlockDriverAIOCB *acb)
>>> +{
>>> + bool done = false;
>>> +
>>> + /* Allow the request to complete so the raw file and test file stay in
>>> + * sync. Hijack the callback (since the request is cancelled anyway) to
>>> + * wait for completion.
>>> + */
>>
>> The approach of completing the request sounds okay, but I think you need
>> to call the original callback if you let it complete.
>
> Neither posix-aio-compat.c nor block/qcow2.c invoke the callback. Am
> I missing something?
qcow2 calls raw, so it doesn't need to do it itself.
posix-aio-compat invokes the callback indirectly by waiting for the
request to complete using the normal path.
>>> + acb->cb = blkverify_aio_cancel_cb;
>>> + acb->opaque = &done;
>>> + while (!done) {
>>> + qemu_aio_wait();
>>> + }
>>
>> qemu_aio_release(acb)?
>
> Will fix.
>
>>> + /* Open the test file */
>>> + s->test_file = bdrv_new("");
>>> + ret = bdrv_open(s->test_file, filename, flags, NULL);
>>> + if (ret < 0) {
>>> + return ret;
>>
>> This leaks bs->file.
>
> s->test_file needs bdrv_delete(). block.c:bdrv_open_common() calls
> bdrv_delete(bs->file) on failure to close and delete bs->file, but we
> don't need to rely on that. I will fix both.
Right, I missed that we're in the same path as with an automatically
allocated bs->file. Not exactly intuitive that the common implementation
cleans up when it didn't create the file. ;-)
Kevin
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-09-20 15:42 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-04 15:30 [Qemu-devel] [PATCH v3] blkverify: Add block driver for verifying I/O Stefan Hajnoczi
2010-09-04 15:34 ` Stefan Hajnoczi
2010-09-20 14:48 ` [Qemu-devel] " Kevin Wolf
2010-09-20 15:22 ` Stefan Hajnoczi
2010-09-20 15:43 ` Kevin Wolf
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).