* [Qemu-devel] [PATCH 01/11] quorum: Create quorum.c, add QuorumSingleAIOCB and QuorumAIOCB.
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 02/11] quorum: Create BDRVQuorumState and BlkDriver and do init Benoît Canet
` (9 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/Makefile.objs | 1 +
block/quorum.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
create mode 100644 block/quorum.c
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 554f429..f94e06f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
+block-obj-y += quorum.o
block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-obj-$(CONFIG_WIN32) += raw-win32.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/quorum.c b/block/quorum.c
new file mode 100644
index 0000000..65a6b55
--- /dev/null
+++ b/block/quorum.c
@@ -0,0 +1,45 @@
+/*
+ * Quorum Block filter
+ *
+ * Copyright (C) 2012 Nodalink, SARL.
+ *
+ * Author:
+ * Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp)
+ * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc).
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "block_int.h"
+
+typedef struct QuorumAIOCB QuorumAIOCB;
+
+typedef struct QuorumSingleAIOCB {
+ BlockDriverAIOCB *aiocb;
+ uint8_t *buf;
+ int ret;
+ QuorumAIOCB *parent;
+} QuorumSingleAIOCB;
+
+struct QuorumAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+
+ /* Request metadata */
+ int64_t sector_num;
+ int nb_sectors;
+
+ QEMUIOVector *qiov; /* calling readv IOV */
+
+ QuorumSingleAIOCB *aios; /* individual AIOs */
+ QEMUIOVector *qiovs; /* individual IOVs */
+ int count; /* number of completed AIOCB */
+ int success_count; /* number of successfully completed AIOCB */
+ bool *finished; /* completion signal for cancel */
+
+ void (*vote)(QuorumAIOCB *acb);
+ int vote_ret;
+};
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 02/11] quorum: Create BDRVQuorumState and BlkDriver and do init.
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 01/11] quorum: Create quorum.c, add QuorumSingleAIOCB and QuorumAIOCB Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 03/11] quorum: Add quorum_open() and quorum_close() Benoît Canet
` (8 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 65a6b55..19a9a44 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -15,6 +15,13 @@
#include "block_int.h"
+typedef struct {
+ BlockDriverState **bs;
+ int threshold;
+ int total;
+ char **filenames;
+} BDRVQuorumState;
+
typedef struct QuorumAIOCB QuorumAIOCB;
typedef struct QuorumSingleAIOCB {
@@ -26,6 +33,7 @@ typedef struct QuorumSingleAIOCB {
struct QuorumAIOCB {
BlockDriverAIOCB common;
+ BDRVQuorumState *bqs;
QEMUBH *bh;
/* Request metadata */
@@ -43,3 +51,17 @@ struct QuorumAIOCB {
void (*vote)(QuorumAIOCB *acb);
int vote_ret;
};
+
+static BlockDriver bdrv_quorum = {
+ .format_name = "quorum",
+ .protocol_name = "quorum",
+
+ .instance_size = sizeof(BDRVQuorumState),
+};
+
+static void bdrv_quorum_init(void)
+{
+ bdrv_register(&bdrv_quorum);
+}
+
+block_init(bdrv_quorum_init);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 03/11] quorum: Add quorum_open() and quorum_close().
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 01/11] quorum: Create quorum.c, add QuorumSingleAIOCB and QuorumAIOCB Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 02/11] quorum: Create BDRVQuorumState and BlkDriver and do init Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 15:32 ` Eric Blake
2012-10-23 12:23 ` [Qemu-devel] [PATCH 04/11] quorum: Add quorum_aio_writev and its dependencies Benoît Canet
` (7 subsequent siblings)
10 siblings, 1 reply; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Valid quorum resources look like
quorum:threshold/total:path/to/image_1: ... :path/to/image_total
':' is used as a separator, which allows networked paths to be used
'\' is the escape character for filenames containing ':'
'\' also escapes itself
On the command line, for the quorum files "img:test.raw", "img2.raw"
and "img3.raw", the invocation looks like:
-drive file=quorum:2/3:img\\:test.raw:img2.raw:img3.raw
(note the double \\)
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 122 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 19a9a44..7b20f08 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -52,11 +52,133 @@ struct QuorumAIOCB {
int vote_ret;
};
+/*
+ * Open a quorum device and all of its child images.
+ *
+ * Valid quorum resources look like
+ * quorum:threshold/total:path/to/image_1: ... :path/to/image_total
+ *
+ * ':' is used as a separator
+ * '\' is the escaping character for filenames containing ':'
+ * '\' escapes itself
+ *
+ * @bs:       quorum BlockDriverState; bs->opaque is the BDRVQuorumState
+ *            that gets filled in
+ * @filename: resource string in the format described above
+ * @flags:    open flags, passed unchanged to bdrv_open() for every child
+ *
+ * Returns 0 on success, -EINVAL when the resource string is malformed
+ * (bad prefix, bad threshold/total, wrong number of filenames) or the
+ * negative value returned by the bdrv_open() call that failed.
+ * On failure everything allocated here is released again.
+ */
+static int quorum_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i, j, k, len, ret = 0;
+    unsigned long threshold, total;
+    char *a, *b, *names;
+    const char *start;
+    bool escape;
+
+    /* Parse the quorum: prefix */
+    if (!strstart(filename, "quorum:", &start)) {
+        return -EINVAL;
+    }
+
+    /* Get threshold */
+    errno = 0;
+    threshold = strtoul(start, &a, 10);
+    if (*a != '/' || errno) {
+        return -EINVAL;
+    }
+    a++;
+
+    /* Get total */
+    errno = 0;
+    total = strtoul(a, &b, 10);
+    if (*b != ':' || errno) {
+        return -EINVAL;
+    }
+    b++;
+
+    /* Reject values that do not survive the narrowing to int, so that a
+     * huge number cannot silently wrap into a small valid-looking one */
+    s->threshold = threshold;
+    s->total = total;
+    if ((unsigned long)s->threshold != threshold ||
+        (unsigned long)s->total != total) {
+        return -EINVAL;
+    }
+
+    if (s->threshold < 1 || s->total < 2) {
+        return -EINVAL;
+    }
+
+    if (s->threshold > s->total) {
+        return -EINVAL;
+    }
+
+    s->bs = g_malloc0(sizeof(BlockDriverState *) * s->total);
+    /* Two allocations for all filenames: simpler to free */
+    s->filenames = g_malloc0(sizeof(char *) * s->total);
+    names = g_strdup(b);
+
+    /* Get the filenames pointers.  The names are unescaped in place inside
+     * the names buffer: the write position (s->filenames[j] + k) never gets
+     * ahead of the read position (names + i). */
+    escape = false;
+    s->filenames[0] = names;
+    len = strlen(names);
+    for (i = j = k = 0; i < len; i++) {
+        /* separation between two files */
+        if (!escape && names[i] == ':') {
+            char *prev = s->filenames[j];
+
+            /* More filenames than announced: stop before storing a pointer
+             * past the end of s->filenames[] */
+            if (j + 1 >= s->total) {
+                ret = -EINVAL;
+                goto free_exit;
+            }
+
+            prev[k] = '\0';
+            s->filenames[++j] = prev + k + 1;
+            k = 0;
+            continue;
+        }
+
+        escape = !escape && names[i] == '\\';
+
+        /* if we are not escaping copy */
+        if (!escape) {
+            s->filenames[j][k++] = names[i];
+        }
+    }
+    /* terminate last string */
+    s->filenames[j][k] = '\0';
+
+    /* Fewer filenames than announced */
+    if ((j + 1) != s->total) {
+        ret = -EINVAL;
+        goto free_exit;
+    }
+
+    /* Open files */
+    for (i = 0; i < s->total; i++) {
+        s->bs[i] = bdrv_new("");
+        ret = bdrv_open(s->bs[i], s->filenames[i], flags, NULL);
+        if (ret < 0) {
+            goto error_exit;
+        }
+    }
+
+    goto exit;
+
+error_exit:
+    /* s->bs[i] was created by bdrv_new() even though its open failed, so
+     * the cleanup starts at i and walks back to the first child */
+    for (; i >= 0; i--) {
+        bdrv_delete(s->bs[i]);
+        s->bs[i] = NULL;
+    }
+free_exit:
+    /* s->filenames[0] is the start of the single names buffer */
+    g_free(s->filenames[0]);
+    g_free(s->filenames);
+    s->filenames = NULL;
+    g_free(s->bs);
+    s->bs = NULL;
+exit:
+    return ret;
+}
+
+static void quorum_close(BlockDriverState *bs)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->total; i++) {
+ /* Ensure writes reach stable storage */
+ bdrv_flush(s->bs[i]);
+ bdrv_delete(s->bs[i]);
+ }
+
+ g_free(s->filenames[0]);
+ g_free(s->filenames);
+ s->filenames = NULL;
+ g_free(s->bs);
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
.instance_size = sizeof(BDRVQuorumState),
+
+ .bdrv_file_open = quorum_open,
+ .bdrv_close = quorum_close,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 03/11] quorum: Add quorum_open() and quorum_close().
2012-10-23 12:23 ` [Qemu-devel] [PATCH 03/11] quorum: Add quorum_open() and quorum_close() Benoît Canet
@ 2012-10-23 15:32 ` Eric Blake
0 siblings, 0 replies; 14+ messages in thread
From: Eric Blake @ 2012-10-23 15:32 UTC (permalink / raw)
To: Benoît Canet; +Cc: kwolf, pbonzini, qemu-devel, stefanha
[-- Attachment #1: Type: text/plain, Size: 937 bytes --]
On 10/23/2012 06:23 AM, Benoît Canet wrote:
> Valid quorum resources look like
> quorum:threshold/total:path/to/image_1: ... :path/to/image_total
>
> ':' is used as a separator to allow to use networked path
> '\' is the escaping character for filename containing ':'
> '\' escape itself
>
> On the command line for quorum files "img,test.raw", "img2.raw"
Here, you used a comma,
> and "img3.raw" invocation look like:
>
> -drive file=quorum:2/3:img\\:test.raw:img2.raw:img3.raw
but here, you are using a colon for the underlying name 'img:test.raw'.
> (note the double \\)
You also didn't give an example of comma escaping using typical command
line comma escaping; but at least your example looks okay once you fix
the typo in the lead-in text. The code appears to match the commit message.
--
Eric Blake eblake@redhat.com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 617 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 04/11] quorum: Add quorum_aio_writev and its dependencies.
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (2 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 03/11] quorum: Add quorum_open() and quorum_close() Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 05/11] blkverify: Extract qemu_iovec_clone() and qemu_iovec_compare() from blkverify Benoît Canet
` (6 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 112 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 7b20f08..878d930 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -171,6 +171,116 @@ static void quorum_close(BlockDriverState *bs)
g_free(s->bs);
}
+static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
+ bool finished = false;
+
+ /* Wait for the request to finish */
+ acb->finished = &finished;
+ while (!finished) {
+ qemu_aio_wait();
+ }
+}
+
+static AIOPool quorum_aio_pool = {
+ .aiocb_size = sizeof(QuorumAIOCB),
+ .cancel = quorum_aio_cancel,
+};
+
+static void quorum_aio_bh(void *opaque)
+{
+ QuorumAIOCB *acb = opaque;
+ BDRVQuorumState *s = acb->bqs;
+ int ret;
+
+ ret = s->threshold <= acb->success_count ? 0 : -EIO;
+
+ qemu_bh_delete(acb->bh);
+ acb->common.cb(acb->common.opaque, ret);
+ if (acb->finished) {
+ *acb->finished = true;
+ }
+ g_free(acb->aios);
+ g_free(acb->qiovs);
+ qemu_aio_release(acb);
+}
+
+static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
+ BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t sector_num,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ QuorumAIOCB *acb = qemu_aio_get(&quorum_aio_pool, bs, cb, opaque);
+ int i;
+
+ acb->aios = g_new0(QuorumSingleAIOCB, s->total);
+ acb->qiovs = g_new0(QEMUIOVector, s->total);
+
+ acb->bqs = s;
+ acb->qiov = qiov;
+ acb->bh = NULL;
+ acb->count = 0;
+ acb->success_count = 0;
+ acb->sector_num = sector_num;
+ acb->nb_sectors = nb_sectors;
+ acb->vote = NULL;
+ acb->vote_ret = 0;
+
+ for (i = 0; i < s->total; i++) {
+ acb->aios[i].buf = NULL;
+ acb->aios[i].ret = 0;
+ acb->aios[i].parent = acb;
+ }
+
+ return acb;
+}
+
+static void quorum_aio_cb(void *opaque, int ret)
+{
+ QuorumSingleAIOCB *sacb = opaque;
+ QuorumAIOCB *acb = sacb->parent;
+ BDRVQuorumState *s = acb->bqs;
+
+ sacb->ret = ret;
+ acb->count++;
+ if (ret == 0) {
+ acb->success_count++;
+ }
+ assert(acb->count <= s->total);
+ assert(acb->success_count <= s->total);
+ if (acb->count < s->total) {
+ return;
+ }
+
+ acb->bh = qemu_bh_new(quorum_aio_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BDRVQuorumState *s = bs->opaque;
+ QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
+ cb, opaque);
+ int i;
+
+ for (i = 0; i < s->total; i++) {
+ acb->aios[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
+ nb_sectors, &quorum_aio_cb,
+ &acb->aios[i]);
+ }
+
+ return &acb->common;
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -179,6 +289,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
+
+ .bdrv_aio_writev = quorum_aio_writev,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 05/11] blkverify: Extract qemu_iovec_clone() and qemu_iovec_compare() from blkverify.
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (3 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 04/11] quorum: Add quorum_aio_writev and its dependencies Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 06/11] quorum: Add quorum_co_flush() Benoît Canet
` (5 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/blkverify.c | 108 +----------------------------------------------------
cutils.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
qemu-common.h | 2 +
3 files changed, 107 insertions(+), 106 deletions(-)
diff --git a/block/blkverify.c b/block/blkverify.c
index 9d5f1ec..79d36d5 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -123,110 +123,6 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
return bdrv_getlength(s->test_file);
}
-/**
- * Check that I/O vector contents are identical
- *
- * @a: I/O vector
- * @b: I/O vector
- * @ret: Offset to first mismatching byte or -1 if match
- */
-static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
-{
- int i;
- ssize_t offset = 0;
-
- assert(a->niov == b->niov);
- for (i = 0; i < a->niov; i++) {
- size_t len = 0;
- uint8_t *p = (uint8_t *)a->iov[i].iov_base;
- uint8_t *q = (uint8_t *)b->iov[i].iov_base;
-
- assert(a->iov[i].iov_len == b->iov[i].iov_len);
- while (len < a->iov[i].iov_len && *p++ == *q++) {
- len++;
- }
-
- offset += len;
-
- if (len != a->iov[i].iov_len) {
- return offset;
- }
- }
- return -1;
-}
-
-typedef struct {
- int src_index;
- struct iovec *src_iov;
- void *dest_base;
-} IOVectorSortElem;
-
-static int sortelem_cmp_src_base(const void *a, const void *b)
-{
- const IOVectorSortElem *elem_a = a;
- const IOVectorSortElem *elem_b = b;
-
- /* Don't overflow */
- if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
- return -1;
- } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
- return 1;
- } else {
- return 0;
- }
-}
-
-static int sortelem_cmp_src_index(const void *a, const void *b)
-{
- const IOVectorSortElem *elem_a = a;
- const IOVectorSortElem *elem_b = b;
-
- return elem_a->src_index - elem_b->src_index;
-}
-
-/**
- * Copy contents of I/O vector
- *
- * The relative relationships of overlapping iovecs are preserved. This is
- * necessary to ensure identical semantics in the cloned I/O vector.
- */
-static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
- void *buf)
-{
- IOVectorSortElem sortelems[src->niov];
- void *last_end;
- int i;
-
- /* Sort by source iovecs by base address */
- for (i = 0; i < src->niov; i++) {
- sortelems[i].src_index = i;
- sortelems[i].src_iov = &src->iov[i];
- }
- qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
-
- /* Allocate buffer space taking into account overlapping iovecs */
- last_end = NULL;
- for (i = 0; i < src->niov; i++) {
- struct iovec *cur = sortelems[i].src_iov;
- ptrdiff_t rewind = 0;
-
- /* Detect overlap */
- if (last_end && last_end > cur->iov_base) {
- rewind = last_end - cur->iov_base;
- }
-
- sortelems[i].dest_base = buf - rewind;
- buf += cur->iov_len - MIN(rewind, cur->iov_len);
- last_end = MAX(cur->iov_base + cur->iov_len, last_end);
- }
-
- /* Sort by source iovec index and build destination iovec */
- qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
- for (i = 0; i < src->niov; i++) {
- qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
- }
-}
-
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
@@ -290,7 +186,7 @@ static void blkverify_aio_cb(void *opaque, int ret)
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
{
- ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+ ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
if (offset != -1) {
blkverify_err(acb, "contents mismatch in sector %" PRId64,
acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
@@ -308,7 +204,7 @@ static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
acb->verify = blkverify_verify_readv;
acb->buf = qemu_blockalign(bs->file, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
- blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+ qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
diff --git a/cutils.c b/cutils.c
index 8edd8fa..cedf84e 100644
--- a/cutils.c
+++ b/cutils.c
@@ -245,6 +245,109 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
}
+/**
+ * Check that I/O vector contents are identical
+ *
+ * @a: I/O vector
+ * @b: I/O vector
+ * @ret: Offset to first mismatching byte or -1 if match
+ */
+ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+ int i;
+ ssize_t offset = 0;
+
+ assert(a->niov == b->niov);
+ for (i = 0; i < a->niov; i++) {
+ size_t len = 0;
+ uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+ uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+ assert(a->iov[i].iov_len == b->iov[i].iov_len);
+ while (len < a->iov[i].iov_len && *p++ == *q++) {
+ len++;
+ }
+
+ offset += len;
+
+ if (len != a->iov[i].iov_len) {
+ return offset;
+ }
+ }
+ return -1;
+}
+
+typedef struct {
+ int src_index;
+ struct iovec *src_iov;
+ void *dest_base;
+} IOVectorSortElem;
+
+static int sortelem_cmp_src_base(const void *a, const void *b)
+{
+ const IOVectorSortElem *elem_a = a;
+ const IOVectorSortElem *elem_b = b;
+
+ /* Don't overflow */
+ if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
+ return -1;
+ } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int sortelem_cmp_src_index(const void *a, const void *b)
+{
+ const IOVectorSortElem *elem_a = a;
+ const IOVectorSortElem *elem_b = b;
+
+ return elem_a->src_index - elem_b->src_index;
+}
+
+/**
+ * Copy contents of I/O vector
+ *
+ * The relative relationships of overlapping iovecs are preserved. This is
+ * necessary to ensure identical semantics in the cloned I/O vector.
+ */
+void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
+{
+ IOVectorSortElem sortelems[src->niov];
+ void *last_end;
+ int i;
+
+ /* Sort by source iovecs by base address */
+ for (i = 0; i < src->niov; i++) {
+ sortelems[i].src_index = i;
+ sortelems[i].src_iov = &src->iov[i];
+ }
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
+
+ /* Allocate buffer space taking into account overlapping iovecs */
+ last_end = NULL;
+ for (i = 0; i < src->niov; i++) {
+ struct iovec *cur = sortelems[i].src_iov;
+ ptrdiff_t rewind = 0;
+
+ /* Detect overlap */
+ if (last_end && last_end > cur->iov_base) {
+ rewind = last_end - cur->iov_base;
+ }
+
+ sortelems[i].dest_base = buf - rewind;
+ buf += cur->iov_len - MIN(rewind, cur->iov_len);
+ last_end = MAX(cur->iov_base + cur->iov_len, last_end);
+ }
+
+ /* Sort by source iovec index and build destination iovec */
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
+ for (i = 0; i < src->niov; i++) {
+ qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
+ }
+}
+
/*
* Checks if a buffer is all zeroes
*
diff --git a/qemu-common.h b/qemu-common.h
index b54612b..78467d4 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -386,6 +386,8 @@ size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
const void *buf, size_t bytes);
size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int fillc, size_t bytes);
+ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b);
+void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf);
bool buffer_is_zero(const void *buf, size_t len);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 06/11] quorum: Add quorum_co_flush().
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (4 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 05/11] blkverify: Extract qemu_iovec_clone() and qemu_iovec_compare() from blkverify Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 15:37 ` Eric Blake
2012-10-23 12:23 ` [Qemu-devel] [PATCH 07/11] quorum: Add quorum_aio_readv Benoît Canet
` (4 subsequent siblings)
10 siblings, 1 reply; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 878d930..1b40081 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -281,6 +281,18 @@ static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
return &acb->common;
}
+/*
+ * Flush every child image.
+ *
+ * All children are flushed even when some of them fail.  The same rule as
+ * for writes in quorum_aio_bh() is applied: the flush succeeds when at
+ * least s->threshold children report success.
+ *
+ * Returns 0 on success, otherwise the first non-zero value returned by
+ * bdrv_co_flush() for a child.
+ */
+static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int success_count = 0;
+    int first_error = 0;
+    int i, ret;
+
+    for (i = 0; i < s->total; i++) {
+        ret = bdrv_co_flush(s->bs[i]);
+        if (ret == 0) {
+            success_count++;
+        } else if (first_error == 0) {
+            /* remember the first failure, but keep flushing the others */
+            first_error = ret;
+        }
+    }
+
+    return s->threshold <= success_count ? 0 : first_error;
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -289,6 +301,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
+ .bdrv_co_flush_to_disk = quorum_co_flush,
.bdrv_aio_writev = quorum_aio_writev,
};
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH 06/11] quorum: Add quorum_co_flush().
2012-10-23 12:23 ` [Qemu-devel] [PATCH 06/11] quorum: Add quorum_co_flush() Benoît Canet
@ 2012-10-23 15:37 ` Eric Blake
0 siblings, 0 replies; 14+ messages in thread
From: Eric Blake @ 2012-10-23 15:37 UTC (permalink / raw)
To: Benoît Canet; +Cc: kwolf, pbonzini, qemu-devel, stefanha
[-- Attachment #1: Type: text/plain, Size: 898 bytes --]
On 10/23/2012 06:23 AM, Benoît Canet wrote:
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
> block/quorum.c | 13 +++++++++++++
> 1 file changed, 13 insertions(+)
>
> diff --git a/block/quorum.c b/block/quorum.c
> index 878d930..1b40081 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -281,6 +281,18 @@ static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
> return &acb->common;
> }
>
> +static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
> +{
> + BDRVQuorumState *s = bs->opaque;
> + int i;
> +
> + for (i = 0; i < s->total; i++) {
> + bdrv_co_flush(s->bs[i]);
What if any of the individual flush calls return non-zero?
> + }
> +
> + return 0;
Shouldn't you then do likewise?
--
Eric Blake eblake@redhat.com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 617 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 07/11] quorum: Add quorum_aio_readv.
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (5 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 06/11] quorum: Add quorum_co_flush() Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 08/11] quorum: Add quorum mechanism Benoît Canet
` (3 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 38 +++++++++++++++++++++++++++++++++++++-
1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/block/quorum.c b/block/quorum.c
index 1b40081..51301fb 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -192,15 +192,24 @@ static void quorum_aio_bh(void *opaque)
{
QuorumAIOCB *acb = opaque;
BDRVQuorumState *s = acb->bqs;
- int ret;
+ int i, ret;
ret = s->threshold <= acb->success_count ? 0 : -EIO;
+ for (i = 0; i < s->total; i++) {
+ qemu_vfree(acb->aios[i].buf);
+ acb->aios[i].buf = NULL;
+ acb->aios[i].ret = 0;
+ }
+
qemu_bh_delete(acb->bh);
acb->common.cb(acb->common.opaque, ret);
if (acb->finished) {
*acb->finished = true;
}
+ for (i = 0; i < s->total; i++) {
+ qemu_iovec_destroy(&acb->qiovs[i]);
+ }
g_free(acb->aios);
g_free(acb->qiovs);
qemu_aio_release(acb);
@@ -260,6 +269,32 @@ static void quorum_aio_cb(void *opaque, int ret)
qemu_bh_schedule(acb->bh);
}
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BDRVQuorumState *s = bs->opaque;
+ QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
+ nb_sectors, cb, opaque);
+ int i;
+
+ for (i = 0; i < s->total; i++) {
+ acb->aios[i].buf = qemu_blockalign(bs->file, qiov->size);
+ qemu_iovec_init(&acb->qiovs[i], qiov->niov);
+ qemu_iovec_clone(&acb->qiovs[i], qiov, acb->aios[i].buf);
+ }
+
+ for (i = 0; i < s->total; i++) {
+ bdrv_aio_readv(s->bs[i], sector_num, qiov, nb_sectors,
+ quorum_aio_cb, &acb->aios[i]);
+ }
+
+ return &acb->common;
+}
+
static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -303,6 +338,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_close = quorum_close,
.bdrv_co_flush_to_disk = quorum_co_flush,
+ .bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
};
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 08/11] quorum: Add quorum mechanism.
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (6 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 07/11] quorum: Add quorum_aio_readv Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 09/11] quorum: Add quorum_getlength() Benoît Canet
` (2 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 277 insertions(+), 1 deletion(-)
diff --git a/block/quorum.c b/block/quorum.c
index 51301fb..6d76c49 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -14,6 +14,20 @@
*/
#include "block_int.h"
+#include "zlib.h"
+
+typedef struct QuorumVoteItem {
+ int index;
+ QLIST_ENTRY(QuorumVoteItem) next;
+} QuorumVoteItem;
+
+typedef struct QuorumVoteVersion {
+ unsigned long value;
+ int index;
+ int vote_count;
+ QLIST_HEAD(, QuorumVoteItem) items;
+ QLIST_ENTRY(QuorumVoteVersion) next;
+} QuorumVoteVersion;
typedef struct {
BlockDriverState **bs;
@@ -31,6 +45,10 @@ typedef struct QuorumSingleAIOCB {
QuorumAIOCB *parent;
} QuorumSingleAIOCB;
+typedef struct QuorumVotes {
+ QLIST_HEAD(, QuorumVoteVersion) vote_list;
+} QuorumVotes;
+
struct QuorumAIOCB {
BlockDriverAIOCB common;
BDRVQuorumState *bqs;
@@ -48,6 +66,8 @@ struct QuorumAIOCB {
int success_count; /* number of successfully completed AIOCB */
bool *finished; /* completion signal for cancel */
+ QuorumVotes votes;
+
void (*vote)(QuorumAIOCB *acb);
int vote_ret;
};
@@ -203,6 +223,11 @@ static void quorum_aio_bh(void *opaque)
}
qemu_bh_delete(acb->bh);
+
+ if (acb->vote_ret) {
+ ret = acb->vote_ret;
+ }
+
acb->common.cb(acb->common.opaque, ret);
if (acb->finished) {
*acb->finished = true;
@@ -238,6 +263,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
acb->nb_sectors = nb_sectors;
acb->vote = NULL;
acb->vote_ret = 0;
+ QLIST_INIT(&acb->votes.vote_list);
for (i = 0; i < s->total; i++) {
acb->aios[i].buf = NULL;
@@ -265,10 +291,258 @@ static void quorum_aio_cb(void *opaque, int ret)
return;
}
+ /* Do the vote */
+ if (acb->vote) {
+ acb->vote(acb);
+ }
+
acb->bh = qemu_bh_new(quorum_aio_bh, acb);
qemu_bh_schedule(acb->bh);
}
+/* Print a "corrected error" line on stderr naming @filename and the
+ * sector_num / nb_sectors of the request held in @acb.
+ */
+static void quorum_print_bad(QuorumAIOCB *acb, const char *filename)
+{
+ fprintf(stderr, "quorum: corrected error in quorum file %s: sector_num=%"
+ PRId64 " nb_sectors=%i\n", filename, acb->sector_num,
+ acb->nb_sectors);
+}
+
+/* Print a "failure" line on stderr with the sector_num / nb_sectors of the
+ * request held in @acb.
+ */
+static void quorum_print_failure(QuorumAIOCB *acb)
+{
+ fprintf(stderr, "quorum: failure sector_num=%" PRId64 " nb_sectors=%i\n",
+ acb->sector_num, acb->nb_sectors);
+}
+
+/* Report every voter whose version differs from @checksum.
+ *
+ * Walks the vote list of @acb and, for each version whose value is not
+ * @checksum, prints one "corrected error" line per voter using the file name
+ * stored at that voter's index.
+ */
+static void quorum_print_bad_versions(QuorumAIOCB *acb,
+                                      unsigned long checksum)
+{
+    BDRVQuorumState *s = acb->bqs;
+    QuorumVoteVersion *ver;
+    QuorumVoteItem *voter;
+
+    QLIST_FOREACH(ver, &acb->votes.vote_list, next) {
+        if (ver->value != checksum) {
+            QLIST_FOREACH(voter, &ver->items, next) {
+                quorum_print_bad(acb, s->filenames[voter->index]);
+            }
+        }
+    }
+}
+
+/* Copy the payload of @source into @dest, element by element.
+ *
+ * Both vectors must have the same layout: element count, total size and
+ * per-element lengths are all asserted to match.
+ */
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int n;
+
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+
+    for (n = 0; n < source->niov; n++) {
+        struct iovec *to = &dest->iov[n];
+        struct iovec *from = &source->iov[n];
+
+        assert(to->iov_len == from->iov_len);
+        memcpy(to->iov_base, from->iov_base, from->iov_len);
+    }
+}
+
+/* Record in @votes that voter @index voted for the value @checksum.
+ *
+ * A version entry is created the first time a value is seen; its index field
+ * keeps the index of that first voter. Every call adds one vote item and
+ * bumps the version's vote count.
+ */
+static void quorum_count_vote(QuorumVotes *votes,
+                              unsigned long checksum,
+                              int index)
+{
+    QuorumVoteVersion *cur, *match = NULL;
+    QuorumVoteItem *voter;
+
+    /* search for an existing version carrying this value */
+    QLIST_FOREACH(cur, &votes->vote_list, next) {
+        if (cur->value == checksum) {
+            match = cur;
+            break;
+        }
+    }
+
+    /* first vote for this value: create its version entry */
+    if (match == NULL) {
+        match = g_new0(QuorumVoteVersion, 1);
+        match->value = checksum;
+        match->index = index;
+        match->vote_count = 0;
+        QLIST_INIT(&match->items);
+        QLIST_INSERT_HEAD(&votes->vote_list, match, next);
+    }
+
+    /* remember who voted, then count the vote */
+    voter = g_new0(QuorumVoteItem, 1);
+    voter->index = index;
+    QLIST_INSERT_HEAD(&match->items, voter, next);
+    match->vote_count++;
+}
+
+/* Unlink and free every vote item attached to @version. */
+static void quorum_free_vote_items(QuorumVoteVersion *version)
+{
+    QuorumVoteItem *voter, *following;
+
+    QLIST_FOREACH_SAFE(voter, &version->items, next, following) {
+        QLIST_REMOVE(voter, next);
+        g_free(voter);
+    }
+}
+
+/* Empty @votes: unlink and free every version and the items hanging off it. */
+static void quorum_free_vote_list(QuorumVotes *votes)
+{
+    QuorumVoteVersion *ver, *following;
+
+    QLIST_FOREACH_SAFE(ver, &votes->vote_list, next, following) {
+        QLIST_REMOVE(ver, next);
+        quorum_free_vote_items(ver);
+        g_free(ver);
+    }
+}
+
+/* Return the Adler-32 checksum of the data held in acb->qiovs[i].
+ *
+ * The checksum is seeded with adler32(0L, Z_NULL, 0) and then accumulated
+ * over the vector's elements in order.
+ */
+static unsigned long quorum_compute_checksum(QuorumAIOCB *acb, int i)
+{
+    QEMUIOVector *qiov = &acb->qiovs[i];
+    unsigned long sum = adler32(0L, Z_NULL, 0);
+    int n;
+
+    for (n = 0; n < qiov->niov; n++) {
+        struct iovec *chunk = &qiov->iov[n];
+
+        sum = adler32(sum, chunk->iov_base, chunk->iov_len);
+    }
+
+    return sum;
+}
+
+/* Return the version of @votes with the highest vote count.
+ *
+ * Returns NULL when the list holds no version with a positive count. The
+ * comparison is strict, so on a tie the version met first in the list wins.
+ */
+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
+{
+    QuorumVoteVersion *cur;
+    QuorumVoteVersion *best = NULL;
+    int best_count = 0;
+
+    QLIST_FOREACH(cur, &votes->vote_list, next) {
+        if (cur->vote_count <= best_count) {
+            continue;
+        }
+        best = cur;
+        best_count = cur->vote_count;
+    }
+
+    return best;
+}
+
+/* Tell whether @a and @b hold the same bytes.
+ *
+ * Both vectors must have the same element count and per-element lengths
+ * (asserted). Returns true when every element compares equal, false at the
+ * first element that differs.
+ */
+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int n;
+
+    assert(a->niov == b->niov);
+
+    for (n = 0; n < a->niov; n++) {
+        struct iovec *lhs = &a->iov[n];
+        struct iovec *rhs = &b->iov[n];
+
+        assert(lhs->iov_len == rhs->iov_len);
+        if (memcmp(lhs->iov_base, rhs->iov_base, lhs->iov_len) != 0) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Print a fatal error for the request held in @acb and terminate.
+ *
+ * Writes the request's sector_num / nb_sectors, then the printf-style
+ * message built from @fmt and its arguments, then a newline, all on stderr.
+ * Does not return: the process exits with status 1.
+ */
+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
+                                          const char *fmt, ...)
+{
+    va_list args;
+
+    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+            acb->sector_num, acb->nb_sectors);
+
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fprintf(stderr, "\n");
+    exit(1);
+}
+
+/* Compare two read results of the request held in @acb.
+ *
+ * With exactly two files and a threshold of two, a mismatch is fatal:
+ * quorum_err() reports the offending sector and exits, so this mode only
+ * ever returns true. In every other configuration the function returns
+ * whether @a and @b hold the same bytes.
+ */
+static bool quorum_compare(QuorumAIOCB *acb,
+                           QEMUIOVector *a,
+                           QEMUIOVector *b)
+{
+    BDRVQuorumState *s = acb->bqs;
+    ssize_t mismatch;
+
+    /* general case: plain equality test */
+    if (s->total != 2 || s->threshold != 2) {
+        return quorum_iovec_compare(a, b);
+    }
+
+    /* two files, both required: any value other than -1 is taken as the
+     * byte offset of a mismatch
+     */
+    mismatch = qemu_iovec_compare(a, b);
+    if (mismatch != -1) {
+        quorum_err(acb, "contents mismatch in sector %" PRId64,
+                   acb->sector_num +
+                   (int64_t)(mismatch / BDRV_SECTOR_SIZE));
+    }
+
+    return true;
+}
+
+/* Decide which version of the data a completed read hands back.
+ *
+ * If every successful child read is identical, that data is copied into the
+ * caller's vector (acb->qiov). Otherwise each successful read is
+ * checksummed and the most frequent checksum wins; its data is copied and
+ * the disagreeing files are reported on stderr.
+ *
+ * Failures are signalled through acb->vote_ret, which is set to -EIO when
+ * no child read succeeded or when the winning version has fewer than
+ * s->threshold votes. Nothing is copied in those cases.
+ */
+static void quorum_vote(QuorumAIOCB *acb)
+{
+    bool quorum = true;
+    int i, j;
+    unsigned long checksum = 0;
+    BDRVQuorumState *s = acb->bqs;
+    QuorumVoteVersion *winner;
+
+    /* get the index of the first successful read */
+    for (i = 0; i < s->total; i++) {
+        if (!acb->aios[i].ret) {
+            break;
+        }
+    }
+
+    /* no read succeeded: i == s->total is not the index of any read, so
+     * there is nothing to compare or copy
+     */
+    if (i == s->total) {
+        quorum_print_failure(acb);
+        acb->vote_ret = -EIO;
+        return;
+    }
+
+    /* compare this read with all other successful reads looking for quorum */
+    for (j = i + 1; j < s->total; j++) {
+        if (acb->aios[j].ret) {
+            continue;
+        }
+
+        quorum = quorum_compare(acb, &acb->qiovs[i], &acb->qiovs[j]);
+        if (!quorum) {
+            break;
+        }
+    }
+
+    /* Every successful read agrees -> Quorum */
+    if (quorum) {
+        quorum_copy_qiov(acb->qiov, &acb->qiovs[i]);
+        return;
+    }
+
+    /* compute checksums for each successful read, also store indexes */
+    for (i = 0; i < s->total; i++) {
+        if (acb->aios[i].ret) {
+            continue;
+        }
+        checksum = quorum_compute_checksum(acb, i);
+        quorum_count_vote(&acb->votes, checksum, i);
+    }
+
+    /* vote to select the most represented version */
+    winner = quorum_get_vote_winner(&acb->votes);
+    assert(winner != NULL);
+
+    /* if the winner count is smaller than threshold the read fails */
+    if (winner->vote_count < s->threshold) {
+        quorum_print_failure(acb);
+        acb->vote_ret = -EIO;
+        fprintf(stderr, "quorum: vote result inferior to threshold\n");
+        goto free_exit;
+    }
+
+    /* we have a winner: copy it */
+    quorum_copy_qiov(acb->qiov, &acb->qiovs[winner->index]);
+
+    /* if some versions are bad print them */
+    quorum_print_bad_versions(acb, winner->value);
+
+free_exit:
+    /* free lists */
+    quorum_free_vote_list(&acb->votes);
+}
+
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -281,6 +555,8 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
nb_sectors, cb, opaque);
int i;
+ acb->vote = quorum_vote;
+
for (i = 0; i < s->total; i++) {
acb->aios[i].buf = qemu_blockalign(bs->file, qiov->size);
qemu_iovec_init(&acb->qiovs[i], qiov->niov);
@@ -288,7 +564,7 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
}
for (i = 0; i < s->total; i++) {
- bdrv_aio_readv(s->bs[i], sector_num, qiov, nb_sectors,
+ bdrv_aio_readv(s->bs[i], sector_num, &acb->qiovs[i], nb_sectors,
quorum_aio_cb, &acb->aios[i]);
}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 09/11] quorum: Add quorum_getlength().
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (7 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 08/11] quorum: Add quorum mechanism Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 10/11] quorum: Add quorum_invalidate_cache() Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 11/11] quorum: Add quorum_co_is_allocated Benoît Canet
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Check that every bs file returns the same length.
If not, return -EIO to disable the quorum and
avoid length discrepancies.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 6d76c49..a01f712 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -604,12 +604,32 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
return 0;
}
+/* Return the length shared by all files of the quorum.
+ *
+ * The value reported for the first file is the reference; if any other file
+ * reports a different value, -EIO is returned instead.
+ */
+static int64_t quorum_getlength(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int64_t reference = bdrv_getlength(s->bs[0]);
+    int n;
+
+    for (n = 1; n < s->total; n++) {
+        if (bdrv_getlength(s->bs[n]) != reference) {
+            return -EIO;
+        }
+    }
+
+    return reference;
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
.instance_size = sizeof(BDRVQuorumState),
+ .bdrv_getlength = quorum_getlength,
+
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
.bdrv_co_flush_to_disk = quorum_co_flush,
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 10/11] quorum: Add quorum_invalidate_cache().
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (8 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 09/11] quorum: Add quorum_getlength() Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
2012-10-23 12:23 ` [Qemu-devel] [PATCH 11/11] quorum: Add quorum_co_is_allocated Benoît Canet
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index a01f712..9550216 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -622,6 +622,16 @@ static int64_t quorum_getlength(BlockDriverState *bs)
return result;
}
+/* Forward the cache invalidation to every file of the quorum, in order. */
+static void quorum_invalidate_cache(BlockDriverState *bs)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int n = 0;
+
+    while (n < s->total) {
+        bdrv_invalidate_cache(s->bs[n]);
+        n++;
+    }
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -636,6 +646,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
+ .bdrv_invalidate_cache = quorum_invalidate_cache,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 11/11] quorum: Add quorum_co_is_allocated.
2012-10-23 12:23 [Qemu-devel] [PATCH 00/11] Quorum disk image corruption resiliency Benoît Canet
` (9 preceding siblings ...)
2012-10-23 12:23 ` [Qemu-devel] [PATCH 10/11] quorum: Add quorum_invalidate_cache() Benoît Canet
@ 2012-10-23 12:23 ` Benoît Canet
10 siblings, 0 replies; 14+ messages in thread
From: Benoît Canet @ 2012-10-23 12:23 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 9550216..587796d 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -632,6 +632,37 @@ static void quorum_invalidate_cache(BlockDriverState *bs)
}
}
+/* Query the allocation status of a sector range by majority vote.
+ *
+ * Each file of the quorum is asked through bdrv_co_is_allocated(); the
+ * return values and the sector counts are tallied in two independent votes.
+ * The most frequent return value is returned and the most frequent count is
+ * stored in *pnum. No threshold is applied to either vote.
+ */
+static int coroutine_fn quorum_co_is_allocated(BlockDriverState *bs,
+                                               int64_t sector_num,
+                                               int nb_sectors,
+                                               int *pnum)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumVoteVersion *winner = NULL;
+    QuorumVotes result_votes, num_votes;
+    int i, result, num;
+
+    QLIST_INIT(&result_votes.vote_list);
+    QLIST_INIT(&num_votes.vote_list);
+
+    for (i = 0; i < s->total; i++) {
+        /* start from a defined count in case the call below returns
+         * without storing one; an indeterminate value must not be voted on
+         */
+        num = 0;
+        result = bdrv_co_is_allocated(s->bs[i], sector_num, nb_sectors, &num);
+        quorum_count_vote(&result_votes, result, i);
+        quorum_count_vote(&num_votes, num, i);
+    }
+
+    /* same check as quorum_vote(): an empty tally has no winner */
+    winner = quorum_get_vote_winner(&result_votes);
+    assert(winner != NULL);
+    result = winner->value;
+
+    winner = quorum_get_vote_winner(&num_votes);
+    assert(winner != NULL);
+    *pnum = winner->value;
+
+    quorum_free_vote_list(&result_votes);
+    quorum_free_vote_list(&num_votes);
+
+    return result;
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -647,6 +678,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
.bdrv_invalidate_cache = quorum_invalidate_cache,
+ .bdrv_co_is_allocated = quorum_co_is_allocated,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 14+ messages in thread