* [Qemu-devel] [RFC V6 01/11] quorum: Create quorum.c, add QuorumSingleAIOCB and QuorumAIOCB.
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 02/11] quorum: Create BDRVQuorumState and BlkDriver and do init Benoît Canet
` (9 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/Makefile.objs | 1 +
block/quorum.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
create mode 100644 block/quorum.c
diff --git a/block/Makefile.objs b/block/Makefile.objs
index c067f38..4143e34 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
+block-obj-y += quorum.o
block-obj-y += parallels.o blkdebug.o blkverify.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/quorum.c b/block/quorum.c
new file mode 100644
index 0000000..ce094a1
--- /dev/null
+++ b/block/quorum.c
@@ -0,0 +1,45 @@
+/*
+ * Quorum Block filter
+ *
+ * Copyright (C) 2012-2013 Nodalink, SARL.
+ *
+ * Author:
+ * Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp)
+ * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc).
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "block/block_int.h"
+
+typedef struct QuorumAIOCB QuorumAIOCB;
+
+/* State kept for one child request; one entry exists per quorum file. */
+typedef struct QuorumSingleAIOCB {
+ BlockDriverAIOCB *aiocb; /* AIOCB of the request issued to the child */
+ uint8_t *buf; /* data buffer owned by this child request */
+ int ret; /* completion status of the child request */
+ QuorumAIOCB *parent; /* request this child belongs to */
+} QuorumSingleAIOCB;
+
+/* State of one guest request while it is dispatched to the quorum files. */
+struct QuorumAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh; /* bottom half used to complete the request */
+
+ /* Request metadata */
+ uint64_t sector_num;
+ int nb_sectors;
+
+ QEMUIOVector *qiov; /* IOV handed in by the caller */
+
+ QuorumSingleAIOCB *aios; /* individual AIOs */
+ QEMUIOVector *qiovs; /* individual IOVs */
+ int count; /* number of completed AIOCB */
+ int success_count; /* number of successfully completed AIOCB */
+ bool *finished; /* completion signal for cancel */
+
+ void (*vote)(QuorumAIOCB *acb); /* optional hook, NULL when unused */
+ int vote_ret; /* result reported by the vote hook */
+};
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 02/11] quorum: Create BDRVQuorumState and BlkDriver and do init.
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 01/11] quorum: Create quorum.c, add QuorumSingleAIOCB and QuorumAIOCB Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 03/11] quorum: Add quorum_open() and quorum_close() Benoît Canet
` (8 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index ce094a1..1f12438 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -15,6 +15,13 @@
#include "block/block_int.h"
+/* Per-device driver state (stored in the BlockDriverState opaque area). */
+typedef struct {
+ BlockDriverState **bs; /* array of child block devices */
+ int threshold; /* threshold part of the threshold/total pair */
+ int total; /* number of entries in bs and filenames */
+ char **filenames; /* one filename per child */
+} BDRVQuorumState;
+
typedef struct QuorumAIOCB QuorumAIOCB;
typedef struct QuorumSingleAIOCB {
@@ -26,6 +33,7 @@ typedef struct QuorumSingleAIOCB {
struct QuorumAIOCB {
BlockDriverAIOCB common;
+ BDRVQuorumState *bqs;
QEMUBH *bh;
/* Request metadata */
@@ -43,3 +51,17 @@ struct QuorumAIOCB {
void (*vote)(QuorumAIOCB *acb);
int vote_ret;
};
+
+/* Driver description handed to bdrv_register(). */
+static BlockDriver bdrv_quorum = {
+ .format_name = "quorum",
+ .protocol_name = "quorum",
+
+ /* size of the per-device BDRVQuorumState */
+ .instance_size = sizeof(BDRVQuorumState),
+};
+
+/* Register the quorum driver; hooked up through block_init() below. */
+static void bdrv_quorum_init(void)
+{
+ bdrv_register(&bdrv_quorum);
+}
+
+block_init(bdrv_quorum_init);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 03/11] quorum: Add quorum_open() and quorum_close().
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 01/11] quorum: Create quorum.c, add QuorumSingleAIOCB and QuorumAIOCB Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 02/11] quorum: Create BDRVQuorumState and BlkDriver and do init Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 22:13 ` Eric Blake
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 04/11] quorum: Add quorum_aio_writev and its dependencies Benoît Canet
` (7 subsequent siblings)
10 siblings, 1 reply; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Valid quorum resources look like
quorum:threshold/total:path/to/image_1: ... :path/to/image_total
':' is used as a separator
'\' is the escaping character for filename containing ':'
'\' escapes itself
',' must be escaped with ','
On the command line, the invocation for the quorum files "img:test.raw",
"img2,raw" and "img3.raw" looks like:
-drive file=quorum:2/3:img\\:test.raw:img2,,raw:img3.raw
(note the double \\ and the double ,,)
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 122 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 1f12438..c7ecffd 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -52,11 +52,133 @@ struct QuorumAIOCB {
int vote_ret;
};
+/* Valid quorum resources look like
+ * quorum:threshold/total:path/to/image_1: ... :path/to/image_total
+ *
+ * ':' is used as a separator
+ * '\' is the escaping character for filename containing ':'
+ * '\' escapes itself
+ *
+ * The filenames are unescaped in place inside a single g_strdup()ed copy of
+ * the resource string; s->filenames[] points into that copy.
+ *
+ * Returns 0 on success, -EINVAL when the resource string is malformed
+ * (bad prefix, bad threshold/total, wrong number of filenames) or the
+ * negative value returned by bdrv_open() when a child cannot be opened.
+ */
+static int quorum_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i, j, k, len, ret = 0;
+    char *a, *b, *names;
+    const char *start;
+    bool escape;
+
+    /* Parse the quorum: prefix */
+    if (!strstart(filename, "quorum:", &start)) {
+        return -EINVAL;
+    }
+
+    /* Get threshold; a == start means that no digit was consumed */
+    errno = 0;
+    s->threshold = strtoul(start, &a, 10);
+    if (a == start || *a != '/' || errno) {
+        return -EINVAL;
+    }
+    a++;
+
+    /* Get total; b == a means that no digit was consumed */
+    errno = 0;
+    s->total = strtoul(a, &b, 10);
+    if (b == a || *b != ':' || errno) {
+        return -EINVAL;
+    }
+    b++;
+
+    if (s->threshold < 1 || s->total < 2) {
+        return -EINVAL;
+    }
+
+    if (s->threshold > s->total) {
+        return -EINVAL;
+    }
+
+    s->bs = g_malloc0(sizeof(BlockDriverState *) * s->total);
+    /* Two allocations for all filenames: simpler to free */
+    s->filenames = g_malloc0(sizeof(char *) * s->total);
+    names = g_strdup(b);
+
+    /* Get the filenames pointers */
+    escape = false;
+    s->filenames[0] = names;
+    len = strlen(names);
+    for (i = j = k = 0; i < len; i++) {
+        /* separation between two files */
+        if (!escape && names[i] == ':') {
+            char *prev = s->filenames[j];
+
+            /* More filenames than announced: bail out before storing a
+             * pointer past the end of s->filenames[].
+             */
+            if (j + 1 >= s->total) {
+                ret = -EINVAL;
+                goto free_exit;
+            }
+
+            prev[k] = '\0';
+            s->filenames[++j] = prev + k + 1;
+            k = 0;
+            continue;
+        }
+
+        escape = !escape && names[i] == '\\';
+
+        /* if we are not escaping copy */
+        if (!escape) {
+            s->filenames[j][k++] = names[i];
+        }
+    }
+    /* terminate last string */
+    s->filenames[j][k] = '\0';
+
+    /* Fewer filenames than announced */
+    if ((j + 1) != s->total) {
+        ret = -EINVAL;
+        goto free_exit;
+    }
+
+    /* Open files */
+    for (i = 0; i < s->total; i++) {
+        s->bs[i] = bdrv_new("");
+        ret = bdrv_open(s->bs[i], s->filenames[i], flags, NULL);
+        if (ret < 0) {
+            goto error_exit;
+        }
+    }
+
+    goto exit;
+
+error_exit:
+    /* s->bs[i] was created but failed to open: delete it and its
+     * predecessors.
+     */
+    for (; i >= 0; i--) {
+        bdrv_delete(s->bs[i]);
+        s->bs[i] = NULL;
+    }
+free_exit:
+    /* filenames[0] is the start of the g_strdup()ed buffer */
+    g_free(s->filenames[0]);
+    g_free(s->filenames);
+    s->filenames = NULL;
+    g_free(s->bs);
+    s->bs = NULL;
+exit:
+    return ret;
+}
+
+/* Flush and delete every child, then release the arrays built by
+ * quorum_open().
+ */
+static void quorum_close(BlockDriverState *bs)
+{
+    BDRVQuorumState *state = bs->opaque;
+    int child = 0;
+
+    while (child < state->total) {
+        BlockDriverState *image = state->bs[child++];
+
+        /* Ensure writes reach stable storage */
+        bdrv_flush(image);
+        bdrv_delete(image);
+    }
+
+    /* filenames[0] is the start of the single buffer holding all names */
+    g_free(state->filenames[0]);
+    g_free(state->filenames);
+    state->filenames = NULL;
+    g_free(state->bs);
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
.instance_size = sizeof(BDRVQuorumState),
+
+ .bdrv_file_open = quorum_open,
+ .bdrv_close = quorum_close,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC V6 03/11] quorum: Add quorum_open() and quorum_close().
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 03/11] quorum: Add quorum_open() and quorum_close() Benoît Canet
@ 2013-01-17 22:13 ` Eric Blake
0 siblings, 0 replies; 15+ messages in thread
From: Eric Blake @ 2013-01-17 22:13 UTC (permalink / raw)
To: Benoît Canet; +Cc: kwolf, pbonzini, qemu-devel, stefanha
[-- Attachment #1: Type: text/plain, Size: 1898 bytes --]
On 01/17/2013 08:51 AM, Benoît Canet wrote:
> Valid quorum resources look like
> quorum:threshold/total:path/to/image_1: ... :path/to/image_total
>
> ':' is used as a separator
> '\' is the escaping character for filename containing ':'
> '\' escape itself
> ',' must be escaped with ','
>
> On the command line for quorum files "img:test.raw", "img2,raw"
> and "img3.raw" invocation look like:
>
> -drive file=quorum:2/3:img\\:test.raw:img2,,raw:img3.raw
> (note the double \\ and the double ,,)
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> + /* Get threshold */
> + errno = 0;
> + s->threshold = strtoul(start, &a, 10);
> + if (*a != '/' || errno) {
> + return -EINVAL;
> + }
> + a++;
Hmm - you can fail to reject file=quorum:/3:... (strtoul happily parses
a to 0 in that case, and is not required to set errno). But see below...
> +
> + /* Get total */
> + errno = 0;
> + s->total = strtoul(a, &b, 10);
> + if (*b != ':' || errno) {
> + return -EINVAL;
> + }
> + b++;
Again, you fail to reject file=quorum:1/:... (strtoul happily parses b
to 0 in that case, and is not required to set errno)...
> +
> + if (s->threshold < 1 || s->total < 2) {
> + return -EINVAL;
> + }
...but you got lucky: this check rejects either a or b being set to 0.
Still, you may want to refactor this patch on top of
https://lists.gnu.org/archive/html/qemu-devel/2013-01/msg03238.html.
> + if ((j + 1) != s->total) {
> + ret = -EINVAL;
> + goto free_exit;
> + }
You have a lot of reasons why this function can fail with -EINVAL; it
would be nicer if you actually set an error object describing each
failure, instead of making the user guess.
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 621 bytes --]
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 04/11] quorum: Add quorum_aio_writev and its dependencies.
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (2 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 03/11] quorum: Add quorum_open() and quorum_close() Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 05/11] blkverify: Extract qemu_iovec_clone() and qemu_iovec_compare() from blkverify Benoît Canet
` (6 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 113 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index c7ecffd..cb95eab 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -171,6 +171,117 @@ static void quorum_close(BlockDriverState *bs)
g_free(s->bs);
}
+/* Cancel callback: the request is not aborted, we simply wait until
+ * quorum_aio_bh() signals completion through acb->finished.
+ */
+static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    bool done = false;
+    QuorumAIOCB *req = container_of(blockacb, QuorumAIOCB, common);
+
+    /* Wait for the request to finish */
+    req->finished = &done;
+    for (;;) {
+        if (done) {
+            break;
+        }
+        qemu_aio_wait();
+    }
+}
+
+/* AIOCB description passed to qemu_aio_get() by quorum_aio_get(). */
+static AIOCBInfo quorum_aiocb_info = {
+ .aiocb_size = sizeof(QuorumAIOCB),
+ .cancel = quorum_aio_cancel,
+};
+
+/* Bottom half completing a request: report 0 when at least threshold
+ * children succeeded, -EIO otherwise, then release the request.
+ */
+static void quorum_aio_bh(void *opaque)
+{
+    QuorumAIOCB *req = opaque;
+    int status = -EIO;
+
+    if (req->success_count >= req->bqs->threshold) {
+        status = 0;
+    }
+
+    qemu_bh_delete(req->bh);
+    req->common.cb(req->common.opaque, status);
+
+    /* wake up a pending quorum_aio_cancel() */
+    if (req->finished != NULL) {
+        *req->finished = true;
+    }
+
+    g_free(req->aios);
+    g_free(req->qiovs);
+    qemu_aio_release(req);
+}
+
+/* Allocate a QuorumAIOCB for a request and initialise it together with its
+ * s->total per-child entries.
+ */
+static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
+                                   BlockDriverState *bs,
+                                   QEMUIOVector *qiov,
+                                   uint64_t sector_num,
+                                   int nb_sectors,
+                                   BlockDriverCompletionFunc *cb,
+                                   void *opaque)
+{
+    QuorumAIOCB *req = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
+    QuorumSingleAIOCB *child;
+
+    req->aios = g_new0(QuorumSingleAIOCB, s->total);
+    req->qiovs = g_new0(QEMUIOVector, s->total);
+
+    /* request description */
+    req->bqs = s;
+    req->qiov = qiov;
+    req->sector_num = sector_num;
+    req->nb_sectors = nb_sectors;
+
+    /* completion bookkeeping */
+    req->bh = NULL;
+    req->finished = NULL;
+    req->count = 0;
+    req->success_count = 0;
+    req->vote = NULL;
+    req->vote_ret = 0;
+
+    for (child = req->aios; child < req->aios + s->total; child++) {
+        child->buf = NULL;
+        child->ret = 0;
+        child->parent = req;
+    }
+
+    return req;
+}
+
+/* Completion callback of one child request: record its status and, once
+ * every child has answered, schedule quorum_aio_bh().
+ */
+static void quorum_aio_cb(void *opaque, int ret)
+{
+    QuorumSingleAIOCB *child = opaque;
+    QuorumAIOCB *req = child->parent;
+    int total = req->bqs->total;
+
+    child->ret = ret;
+    req->count += 1;
+    req->success_count += (ret == 0) ? 1 : 0;
+
+    assert(req->count <= total);
+    assert(req->success_count <= total);
+
+    /* still waiting for other children */
+    if (req->count >= total) {
+        req->bh = qemu_bh_new(quorum_aio_bh, req);
+        qemu_bh_schedule(req->bh);
+    }
+}
+
+/* Submit the same write to every child; completion is gathered by
+ * quorum_aio_cb().
+ */
+static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
+                                           int64_t sector_num,
+                                           QEMUIOVector *qiov,
+                                           int nb_sectors,
+                                           BlockDriverCompletionFunc *cb,
+                                           void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *req = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
+                                      cb, opaque);
+    int child = 0;
+
+    while (child < s->total) {
+        QuorumSingleAIOCB *sacb = &req->aios[child];
+
+        sacb->aiocb = bdrv_aio_writev(s->bs[child], sector_num, qiov,
+                                      nb_sectors, &quorum_aio_cb, sacb);
+        child++;
+    }
+
+    return &req->common;
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -179,6 +290,8 @@ static BlockDriver bdrv_quorum = {
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
+
+ .bdrv_aio_writev = quorum_aio_writev,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 05/11] blkverify: Extract qemu_iovec_clone() and qemu_iovec_compare() from blkverify.
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (3 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 04/11] quorum: Add quorum_aio_writev and its dependencies Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 06/11] quorum: Add quorum_aio_readv Benoît Canet
` (5 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/blkverify.c | 108 +------------------------------------------------
include/qemu-common.h | 2 +
iov.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 107 insertions(+), 106 deletions(-)
diff --git a/block/blkverify.c b/block/blkverify.c
index a7dd459..8c65425 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -123,110 +123,6 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
return bdrv_getlength(s->test_file);
}
-/**
- * Check that I/O vector contents are identical
- *
- * @a: I/O vector
- * @b: I/O vector
- * @ret: Offset to first mismatching byte or -1 if match
- */
-static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
-{
- int i;
- ssize_t offset = 0;
-
- assert(a->niov == b->niov);
- for (i = 0; i < a->niov; i++) {
- size_t len = 0;
- uint8_t *p = (uint8_t *)a->iov[i].iov_base;
- uint8_t *q = (uint8_t *)b->iov[i].iov_base;
-
- assert(a->iov[i].iov_len == b->iov[i].iov_len);
- while (len < a->iov[i].iov_len && *p++ == *q++) {
- len++;
- }
-
- offset += len;
-
- if (len != a->iov[i].iov_len) {
- return offset;
- }
- }
- return -1;
-}
-
-typedef struct {
- int src_index;
- struct iovec *src_iov;
- void *dest_base;
-} IOVectorSortElem;
-
-static int sortelem_cmp_src_base(const void *a, const void *b)
-{
- const IOVectorSortElem *elem_a = a;
- const IOVectorSortElem *elem_b = b;
-
- /* Don't overflow */
- if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
- return -1;
- } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
- return 1;
- } else {
- return 0;
- }
-}
-
-static int sortelem_cmp_src_index(const void *a, const void *b)
-{
- const IOVectorSortElem *elem_a = a;
- const IOVectorSortElem *elem_b = b;
-
- return elem_a->src_index - elem_b->src_index;
-}
-
-/**
- * Copy contents of I/O vector
- *
- * The relative relationships of overlapping iovecs are preserved. This is
- * necessary to ensure identical semantics in the cloned I/O vector.
- */
-static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
- void *buf)
-{
- IOVectorSortElem sortelems[src->niov];
- void *last_end;
- int i;
-
- /* Sort by source iovecs by base address */
- for (i = 0; i < src->niov; i++) {
- sortelems[i].src_index = i;
- sortelems[i].src_iov = &src->iov[i];
- }
- qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
-
- /* Allocate buffer space taking into account overlapping iovecs */
- last_end = NULL;
- for (i = 0; i < src->niov; i++) {
- struct iovec *cur = sortelems[i].src_iov;
- ptrdiff_t rewind = 0;
-
- /* Detect overlap */
- if (last_end && last_end > cur->iov_base) {
- rewind = last_end - cur->iov_base;
- }
-
- sortelems[i].dest_base = buf - rewind;
- buf += cur->iov_len - MIN(rewind, cur->iov_len);
- last_end = MAX(cur->iov_base + cur->iov_len, last_end);
- }
-
- /* Sort by source iovec index and build destination iovec */
- qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
- for (i = 0; i < src->niov; i++) {
- qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
- }
-}
-
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
@@ -290,7 +186,7 @@ static void blkverify_aio_cb(void *opaque, int ret)
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
{
- ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+ ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
if (offset != -1) {
blkverify_err(acb, "contents mismatch in sector %" PRId64,
acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
@@ -308,7 +204,7 @@ static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
acb->verify = blkverify_verify_readv;
acb->buf = qemu_blockalign(bs->file, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
- blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+ qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index ca464bb..13ce13a 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -342,6 +342,8 @@ size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
const void *buf, size_t bytes);
size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
int fillc, size_t bytes);
+ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b);
+void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf);
bool buffer_is_zero(const void *buf, size_t len);
diff --git a/iov.c b/iov.c
index c0f5c56..bed2c22 100644
--- a/iov.c
+++ b/iov.c
@@ -370,6 +370,109 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
}
+/**
+ * Compare two I/O vectors element by element
+ *
+ * Both vectors must have the same number of elements and matching element
+ * lengths (asserted).
+ *
+ * @a: I/O vector
+ * @b: I/O vector
+ * @ret: Offset to first mismatching byte or -1 if match
+ */
+ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    ssize_t offset = 0;
+    int idx;
+
+    assert(a->niov == b->niov);
+    for (idx = 0; idx < a->niov; idx++) {
+        const uint8_t *lhs = a->iov[idx].iov_base;
+        const uint8_t *rhs = b->iov[idx].iov_base;
+        size_t size = a->iov[idx].iov_len;
+        size_t same;
+
+        assert(size == b->iov[idx].iov_len);
+
+        /* count the leading bytes that are equal in this element */
+        for (same = 0; same < size; same++) {
+            if (lhs[same] != rhs[same]) {
+                break;
+            }
+        }
+
+        offset += same;
+
+        if (same != size) {
+            return offset;
+        }
+    }
+    return -1;
+}
+
+/* Helper element used by qemu_iovec_clone() while sorting iovecs. */
+typedef struct {
+ int src_index; /* position of the element in the source vector */
+ struct iovec *src_iov; /* the source element itself */
+ void *dest_base; /* base address chosen for the cloned element */
+} IOVectorSortElem;
+
+/* qsort() comparator ordering IOVectorSortElem by source base address. */
+static int sortelem_cmp_src_base(const void *a, const void *b)
+{
+    const void *lhs = ((const IOVectorSortElem *)a)->src_iov->iov_base;
+    const void *rhs = ((const IOVectorSortElem *)b)->src_iov->iov_base;
+
+    /* Compare rather than subtract so the result cannot overflow */
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/* qsort() comparator ordering IOVectorSortElem by source index. */
+static int sortelem_cmp_src_index(const void *a, const void *b)
+{
+    int lhs = ((const IOVectorSortElem *)a)->src_index;
+    int rhs = ((const IOVectorSortElem *)b)->src_index;
+
+    return lhs - rhs;
+}
+
+/**
+ * Clone the layout of an I/O vector onto a caller-provided buffer
+ *
+ * Elements are added to @dest with the same lengths and order as in @src,
+ * but with base addresses located in @buf. No data is copied by this
+ * function.
+ *
+ * The relative relationships of overlapping iovecs are preserved. This is
+ * necessary to ensure identical semantics in the cloned I/O vector.
+ *
+ * @dest: I/O vector receiving the cloned elements (via qemu_iovec_add())
+ * @src: I/O vector whose layout is cloned
+ * @buf: start of the buffer backing the cloned elements
+ */
+void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
+{
+ IOVectorSortElem sortelems[src->niov];
+ void *last_end;
+ int i;
+
+ /* Sort by source iovecs by base address */
+ for (i = 0; i < src->niov; i++) {
+ sortelems[i].src_index = i;
+ sortelems[i].src_iov = &src->iov[i];
+ }
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
+
+ /* Allocate buffer space taking into account overlapping iovecs */
+ last_end = NULL;
+ for (i = 0; i < src->niov; i++) {
+ struct iovec *cur = sortelems[i].src_iov;
+ ptrdiff_t rewind = 0;
+
+ /* Detect overlap: this element starts before the highest end seen */
+ if (last_end && last_end > cur->iov_base) {
+ rewind = last_end - cur->iov_base;
+ }
+
+ /* step back by the overlap, then advance by the non-overlapping part */
+ sortelems[i].dest_base = buf - rewind;
+ buf += cur->iov_len - MIN(rewind, cur->iov_len);
+ last_end = MAX(cur->iov_base + cur->iov_len, last_end);
+ }
+
+ /* Sort by source iovec index and build destination iovec */
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
+ for (i = 0; i < src->niov; i++) {
+ qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
+ }
+}
+
size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
size_t bytes)
{
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 06/11] quorum: Add quorum_aio_readv.
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (4 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 05/11] blkverify: Extract qemu_iovec_clone() and qemu_iovec_compare() from blkverify Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 07/11] quorum: Add quorum mechanism Benoît Canet
` (4 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 38 +++++++++++++++++++++++++++++++++++++-
1 file changed, 37 insertions(+), 1 deletion(-)
diff --git a/block/quorum.c b/block/quorum.c
index cb95eab..98052eb 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -192,15 +192,24 @@ static void quorum_aio_bh(void *opaque)
{
QuorumAIOCB *acb = opaque;
BDRVQuorumState *s = acb->bqs;
- int ret;
+ int i, ret;
ret = s->threshold <= acb->success_count ? 0 : -EIO;
+ for (i = 0; i < s->total; i++) {
+ qemu_vfree(acb->aios[i].buf);
+ acb->aios[i].buf = NULL;
+ acb->aios[i].ret = 0;
+ }
+
qemu_bh_delete(acb->bh);
acb->common.cb(acb->common.opaque, ret);
if (acb->finished) {
*acb->finished = true;
}
+ for (i = 0; i < s->total; i++) {
+ qemu_iovec_destroy(&acb->qiovs[i]);
+ }
g_free(acb->aios);
g_free(acb->qiovs);
qemu_aio_release(acb);
@@ -261,6 +270,32 @@ static void quorum_aio_cb(void *opaque, int ret)
qemu_bh_schedule(acb->bh);
}
+/* Prepare one buffer and one cloned IOV per child, then submit a read to
+ * every child. Note that the reads themselves are issued on the caller's
+ * qiov, not on the per-child clones.
+ */
+static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
+                                          int64_t sector_num,
+                                          QEMUIOVector *qiov,
+                                          int nb_sectors,
+                                          BlockDriverCompletionFunc *cb,
+                                          void *opaque)
+{
+    BDRVQuorumState *s = bs->opaque;
+    QuorumAIOCB *req = quorum_aio_get(s, bs, qiov, sector_num,
+                                      nb_sectors, cb, opaque);
+    int child;
+
+    /* set up every child before submitting anything */
+    for (child = 0; child < s->total; child++) {
+        QEMUIOVector *shadow = &req->qiovs[child];
+        uint8_t *bounce = qemu_blockalign(bs->file, qiov->size);
+
+        req->aios[child].buf = bounce;
+        qemu_iovec_init(shadow, qiov->niov);
+        qemu_iovec_clone(shadow, qiov, bounce);
+    }
+
+    child = 0;
+    while (child < s->total) {
+        bdrv_aio_readv(s->bs[child], sector_num, qiov, nb_sectors,
+                       quorum_aio_cb, &req->aios[child]);
+        child++;
+    }
+
+    return &req->common;
+}
+
static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -291,6 +326,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
+ .bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
};
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 07/11] quorum: Add quorum mechanism.
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (5 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 06/11] quorum: Add quorum_aio_readv Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 22:23 ` Eric Blake
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 08/11] quorum: Add quorum_getlength() Benoît Canet
` (3 subsequent siblings)
10 siblings, 1 reply; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 277 insertions(+), 1 deletion(-)
diff --git a/block/quorum.c b/block/quorum.c
index 98052eb..2bffff4 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -14,6 +14,20 @@
*/
#include "block/block_int.h"
+#include "zlib.h"
+
+/* One voter: the index of a child that produced a given version. */
+typedef struct QuorumVoteItem {
+ int index; /* index of the child in the quorum */
+ QLIST_ENTRY(QuorumVoteItem) next;
+} QuorumVoteItem;
+
+/* One distinct version of the data, identified by its checksum. */
+typedef struct QuorumVoteVersion {
+ unsigned long value; /* checksum identifying this version */
+ int index; /* index of the first child seen with this version */
+ int vote_count; /* number of children that returned this version */
+ QLIST_HEAD(, QuorumVoteItem) items; /* the children that voted for it */
+ QLIST_ENTRY(QuorumVoteVersion) next;
+} QuorumVoteVersion;
typedef struct {
BlockDriverState **bs;
@@ -31,6 +45,10 @@ typedef struct QuorumSingleAIOCB {
QuorumAIOCB *parent;
} QuorumSingleAIOCB;
+/* All versions collected for one request. */
+typedef struct QuorumVotes {
+ QLIST_HEAD(, QuorumVoteVersion) vote_list;
+} QuorumVotes;
+
struct QuorumAIOCB {
BlockDriverAIOCB common;
BDRVQuorumState *bqs;
@@ -48,6 +66,8 @@ struct QuorumAIOCB {
int success_count; /* number of successfully completed AIOCB */
bool *finished; /* completion signal for cancel */
+ QuorumVotes votes;
+
void (*vote)(QuorumAIOCB *acb);
int vote_ret;
};
@@ -203,6 +223,11 @@ static void quorum_aio_bh(void *opaque)
}
qemu_bh_delete(acb->bh);
+
+ if (acb->vote_ret) {
+ ret = acb->vote_ret;
+ }
+
acb->common.cb(acb->common.opaque, ret);
if (acb->finished) {
*acb->finished = true;
@@ -239,6 +264,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
acb->vote = NULL;
acb->vote_ret = 0;
acb->finished = NULL;
+ QLIST_INIT(&acb->votes.vote_list);
for (i = 0; i < s->total; i++) {
acb->aios[i].buf = NULL;
@@ -266,10 +292,258 @@ static void quorum_aio_cb(void *opaque, int ret)
return;
}
+ /* Do the vote */
+ if (acb->vote) {
+ acb->vote(acb);
+ }
+
acb->bh = qemu_bh_new(quorum_aio_bh, acb);
qemu_bh_schedule(acb->bh);
}
+/* Report on stderr that @filename held a version that lost the vote.
+ * sector_num is a uint64_t, hence the unsigned PRIu64 conversion.
+ */
+static void quorum_print_bad(QuorumAIOCB *acb, const char *filename)
+{
+    fprintf(stderr, "quorum: corrected error in quorum file %s: sector_num=%"
+            PRIu64 " nb_sectors=%i\n", filename, acb->sector_num,
+            acb->nb_sectors);
+}
+
+/* Report on stderr that the request failed.
+ * sector_num is a uint64_t, hence the unsigned PRIu64 conversion.
+ */
+static void quorum_print_failure(QuorumAIOCB *acb)
+{
+    fprintf(stderr, "quorum: failure sector_num=%" PRIu64 " nb_sectors=%i\n",
+            acb->sector_num, acb->nb_sectors);
+}
+
+/* Report every child whose version differs from the one identified by
+ * @checksum.
+ */
+static void quorum_print_bad_versions(QuorumAIOCB *acb,
+                                      unsigned long checksum)
+{
+    BDRVQuorumState *s = acb->bqs;
+    QuorumVoteVersion *ver;
+    QuorumVoteItem *voter;
+
+    QLIST_FOREACH(ver, &acb->votes.vote_list, next) {
+        if (ver->value != checksum) {
+            QLIST_FOREACH(voter, &ver->items, next) {
+                quorum_print_bad(acb, s->filenames[voter->index]);
+            }
+        }
+    }
+}
+
+/* Copy the data of @source into @dest; both vectors must have the same
+ * layout (asserted).
+ */
+static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
+{
+    int seg = 0;
+
+    assert(dest->niov == source->niov);
+    assert(dest->size == source->size);
+
+    while (seg < source->niov) {
+        struct iovec *to = &dest->iov[seg];
+        struct iovec *from = &source->iov[seg];
+
+        assert(to->iov_len == from->iov_len);
+        memcpy(to->iov_base, from->iov_base, from->iov_len);
+        seg++;
+    }
+}
+
+/* Record that child @index returned the version identified by @checksum,
+ * creating the version entry on first sight.
+ */
+static void quorum_count_vote(QuorumVotes *votes,
+                              unsigned long checksum,
+                              int index)
+{
+    QuorumVoteVersion *cursor = NULL;
+    QuorumVoteVersion *found = NULL;
+    QuorumVoteItem *voter;
+
+    /* look if we have something with this checksum */
+    QLIST_FOREACH(cursor, &votes->vote_list, next) {
+        if (cursor->value != checksum) {
+            continue;
+        }
+        found = cursor;
+        break;
+    }
+
+    /* It's a version not yet in the list add it */
+    if (found == NULL) {
+        found = g_new0(QuorumVoteVersion, 1);
+        QLIST_INIT(&found->items);
+        found->value = checksum;
+        found->index = index;
+        found->vote_count = 0;
+        QLIST_INSERT_HEAD(&votes->vote_list, found, next);
+    }
+
+    found->vote_count += 1;
+
+    /* remember which child cast this vote */
+    voter = g_new0(QuorumVoteItem, 1);
+    voter->index = index;
+    QLIST_INSERT_HEAD(&found->items, voter, next);
+}
+
+/* Unlink and free every version and every voter hanging off @votes. */
+static void quorum_free_vote_list(QuorumVotes *votes)
+{
+    QuorumVoteVersion *ver, *ver_tmp;
+    QuorumVoteItem *voter, *voter_tmp;
+
+    QLIST_FOREACH_SAFE(ver, &votes->vote_list, next, ver_tmp) {
+        QLIST_REMOVE(ver, next);
+
+        /* release the voters before the version that owns them */
+        QLIST_FOREACH_SAFE(voter, &ver->items, next, voter_tmp) {
+            QLIST_REMOVE(voter, next);
+            g_free(voter);
+        }
+
+        g_free(ver);
+    }
+}
+
+/* Compute the adler32 checksum of the data read from child @i. */
+static unsigned long quorum_compute_checksum(QuorumAIOCB *acb, int i)
+{
+    QEMUIOVector *vec = &acb->qiovs[i];
+    unsigned long sum = adler32(0L, Z_NULL, 0);
+    int seg = 0;
+
+    /* feed the elements in order into the running checksum */
+    while (seg < vec->niov) {
+        sum = adler32(sum, vec->iov[seg].iov_base, vec->iov[seg].iov_len);
+        seg++;
+    }
+
+    return sum;
+}
+
+/* Return the version with the highest vote count (the first one met in
+ * list order on a tie), or NULL when the list is empty.
+ */
+static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
+{
+    QuorumVoteVersion *best = NULL;
+    QuorumVoteVersion *ver;
+    int best_count = 0;
+
+    QLIST_FOREACH(ver, &votes->vote_list, next) {
+        if (ver->vote_count <= best_count) {
+            continue;
+        }
+        best_count = ver->vote_count;
+        best = ver;
+    }
+
+    return best;
+}
+
+/* Return true when both vectors hold the same data; the layouts must match
+ * (asserted).
+ */
+static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int seg;
+
+    assert(a->niov == b->niov);
+    for (seg = 0; seg < a->niov; seg++) {
+        const struct iovec *lhs = &a->iov[seg];
+        const struct iovec *rhs = &b->iov[seg];
+
+        assert(lhs->iov_len == rhs->iov_len);
+        if (memcmp(lhs->iov_base, rhs->iov_base, lhs->iov_len) != 0) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
+ const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
+ acb->sector_num, acb->nb_sectors);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(1);
+}
+
+/* Compare the data read from two children.
+ *
+ * With threshold == total == 2 any mismatch is fatal: quorum_err() reports
+ * the first differing sector and exits. Otherwise the function returns
+ * whether both vectors hold the same data.
+ */
+static bool quorum_compare(QuorumAIOCB *acb,
+                           QEMUIOVector *a,
+                           QEMUIOVector *b)
+{
+    BDRVQuorumState *s = acb->bqs;
+    ssize_t offset;
+
+    if (s->total == 2 && s->threshold == 2) {
+        offset = qemu_iovec_compare(a, b);
+        if (offset != -1) {
+            /* the argument is a uint64_t: use the unsigned conversion */
+            quorum_err(acb, "contents mismatch in sector %" PRIu64,
+                       acb->sector_num +
+                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
+        }
+        return true;
+    }
+
+    return quorum_iovec_compare(a, b);
+}
+
+
+/* Vote hook run once every child read has completed.
+ *
+ * If all successful reads agree, the first one is copied to the caller's
+ * qiov. Otherwise the reads are grouped by checksum and the most
+ * represented version wins, provided it reaches the threshold; if it does
+ * not, acb->vote_ret is set to -EIO. When no read succeeded at all,
+ * acb->vote_ret is set to -EIO as well and nothing is copied.
+ */
+static void quorum_vote(QuorumAIOCB *acb)
+{
+    bool quorum = true;
+    int i, j;
+    unsigned long checksum = 0;
+    BDRVQuorumState *s = acb->bqs;
+    QuorumVoteVersion *winner;
+
+    /* get the index of the first successful read */
+    for (i = 0; i < s->total; i++) {
+        if (!acb->aios[i].ret) {
+            break;
+        }
+    }
+
+    /* Every read failed: there is nothing to compare or to copy, and
+     * acb->qiovs[i] would be past the end of the array.
+     */
+    if (i == s->total) {
+        quorum_print_failure(acb);
+        acb->vote_ret = -EIO;
+        return;
+    }
+
+    /* compare this read with all other successful reads looking for quorum */
+    for (j = i + 1; j < s->total; j++) {
+        if (acb->aios[j].ret) {
+            continue;
+        }
+        quorum = quorum_compare(acb, &acb->qiovs[i], &acb->qiovs[j]);
+        if (!quorum) {
+            break;
+        }
+    }
+
+    /* Every successful read agrees -> Quorum */
+    if (quorum) {
+        quorum_copy_qiov(acb->qiov, &acb->qiovs[i]);
+        return;
+    }
+
+    /* compute checksums for each successful read, also store indexes */
+    for (i = 0; i < s->total; i++) {
+        if (acb->aios[i].ret) {
+            continue;
+        }
+        checksum = quorum_compute_checksum(acb, i);
+        quorum_count_vote(&acb->votes, checksum, i);
+    }
+
+    /* vote to select the most represented version */
+    winner = quorum_get_vote_winner(&acb->votes);
+    assert(winner != NULL);
+
+    /* if the winner count is smaller than threshold read fail */
+    if (winner->vote_count < s->threshold) {
+        quorum_print_failure(acb);
+        acb->vote_ret = -EIO;
+        fprintf(stderr, "quorum: vote result inferior to threshold\n");
+        goto free_exit;
+    }
+
+    /* we have a winner: copy it */
+    quorum_copy_qiov(acb->qiov, &acb->qiovs[winner->index]);
+
+    /* some versions are bad print them */
+    quorum_print_bad_versions(acb, winner->value);
+
+free_exit:
+    /* free lists */
+    quorum_free_vote_list(&acb->votes);
+}
+
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -282,6 +556,8 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
nb_sectors, cb, opaque);
int i;
+ acb->vote = quorum_vote;
+
for (i = 0; i < s->total; i++) {
acb->aios[i].buf = qemu_blockalign(bs->file, qiov->size);
qemu_iovec_init(&acb->qiovs[i], qiov->niov);
@@ -289,7 +565,7 @@ static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
}
for (i = 0; i < s->total; i++) {
- bdrv_aio_readv(s->bs[i], sector_num, qiov, nb_sectors,
+ bdrv_aio_readv(s->bs[i], sector_num, &acb->qiovs[i], nb_sectors,
quorum_aio_cb, &acb->aios[i]);
}
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC V6 07/11] quorum: Add quorum mechanism.
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 07/11] quorum: Add quorum mechanism Benoît Canet
@ 2013-01-17 22:23 ` Eric Blake
2013-01-18 15:19 ` Benoît Canet
0 siblings, 1 reply; 15+ messages in thread
From: Eric Blake @ 2013-01-17 22:23 UTC (permalink / raw)
To: Benoît Canet; +Cc: kwolf, pbonzini, qemu-devel, stefanha
[-- Attachment #1: Type: text/plain, Size: 1251 bytes --]
On 01/17/2013 08:51 AM, Benoît Canet wrote:
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
> block/quorum.c | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 277 insertions(+), 1 deletion(-)
>
> diff --git a/block/quorum.c b/block/quorum.c
> index 98052eb..2bffff4 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -14,6 +14,20 @@
> */
>
> #include "block/block_int.h"
> +#include "zlib.h"
Since zlib.h isn't part of qemu.git, this should be <zlib.h>.
> +
> +static unsigned long quorum_compute_checksum(QuorumAIOCB *acb, int i)
> +{
> + int j;
> + unsigned long adler = adler32(0L, Z_NULL, 0);
Hmm, adler32() is basically a weak hashing mechanism; are you sure you
won't have any false collisions? Furthermore, how does this compare
with the series for adding deduplication, which uses much
stronger/longer hashes, but where those take more time to compute? Is
there any way you can share efforts between the two series?
> + /* get the index of the first successfull read */
s/successfull/successful/ (several times in this patch)
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 621 bytes --]
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC V6 07/11] quorum: Add quorum mechanism.
2013-01-17 22:23 ` Eric Blake
@ 2013-01-18 15:19 ` Benoît Canet
0 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-18 15:19 UTC (permalink / raw)
To: Eric Blake; +Cc: kwolf, pbonzini, qemu-devel, stefanha
> Hmm, adler32() is basically a weak hashing mechanism; are you sure you
> won't have any false collisions? Furthermore, how does this compare
> with the series for adding deduplication, which uses much
> stronger/longer hashes, but where those take more time to compute? Is
> there any way you can share efforts between the two series?
I agree that using sha256 would be a better option, as it will not be called
frequently.
However, the call to gnutls is a one-liner, so it's likely not shareable.
The configure script would need to be modified to require gnutls, and that is
likely not factorisable either.
I'll change the code to use sha256.
Regards
Benoît
>
> > + /* get the index of the first successfull read */
>
> s/successfull/successful/ (several times in this patch)
>
> --
> Eric Blake eblake redhat com +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 08/11] quorum: Add quorum_getlength().
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (6 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 07/11] quorum: Add quorum mechanism Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 09/11] quorum: Add quorum_invalidate_cache() Benoît Canet
` (2 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Check that every bs file returns the same length.
If not, return -EIO to disable the quorum and
avoid a length discrepancy.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 2bffff4..261e9b6 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -593,12 +593,32 @@ static BlockDriverAIOCB *quorum_aio_writev(BlockDriverState *bs,
return &acb->common;
}
+static int64_t quorum_getlength(BlockDriverState *bs)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int64_t result;
+ int i;
+
+ /* check that every file has the same length, else fail with -EIO */
+ result = bdrv_getlength(s->bs[0]);
+ for (i = 1; i < s->total; i++) {
+ int64_t value = bdrv_getlength(s->bs[i]);
+ if (value != result) {
+ return -EIO;
+ }
+ }
+
+ return result; /* value reported by s->bs[0] and matched by all others */
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
.instance_size = sizeof(BDRVQuorumState),
+ .bdrv_getlength = quorum_getlength,
+
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 09/11] quorum: Add quorum_invalidate_cache().
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (7 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 08/11] quorum: Add quorum_getlength() Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 10/11] quorum: Add quorum_co_is_allocated Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 11/11] quorum: Add quorum_co_flush() Benoît Canet
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 261e9b6..bd478ab 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -611,6 +611,16 @@ static int64_t quorum_getlength(BlockDriverState *bs)
return result;
}
+static void quorum_invalidate_cache(BlockDriverState *bs)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->total; i++) { /* forward to each of the s->total files */
+ bdrv_invalidate_cache(s->bs[i]);
+ }
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -624,6 +634,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
+ .bdrv_invalidate_cache = quorum_invalidate_cache,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 10/11] quorum: Add quorum_co_is_allocated.
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (8 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 09/11] quorum: Add quorum_invalidate_cache() Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 11/11] quorum: Add quorum_co_flush() Benoît Canet
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index bd478ab..4bea906 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -621,6 +621,37 @@ static void quorum_invalidate_cache(BlockDriverState *bs)
}
}
+static int coroutine_fn quorum_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors,
+ int *pnum)
+{
+ BDRVQuorumState *s = bs->opaque;
+ QuorumVoteVersion *winner = NULL;
+ QuorumVotes result_votes, num_votes;
+ int i, result = 0, num;
+
+ QLIST_INIT(&result_votes.vote_list);
+ QLIST_INIT(&num_votes.vote_list);
+
+ for (i = 0; i < s->total; i++) { /* each file casts two votes */
+ result = bdrv_co_is_allocated(s->bs[i], sector_num, nb_sectors, &num);
+ quorum_count_vote(&result_votes, result, i);
+ quorum_count_vote(&num_votes, num, i); /* NOTE(review): num set on error? */
+ }
+
+ winner = quorum_get_vote_winner(&result_votes); /* NOTE(review): no NULL check */
+ result = winner->value; /* return value that won its vote */
+
+ winner = quorum_get_vote_winner(&num_votes);
+ *pnum = winner->value; /* voted separately from the return value */
+
+ quorum_free_vote_list(&result_votes);
+ quorum_free_vote_list(&num_votes);
+
+ return result;
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -635,6 +666,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
.bdrv_invalidate_cache = quorum_invalidate_cache,
+ .bdrv_co_is_allocated = quorum_co_is_allocated,
};
static void bdrv_quorum_init(void)
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC V6 11/11] quorum: Add quorum_co_flush().
2013-01-17 15:51 [Qemu-devel] [RFC V6 00/11] Quorum block filter Benoît Canet
` (9 preceding siblings ...)
2013-01-17 15:51 ` [Qemu-devel] [RFC V6 10/11] quorum: Add quorum_co_is_allocated Benoît Canet
@ 2013-01-17 15:51 ` Benoît Canet
10 siblings, 0 replies; 15+ messages in thread
From: Benoît Canet @ 2013-01-17 15:51 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, pbonzini, Benoît Canet, stefanha
Hold a vote to select the error to return, if any.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/block/quorum.c b/block/quorum.c
index 4bea906..dba2918 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -652,6 +652,35 @@ static int coroutine_fn quorum_co_is_allocated(BlockDriverState *bs,
return result;
}
+static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
+{
+ BDRVQuorumState *s = bs->opaque;
+ QuorumVoteVersion *winner = NULL;
+ QuorumVotes error_votes;
+ int i;
+ int result = 0;
+ bool error = false;
+
+ QLIST_INIT(&error_votes.vote_list);
+
+ for (i = 0; i < s->total; i++) {
+ result = bdrv_co_flush(s->bs[i]);
+ if (result) { /* only non-zero results are voted on */
+ error = true;
+ quorum_count_vote(&error_votes, result, i);
+ }
+ }
+
+ if (error) { /* at least one flush returned non-zero */
+ winner = quorum_get_vote_winner(&error_votes);
+ result = winner->value; /* report the winning error code */
+ }
+
+ quorum_free_vote_list(&error_votes);
+
+ return result;
+}
+
static BlockDriver bdrv_quorum = {
.format_name = "quorum",
.protocol_name = "quorum",
@@ -662,6 +691,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_file_open = quorum_open,
.bdrv_close = quorum_close,
+ .bdrv_co_flush_to_disk = quorum_co_flush,
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
--
1.7.10.4
^ permalink raw reply related [flat|nested] 15+ messages in thread