* [Qemu-devel] [PATCH 0/3] Quorum maintainance operations
@ 2014-03-11 16:36 Benoît Canet
2014-03-11 16:36 ` [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Benoît Canet @ 2014-03-11 16:36 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, Benoît Canet, stefanha, mreitz
Hello,
This series for 2.1 add the missing maintainance feature for Quorum.
The first patch allows quorum to correct corrupted reads by rewriting them.
The second add the drive-mirror-replace command to be used on a drive-mirrored
device to replace a target bs by the mirror.
The series apply on top of Fam's NBD image feecing patches.
Best regards
Benoît
Benoît Canet (3):
quorum: Add the rewrite-corrupted parameter to quorum.
block: Add drive-mirror-replace command to repair quorum files.
qemu-iotests: Add 088 new test for drive-mirror-replace.
block.c | 6 +-
block/mirror.c | 115 +++++++++++++++++++++-
block/quorum.c | 97 ++++++++++++++++--
blockdev.c | 27 ++++++
include/block/block.h | 3 +
include/block/block_int.h | 15 +++
qapi-schema.json | 38 +++++++-
qmp-commands.hx | 5 +
tests/qemu-iotests/041 | 34 +------
tests/qemu-iotests/081 | 14 +++
tests/qemu-iotests/081.out | 11 ++-
tests/qemu-iotests/088 | 221 ++++++++++++++++++++++++++++++++++++++++++
tests/qemu-iotests/088.out | 5 +
tests/qemu-iotests/group | 1 +
tests/qemu-iotests/iotests.py | 33 +++++++
trace-events | 1 +
16 files changed, 579 insertions(+), 47 deletions(-)
create mode 100755 tests/qemu-iotests/088
create mode 100644 tests/qemu-iotests/088.out
--
1.8.3.2
^ permalink raw reply [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum.
2014-03-11 16:36 [Qemu-devel] [PATCH 0/3] Quorum maintainance operations Benoît Canet
@ 2014-03-11 16:36 ` Benoît Canet
2014-03-11 19:58 ` Max Reitz
2014-03-11 16:36 ` [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files Benoît Canet
2014-03-11 16:36 ` [Qemu-devel] [PATCH 3/3] qemu-iotests: Add 088 new test for drive-mirror-replace Benoît Canet
2 siblings, 1 reply; 9+ messages in thread
From: Benoît Canet @ 2014-03-11 16:36 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha
On read operations when this parameter is set and some replicas are corrupted
while quorum can be reached quorum will proceed to rewrite the correct version
of the data to fix the corrupted replicas.
This will shine with SSD where the FTL will remap the same block at another
place on rewrite.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block/quorum.c | 97 +++++++++++++++++++++++++++++++++++++++++++---
qapi-schema.json | 5 ++-
tests/qemu-iotests/081 | 14 +++++++
tests/qemu-iotests/081.out | 11 +++++-
4 files changed, 119 insertions(+), 8 deletions(-)
diff --git a/block/quorum.c b/block/quorum.c
index bd997b7..af4fd3c 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -22,6 +22,7 @@
#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
#define QUORUM_OPT_BLKVERIFY "blkverify"
+#define QUORUM_OPT_REWRITE "rewrite-corrupted"
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
@@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
* It is useful to debug other block drivers by
* comparing them with a reference one.
*/
+ bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+ * block if Quorum is reached.
+ */
} BDRVQuorumState;
typedef struct QuorumAIOCB QuorumAIOCB;
@@ -104,13 +108,17 @@ struct QuorumAIOCB {
int count; /* number of completed AIOCB */
int success_count; /* number of successfully completed AIOCB */
+ int rewrite_count; /* number of replica to rewrite: count down to
+ * zero once writes are fired
+ */
+
QuorumVotes votes;
bool is_read;
int vote_ret;
};
-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);
static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
{
@@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
acb->count = 0;
acb->success_count = 0;
+ acb->rewrite_count = 0;
acb->votes.compare = quorum_sha256_compare;
QLIST_INIT(&acb->votes.vote_list);
acb->is_read = false;
@@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
return false;
}
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+ QuorumAIOCB *acb = opaque;
+
+ /* one less rewrite to do */
+ acb->rewrite_count--;
+
+ /* wait until all rewrite callback have completed */
+ if (acb->rewrite_count) {
+ return;
+ }
+
+ quorum_aio_finalize(acb);
+}
+
static void quorum_aio_cb(void *opaque, int ret)
{
QuorumChildRequest *sacb = opaque;
QuorumAIOCB *acb = sacb->parent;
BDRVQuorumState *s = acb->common.bs->opaque;
+ bool rewrite = false;
sacb->ret = ret;
acb->count++;
@@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
/* Do the vote on read */
if (acb->is_read) {
- quorum_vote(acb);
+ rewrite = quorum_vote(acb);
} else {
quorum_has_too_much_io_failed(acb);
}
- quorum_aio_finalize(acb);
+ /* if no rewrite is done the code will finish right away */
+ if (!rewrite) {
+ quorum_aio_finalize(acb);
+ }
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
}
}
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+ QuorumVoteValue *value)
+{
+ QuorumVoteVersion *version;
+ QuorumVoteItem *item;
+ int count = 0;
+
+ /* first count the number of bad versions: done first to avoid concurency
+ * issues.
+ */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ count++;
+ }
+ }
+
+ /* quorum_rewrite_aio_cb will count down this to zero */
+ acb->rewrite_count = count;
+
+ /* now fire the correcting rewrites */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+ acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+ }
+ }
+
+ /* return true if any rewrite is done else false */
+ return count;
+}
+
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
int i;
@@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
return ret;
}
-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
+ bool rewrite = false;
int i, j, ret;
QuorumVoteValue hash;
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
- return;
+ return false;;
}
/* get the index of the first successful read */
@@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
- return;
+ return false;;
}
/* compute hashes for each successful read, also store indexes */
@@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
/* some versions are bad print them */
quorum_report_bad_versions(s, acb, &winner->value);
+ /* corruption correction is actived */
+ if (s->rewrite_corrupted) {
+ rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+ }
+
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
+ return rewrite;
}
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -709,6 +781,11 @@ static QemuOptsList quorum_runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Trigger block verify mode if set",
},
+ {
+ .name = QUORUM_OPT_REWRITE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Rewrite corrupted block on read quorum",
+ },
{ /* end of list */ }
},
};
@@ -770,6 +847,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
"and using two files with vote_threshold=2\n");
}
+ s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+ if (s->rewrite_corrupted && s->is_blkverify) {
+ error_setg(&local_err,
+ "rewrite-corrupted=on cannot be used with blkverify=on");
+ ret = -EINVAL;
+ goto exit;
+ }
+
/* allocate the children BlockDriverState array */
s->bs = g_new0(BlockDriverState *, s->num_children);
opened = g_new0(bool, s->num_children);
diff --git a/qapi-schema.json b/qapi-schema.json
index 6476d4a..f5a8767 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4542,12 +4542,15 @@
#
# @vote-threshold: the vote limit under which a read will fail
#
+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
+# (Since 2.1)
+#
# Since: 2.0
##
{ 'type': 'BlockdevOptionsQuorum',
'data': { '*blkverify': 'bool',
'children': [ 'BlockdevRef' ],
- 'vote-threshold': 'int' } }
+ 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
##
# @BlockdevOptions
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index b512d00..bb211d6 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -95,6 +95,18 @@ echo "== checking quorum correction =="
$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
echo
+echo "== using quorum rewrite corrupted mode =="
+
+quorum="$quorum,file.rewrite-corrupted=on"
+
+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
+
+echo
+echo "== checking that quorum has corrected the corrupted file =="
+
+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
+echo
echo "== checking mixed reference/option specification =="
run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
@@ -137,6 +149,8 @@ echo
echo "== breaking quorum =="
$QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
+
echo
echo "== checking that quorum is broken =="
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 84aeb0c..2d8b290 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -25,12 +25,19 @@ wrote 10485760/10485760 bytes at offset 0
read 10485760/10485760 bytes at offset 0
10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== using quorum rewrite corrupted mode ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== checking that quorum has corrected the corrupted file ==
+read 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
== checking mixed reference/option specification ==
Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
QMP_VERSION
{"return": {}}
{"return": {}}
-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "", "sectors-count": 20480, "sector-num": 0}}
read 10485760/10485760 bytes at offset 0
10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
{"return": ""}
@@ -43,6 +50,8 @@ read 10485760/10485760 bytes at offset 0
== breaking quorum ==
wrote 10485760/10485760 bytes at offset 0
10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== checking that quorum is broken ==
qemu-io: can't open device (null): Could not read image for determining its format: Input/output error
--
1.8.3.2
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files.
2014-03-11 16:36 [Qemu-devel] [PATCH 0/3] Quorum maintainance operations Benoît Canet
2014-03-11 16:36 ` [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
@ 2014-03-11 16:36 ` Benoît Canet
2014-03-11 21:00 ` Max Reitz
2014-03-11 16:36 ` [Qemu-devel] [PATCH 3/3] qemu-iotests: Add 088 new test for drive-mirror-replace Benoît Canet
2 siblings, 1 reply; 9+ messages in thread
From: Benoît Canet @ 2014-03-11 16:36 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha
When a quorum file is totally destroyed (broken filer) the user can start a
drive-mirror job on the quorum block backend and then replace the broken
quorum file with drive-mirror-replace given it has a node-name.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
block.c | 6 +--
block/mirror.c | 115 ++++++++++++++++++++++++++++++++++++++++++++--
blockdev.c | 27 +++++++++++
include/block/block.h | 3 ++
include/block/block_int.h | 15 ++++++
qapi-schema.json | 33 +++++++++++++
qmp-commands.hx | 5 ++
trace-events | 1 +
8 files changed, 199 insertions(+), 6 deletions(-)
diff --git a/block.c b/block.c
index 9f2fe85..2354e5b 100644
--- a/block.c
+++ b/block.c
@@ -787,9 +787,9 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
return open_flags;
}
-static int bdrv_assign_node_name(BlockDriverState *bs,
- const char *node_name,
- Error **errp)
+int bdrv_assign_node_name(BlockDriverState *bs,
+ const char *node_name,
+ Error **errp)
{
if (!node_name) {
return 0;
diff --git a/block/mirror.c b/block/mirror.c
index 6dc84e8..8133ca5 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -49,6 +49,15 @@ typedef struct MirrorBlockJob {
unsigned long *in_flight_bitmap;
int in_flight;
int ret;
+ /* these four fields are used by drive-mirror-replace */
+ /* Must the code replace a target with the new mirror */
+ bool must_replace;
+ /* The block BDS to replace */
+ BlockDriverState *to_replace;
+ /* the node-name of the new mirror BDS */
+ char *new_node_name;
+ /* Used to block operations on the drive-mirror-replace target. */
+ Error *replace_blocker;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque)
MirrorBlockJob *s = opaque;
BlockDriverState *bs = s->common.bs;
int64_t sector_num, end, sectors_per_chunk, length;
+ BlockDriverState *to_replace;
uint64_t last_pause_ns;
BlockDriverInfo bdi;
char backing_filename[1024];
@@ -477,11 +487,17 @@ immediate_exit:
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_iostatus_disable(s->target);
+ /* Here we handle the drive-mirror-replace QMP command */
+ if (s->must_replace) {
+ to_replace = s->to_replace;
+ } else {
+ to_replace = s->common.bs;
+ }
if (s->should_complete && ret == 0) {
- if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
- bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+ if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+ bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
}
- bdrv_swap(s->target, s->common.bs);
+ bdrv_swap(s->target, to_replace);
if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
/* drop the bs loop chain formed by the swap: break the loop then
* trigger the unref from the top one */
@@ -491,6 +507,11 @@ immediate_exit:
bdrv_unref(p);
}
}
+ if (s->must_replace) {
+ bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+ error_free(s->replace_blocker);
+ bdrv_unref(s->to_replace);
+ }
bdrv_unref(s->target);
block_job_completed(&s->common, ret);
}
@@ -513,6 +534,87 @@ static void mirror_iostatus_reset(BlockJob *job)
bdrv_iostatus_reset(s->target);
}
+bool mirror_set_replace_target(BlockJob *job, const char *reference,
+ bool has_new_node_name,
+ const char *new_node_name, Error **errp)
+{
+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+ BlockDriverState *to_replace;
+
+ /* We don't want to give too much power to the user as could result on
+ * BlockDriverState loops if used with something other than sync=full.
+ */
+ if (s->is_none_mode || s->base) {
+ error_setg(errp, "Can only be used on a mirror done with sync=full");
+ return false;
+ }
+
+ /* check target reference is not an empty string */
+ if (!reference[0]) {
+ error_setg(errp, "target-reference is an empty string");
+ return false;
+ }
+
+ /* Get the to replace block driver state */
+ to_replace = bdrv_lookup_bs(reference, reference, errp);
+ if (!to_replace) {
+ return false;
+ }
+
+ /* If the BDS to replace is a regular node we need a new node name */
+ if (to_replace->node_name[0] && !has_new_node_name) {
+ error_setg(errp, "A new-node-name must be provided");
+ return false;
+ }
+
+ /* Can only replace something else than the source of the mirror */
+ if (to_replace == job->bs) {
+ error_setg(errp, "Cannot replace the mirror source");
+ return false;
+ }
+
+ /* are this bs replace operation blocked */
+ if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) {
+ return false;
+ }
+
+ /* save usefull infos for later */
+ s->to_replace = to_replace;
+ assert(has_new_node_name);
+ s->new_node_name = g_strdup(new_node_name);
+
+ return true;
+}
+
+static void mirror_activate_replace_target(BlockJob *job, Error **errp)
+{
+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+ /* Set the new node name if the BDS to replace is a regular node
+ * of the graph.
+ */
+ if (s->to_replace->node_name[0]) {
+ assert(s->new_node_name);
+ bdrv_assign_node_name(s->target, s->new_node_name, errp);
+ g_free(s->new_node_name);
+ }
+
+ if (error_is_set(errp)) {
+ s->to_replace = NULL;
+ return;
+ }
+
+ /* block all operations on the target bs */
+ error_setg(&s->replace_blocker,
+ "block device is in use by drive-mirror-replace");
+ bdrv_op_block_all(s->to_replace, s->replace_blocker);
+
+ bdrv_ref(s->to_replace);
+
+ /* activate the replacement operation */
+ s->must_replace = true;
+}
+
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -529,6 +631,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
return;
}
+ /* drive-mirror-replace is being called on this job so activate the
+ * replacement target
+ */
+ if (s->to_replace) {
+ mirror_activate_replace_target(job, errp);
+ }
+
s->should_complete = true;
block_job_resume(job);
}
diff --git a/blockdev.c b/blockdev.c
index 8a6ae0a..901a05d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp)
block_job_complete(job, errp);
}
+void qmp_drive_mirror_replace(const char *device, const char *target_reference,
+ bool has_new_node_name,
+ const char *new_node_name,
+ Error **errp)
+{
+ BlockJob *job = find_block_job(device);
+
+ if (!job) {
+ error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+ return;
+ }
+
+ if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) {
+ error_setg(errp, "Can only be used on a drive-mirror block job");
+ return;
+ }
+
+ if (!mirror_set_replace_target(job, target_reference, has_new_node_name,
+ new_node_name, errp)) {
+ return;
+ }
+
+ trace_qmp_drive_mirror_replace(job, target_reference,
+ has_new_node_name ? new_node_name : "");
+ block_job_complete(job, errp);
+}
+
void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
{
QmpOutputVisitor *ov = qmp_output_visitor_new();
diff --git a/include/block/block.h b/include/block/block.h
index ee1582d..a9d6ead 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -171,6 +171,7 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_MIRROR,
BLOCK_OP_TYPE_RESIZE,
BLOCK_OP_TYPE_STREAM,
+ BLOCK_OP_TYPE_REPLACE,
BLOCK_OP_TYPE_MAX,
} BlockOpType;
@@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
QDict *options, const char *bdref_key, int flags,
bool allow_none, Error **errp);
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
+int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name,
+ Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
int bdrv_open(BlockDriverState **pbs, const char *filename,
const char *reference, QDict *options, int flags,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1f4f78b..ea281c8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
void *opaque, Error **errp);
/*
+ * mirror_set_replace_target:
+ * @job: An active mirroring block job started with sync=full.
+ * @reference: id or node-name of the BDS to replace when the mirror is done.
+ * @has_new_node_name: Set to true if new_node_name if provided
+ * @new_node_name: The optional new node name of the new mirror.
+ * @errp: Error object.
+ *
+ * Prepare a mirroring operation to replace a BDS pointed to by reference with
+ * the new mirror.
+ */
+bool mirror_set_replace_target(BlockJob *job, const char *reference,
+ bool has_new_node_name,
+ const char *new_node_name, Error **errp);
+
+/*
* backup_start:
* @bs: Block device to operate on.
* @target: Block device to write to.
diff --git a/qapi-schema.json b/qapi-schema.json
index f5a8767..33c5ab1 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2716,6 +2716,39 @@
{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
##
+# @drive-mirror-replace:
+#
+# Manually trigger completion of an active background drive-mirror operation
+# and replace the target reference with the new mirror.
+# This switch the device to write to the target path only.
+# The ability to complete is signaled with a BLOCK_JOB_READY event.
+#
+# This command completes an active drive-mirror background operation
+# synchronously and replace the target reference with the mirror.
+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined. Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused drive-mirror job cannot be completed.
+#
+# @device: the device name
+# @target-reference: the id or node name of the block driver state to replace
+# @new-node-name: #optional set the node-name of the new block driver state
+# needed the target reference point to a regular node of the
+# graph
+#
+# Returns: Nothing on success
+# If no background operation is active on this device, DeviceNotActive
+#
+# Since: 2.1
+##
+{ 'command': 'drive-mirror-replace',
+ 'data': { 'device': 'str', 'target-reference': 'str',
+ '*new-node-name': 'str' } }
+
+##
# @ObjectTypeInfo:
#
# This structure describes a search result from @qom-list-types
diff --git a/qmp-commands.hx b/qmp-commands.hx
index dd336f7..3b5b382 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1150,6 +1150,11 @@ EQMP
.mhandler.cmd_new = qmp_marshal_input_block_job_complete,
},
{
+ .name = "drive-mirror-replace",
+ .args_type = "device:B,target-reference:s,new-node-name:s?",
+ .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace,
+ },
+ {
.name = "transaction",
.args_type = "actions:q",
.mhandler.cmd_new = qmp_marshal_input_transaction,
diff --git a/trace-events b/trace-events
index aec4202..5282904 100644
--- a/trace-events
+++ b/trace-events
@@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p"
qmp_block_job_pause(void *job) "job %p"
qmp_block_job_resume(void *job) "job %p"
qmp_block_job_complete(void *job) "job %p"
+qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s"
block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
qmp_block_stream(void *bs, void *job) "bs %p job %p"
--
1.8.3.2
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [Qemu-devel] [PATCH 3/3] qemu-iotests: Add 088 new test for drive-mirror-replace.
2014-03-11 16:36 [Qemu-devel] [PATCH 0/3] Quorum maintainance operations Benoît Canet
2014-03-11 16:36 ` [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
2014-03-11 16:36 ` [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files Benoît Canet
@ 2014-03-11 16:36 ` Benoît Canet
2 siblings, 0 replies; 9+ messages in thread
From: Benoît Canet @ 2014-03-11 16:36 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf, Benoît Canet, Benoit Canet, mreitz, stefanha
Tests for drive-mirror-replace whose purpose is to enable quorum file mirroring
and replacement after failure.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
tests/qemu-iotests/041 | 34 +------
tests/qemu-iotests/088 | 221 ++++++++++++++++++++++++++++++++++++++++++
tests/qemu-iotests/088.out | 5 +
tests/qemu-iotests/group | 1 +
tests/qemu-iotests/iotests.py | 33 +++++++
5 files changed, 261 insertions(+), 33 deletions(-)
create mode 100755 tests/qemu-iotests/088
create mode 100644 tests/qemu-iotests/088.out
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index ec470b2..10535a6 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -21,45 +21,13 @@
import time
import os
import iotests
-from iotests import qemu_img, qemu_io
+from iotests import qemu_img, qemu_io, ImageMirroringTestCase
backing_img = os.path.join(iotests.test_dir, 'backing.img')
target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
test_img = os.path.join(iotests.test_dir, 'test.img')
target_img = os.path.join(iotests.test_dir, 'target.img')
-class ImageMirroringTestCase(iotests.QMPTestCase):
- '''Abstract base class for image mirroring test cases'''
-
- def wait_ready(self, drive='drive0'):
- '''Wait until a block job BLOCK_JOB_READY event'''
- ready = False
- while not ready:
- for event in self.vm.get_qmp_events(wait=True):
- if event['event'] == 'BLOCK_JOB_READY':
- self.assert_qmp(event, 'data/type', 'mirror')
- self.assert_qmp(event, 'data/device', drive)
- ready = True
-
- def wait_ready_and_cancel(self, drive='drive0'):
- self.wait_ready(drive)
- event = self.cancel_and_wait()
- self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
- self.assert_qmp(event, 'data/type', 'mirror')
- self.assert_qmp(event, 'data/offset', self.image_len)
- self.assert_qmp(event, 'data/len', self.image_len)
-
- def complete_and_wait(self, drive='drive0', wait_ready=True):
- '''Complete a block job and wait for it to finish'''
- if wait_ready:
- self.wait_ready()
-
- result = self.vm.qmp('block-job-complete', device=drive)
- self.assert_qmp(result, 'return', {})
-
- event = self.wait_until_completed()
- self.assert_qmp(event, 'data/type', 'mirror')
-
class TestSingleDrive(ImageMirroringTestCase):
image_len = 1 * 1024 * 1024 # MB
diff --git a/tests/qemu-iotests/088 b/tests/qemu-iotests/088
new file mode 100755
index 0000000..326d307
--- /dev/null
+++ b/tests/qemu-iotests/088
@@ -0,0 +1,221 @@
+#!/usr/bin/env python
+#
+# Tests for Quorum image replacement
+#
+# Copyright (C) 2014 Nodalink, EURL.
+#
+# based on 041
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import subprocess
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_img_args
+from iotests import ImageMirroringTestCase
+
+quorum_img1 = os.path.join(iotests.test_dir, 'quorum1.img')
+quorum_img2 = os.path.join(iotests.test_dir, 'quorum2.img')
+quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img')
+quorum_backup_img = os.path.join(iotests.test_dir, 'quorum_backup.img')
+quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
+quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
+
+class TestRepairQuorum(ImageMirroringTestCase):
+ """ This class test quorum file repair using drive-mirror.
+ It's mostly a fork of TestSingleDrive. """
+ image_len = 1 * 1024 * 1024 # MB
+ IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ]
+
+ def setUp(self):
+ self.vm = iotests.VM()
+
+ # Add each individual quorum images
+ for i in self.IMAGES:
+ qemu_img('create', '-f', iotests.imgfmt, i,
+ str(self.image_len))
+ # Assign a node name to each quorum image in order to manipulate
+ # them
+ opts = "node-name=img%i" % self.IMAGES.index(i)
+ self.vm = self.vm.add_drive(i, opts)
+
+ self.vm.launch()
+
+ #assemble the quorum block device from the individual files
+ args = { "options" : { "driver": "quorum", "id": "quorum0",
+ "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } }
+ result = self.vm.qmp("blockdev-add", **args)
+ self.assert_qmp(result, 'return', {})
+
+
+ def tearDown(self):
+ self.vm.shutdown()
+ for i in self.IMAGES + [ quorum_repair_img ]:
+ # Do a try/except because the test may have deleted some images
+ try:
+ os.remove(i)
+ except OSError:
+ pass
+
+ def test_complete(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='full',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready(drive='quorum0')
+
+ result = self.vm.qmp('drive-mirror-replace', device='quorum0',
+ target_reference='img1', new_node_name='img11')
+ self.assert_qmp(result, 'return', {})
+
+ event = self.wait_until_completed(drive='quorum0')
+ self.assert_qmp(event, 'data/type', 'mirror')
+
+ result = self.vm.qmp('query-named-block-nodes')
+ self.assert_qmp(result, 'return[0]/file', quorum_repair_img)
+ # TODO: a better test requiring some QEMU infrastructure will be added
+ # to check that this file is really driven by quorum
+ self.vm.shutdown()
+ self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
+ 'target image does not match source after mirroring')
+
+ def test_device_not_active(self):
+ result = self.vm.qmp('drive-mirror-replace', device='quorum12',
+ target_reference='img1', new_node_name='img11')
+ self.assert_qmp(result, 'error/class', 'DeviceNotActive')
+ self.vm.shutdown()
+
+ def test_sync_no_full(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='none',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready(drive='quorum0')
+
+ result = self.vm.qmp('drive-mirror-replace', device='quorum0',
+ target_reference='img1', new_node_name='img11')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ event = self.cancel_and_wait(drive='quorum0', resume=True)
+ self.assert_qmp(event, 'data/type', 'mirror')
+
+ self.vm.shutdown()
+
+ def test_sync_empty_reference(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='none',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready(drive='quorum0')
+
+ result = self.vm.qmp('drive-mirror-replace', device='quorum0',
+ target_reference='', new_node_name='img11')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ event = self.cancel_and_wait(drive='quorum0', resume=True)
+ self.assert_qmp(event, 'data/type', 'mirror')
+
+ self.vm.shutdown()
+
+ def test_sync_no_reference(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='none',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready(drive='quorum0')
+
+ result = self.vm.qmp('drive-mirror-replace', device='quorum0',
+ new_node_name='img11')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ event = self.cancel_and_wait(drive='quorum0', resume=True)
+ self.assert_qmp(event, 'data/type', 'mirror')
+
+ self.vm.shutdown()
+
+ def test_sync_no_node_name(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='none',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready(drive='quorum0')
+
+ result = self.vm.qmp('drive-mirror-replace', device='quorum0',
+ target_reference='img1')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ event = self.cancel_and_wait(drive='quorum0', resume=True)
+ self.assert_qmp(event, 'data/type', 'mirror')
+
+ self.vm.shutdown()
+
+ def test_sync_empty_node_name(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='none',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready(drive='quorum0')
+
+ result = self.vm.qmp('drive-mirror-replace', device='quorum0',
+ target_reference='img1', new_node_name='')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ event = self.cancel_and_wait(drive='quorum0', resume=True)
+ self.assert_qmp(event, 'data/type', 'mirror')
+
+ self.vm.shutdown()
+
+ def test_source_and_dest_equal(self):
+ self.assert_no_active_block_jobs()
+
+ result = self.vm.qmp('drive-mirror', device='quorum0', sync='none',
+ target=quorum_repair_img, format=iotests.imgfmt)
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_ready(drive='quorum0')
+
+ result = self.vm.qmp('drive-mirror-replace', device='quorum0',
+ target_reference='quorum0', new_node_name='img11')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+
+ event = self.cancel_and_wait(drive='quorum0', resume=True)
+ self.assert_qmp(event, 'data/type', 'mirror')
+
+ self.vm.shutdown()
+
+if __name__ == '__main__':
+ p1 = subprocess.Popen(qemu_img_args, stdout=subprocess.PIPE)
+ p2 = subprocess.Popen(["grep", "quorum"], stdin=p1.stdout, stdout=subprocess.PIPE)
+ p1.stdout.close() # Allow p1 to receive a SIGsubprocess.PIPE if p2 exits.
+ has_quorum = len(p2.communicate()[0]) != 0
+ if has_quorum:
+ iotests.main(supported_fmts=['qcow2', 'qed'])
+ else:
+ f = open("088.out", "r")
+ print(f.read().strip())
+ f.close()
diff --git a/tests/qemu-iotests/088.out b/tests/qemu-iotests/088.out
new file mode 100644
index 0000000..594c16f
--- /dev/null
+++ b/tests/qemu-iotests/088.out
@@ -0,0 +1,5 @@
+........
+----------------------------------------------------------------------
+Ran 8 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index b96c6bc..e590d09 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -89,3 +89,4 @@
085 rw auto quick
086 rw auto quick
087 rw auto quick
+088 rw auto
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index e4fa9af..a803943 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -272,6 +272,39 @@ class QMPTestCase(unittest.TestCase):
self.assert_no_active_block_jobs()
return event
+# made common here for 041 and 088
+class ImageMirroringTestCase(QMPTestCase):
+ '''Abstract base class for image mirroring test cases'''
+
+ def wait_ready(self, drive='drive0'):
+ '''Wait until a block job BLOCK_JOB_READY event'''
+ ready = False
+ while not ready:
+ for event in self.vm.get_qmp_events(wait=True):
+ if event['event'] == 'BLOCK_JOB_READY':
+ self.assert_qmp(event, 'data/type', 'mirror')
+ self.assert_qmp(event, 'data/device', drive)
+ ready = True
+
+ def wait_ready_and_cancel(self, drive='drive0'):
+ self.wait_ready(drive=drive)
+ event = self.cancel_and_wait(drive=drive)
+ self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
+ self.assert_qmp(event, 'data/type', 'mirror')
+ self.assert_qmp(event, 'data/offset', self.image_len)
+ self.assert_qmp(event, 'data/len', self.image_len)
+
+ def complete_and_wait(self, drive='drive0', wait_ready=True):
+ '''Complete a block job and wait for it to finish'''
+ if wait_ready:
+ self.wait_ready(drive=drive)
+
+ result = self.vm.qmp('block-job-complete', device=drive)
+ self.assert_qmp(result, 'return', {})
+
+ event = self.wait_until_completed(drive=drive)
+ self.assert_qmp(event, 'data/type', 'mirror')
+
def notrun(reason):
'''Skip this test suite'''
# Each test in qemu-iotests has a number ("seq")
--
1.8.3.2
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum.
2014-03-11 16:36 ` [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
@ 2014-03-11 19:58 ` Max Reitz
2014-03-11 20:14 ` Benoît Canet
0 siblings, 1 reply; 9+ messages in thread
From: Max Reitz @ 2014-03-11 19:58 UTC (permalink / raw)
To: Benoît Canet, qemu-devel; +Cc: kwolf, Benoit Canet, stefanha
On 11.03.2014 17:36, Benoît Canet wrote:
> On read operations when this parameter is set and some replicas are corrupted
> while quorum can be reached quorum will proceed to rewrite the correct version
> of the data to fix the corrupted replicas.
>
> This will shine with SSD where the FTL will remap the same block at another
> place on rewrite.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
> block/quorum.c | 97 +++++++++++++++++++++++++++++++++++++++++++---
> qapi-schema.json | 5 ++-
> tests/qemu-iotests/081 | 14 +++++++
> tests/qemu-iotests/081.out | 11 +++++-
> 4 files changed, 119 insertions(+), 8 deletions(-)
>
> diff --git a/block/quorum.c b/block/quorum.c
> index bd997b7..af4fd3c 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -22,6 +22,7 @@
>
> #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
> #define QUORUM_OPT_BLKVERIFY "blkverify"
> +#define QUORUM_OPT_REWRITE "rewrite-corrupted"
>
> /* This union holds a vote hash value */
> typedef union QuorumVoteValue {
> @@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
> * It is useful to debug other block drivers by
> * comparing them with a reference one.
> */
> + bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
> + * block if Quorum is reached.
> + */
> } BDRVQuorumState;
>
> typedef struct QuorumAIOCB QuorumAIOCB;
> @@ -104,13 +108,17 @@ struct QuorumAIOCB {
> int count; /* number of completed AIOCB */
> int success_count; /* number of successfully completed AIOCB */
>
> + int rewrite_count; /* number of replica to rewrite: count down to
> + * zero once writes are fired
> + */
> +
> QuorumVotes votes;
>
> bool is_read;
> int vote_ret;
> };
>
> -static void quorum_vote(QuorumAIOCB *acb);
> +static bool quorum_vote(QuorumAIOCB *acb);
>
> static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
> {
> @@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
> acb->count = 0;
> acb->success_count = 0;
> + acb->rewrite_count = 0;
> acb->votes.compare = quorum_sha256_compare;
> QLIST_INIT(&acb->votes.vote_list);
> acb->is_read = false;
> @@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
> return false;
> }
>
> +static void quorum_rewrite_aio_cb(void *opaque, int ret)
> +{
> + QuorumAIOCB *acb = opaque;
> +
> + /* one less rewrite to do */
> + acb->rewrite_count--;
> +
> + /* wait until all rewrite callback have completed */
*callbacks
> + if (acb->rewrite_count) {
> + return;
> + }
> +
> + quorum_aio_finalize(acb);
> +}
> +
> static void quorum_aio_cb(void *opaque, int ret)
> {
> QuorumChildRequest *sacb = opaque;
> QuorumAIOCB *acb = sacb->parent;
> BDRVQuorumState *s = acb->common.bs->opaque;
> + bool rewrite = false;
>
> sacb->ret = ret;
> acb->count++;
> @@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
>
> /* Do the vote on read */
> if (acb->is_read) {
> - quorum_vote(acb);
> + rewrite = quorum_vote(acb);
> } else {
> quorum_has_too_much_io_failed(acb);
> }
>
> - quorum_aio_finalize(acb);
> + /* if no rewrite is done the code will finish right away */
> + if (!rewrite) {
> + quorum_aio_finalize(acb);
> + }
> }
>
> static void quorum_report_bad_versions(BDRVQuorumState *s,
> @@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
> }
> }
>
> +static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
> + QuorumVoteValue *value)
> +{
> + QuorumVoteVersion *version;
> + QuorumVoteItem *item;
> + int count = 0;
> +
> + /* first count the number of bad versions: done first to avoid concurency
*concurrency
> + * issues.
> + */
> + QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> + if (acb->votes.compare(&version->value, value)) {
> + continue;
> + }
> + QLIST_FOREACH(item, &version->items, next) {
> + count++;
> + }
> + }
> +
> + /* quorum_rewrite_aio_cb will count down this to zero */
> + acb->rewrite_count = count;
> +
> + /* now fire the correcting rewrites */
> + QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> + if (acb->votes.compare(&version->value, value)) {
> + continue;
> + }
> + QLIST_FOREACH(item, &version->items, next) {
> + bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
> + acb->nb_sectors, quorum_rewrite_aio_cb, acb);
> + }
> + }
> +
> + /* return true if any rewrite is done else false */
> + return count;
> +}
> +
> static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> {
> int i;
> @@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
> return ret;
> }
>
> -static void quorum_vote(QuorumAIOCB *acb)
> +static bool quorum_vote(QuorumAIOCB *acb)
> {
> bool quorum = true;
> + bool rewrite = false;
> int i, j, ret;
> QuorumVoteValue hash;
> BDRVQuorumState *s = acb->common.bs->opaque;
> QuorumVoteVersion *winner;
>
> if (quorum_has_too_much_io_failed(acb)) {
> - return;
> + return false;;
Two semicolons here.
> }
>
> /* get the index of the first successful read */
> @@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
> /* Every successful read agrees */
> if (quorum) {
> quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
> - return;
> + return false;;
And here.
> }
>
> /* compute hashes for each successful read, also store indexes */
> @@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
> /* some versions are bad print them */
> quorum_report_bad_versions(s, acb, &winner->value);
>
> + /* corruption correction is actived */
I'd vote for "enabled" rather than "actived". But I like "corruption
correction". :-)
> + if (s->rewrite_corrupted) {
> + rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
> + }
> +
> free_exit:
> /* free lists */
> quorum_free_vote_list(&acb->votes);
> + return rewrite;
> }
>
> static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> @@ -709,6 +781,11 @@ static QemuOptsList quorum_runtime_opts = {
> .type = QEMU_OPT_BOOL,
> .help = "Trigger block verify mode if set",
> },
> + {
> + .name = QUORUM_OPT_REWRITE,
> + .type = QEMU_OPT_BOOL,
> + .help = "Rewrite corrupted block on read quorum",
> + },
> { /* end of list */ }
> },
> };
> @@ -770,6 +847,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
> "and using two files with vote_threshold=2\n");
> }
>
> + s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
> + if (s->rewrite_corrupted && s->is_blkverify) {
> + error_setg(&local_err,
> + "rewrite-corrupted=on cannot be used with blkverify=on");
> + ret = -EINVAL;
> + goto exit;
> + }
> +
> /* allocate the children BlockDriverState array */
> s->bs = g_new0(BlockDriverState *, s->num_children);
> opened = g_new0(bool, s->num_children);
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 6476d4a..f5a8767 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4542,12 +4542,15 @@
> #
> # @vote-threshold: the vote limit under which a read will fail
> #
> +# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
> +# (Since 2.1)
> +#
> # Since: 2.0
> ##
> { 'type': 'BlockdevOptionsQuorum',
> 'data': { '*blkverify': 'bool',
> 'children': [ 'BlockdevRef' ],
> - 'vote-threshold': 'int' } }
> + 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
>
> ##
> # @BlockdevOptions
> diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
> index b512d00..bb211d6 100755
> --- a/tests/qemu-iotests/081
> +++ b/tests/qemu-iotests/081
> @@ -95,6 +95,18 @@ echo "== checking quorum correction =="
> $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
>
> echo
> +echo "== using quorum rewrite corrupted mode =="
> +
> +quorum="$quorum,file.rewrite-corrupted=on"
> +
> +$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> +
> +echo
> +echo "== checking that quorum has corrected the corrupted file =="
> +
> +$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> +
I'd prefer it if you could put these new test cases below the "checking
mixed reference/option specification" test, just because I'd like that
test to output the quorum warning - especially there is no other point
in the test where we can see that QMP message.
But that's up to you and if you at least fix the double semicolons:
Reviewed-by: Max Reitz <mreitz@redhat.com>
> +echo
> echo "== checking mixed reference/option specification =="
>
> run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
> @@ -137,6 +149,8 @@ echo
> echo "== breaking quorum =="
>
> $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
> +$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> +
> echo
> echo "== checking that quorum is broken =="
>
> diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
> index 84aeb0c..2d8b290 100644
> --- a/tests/qemu-iotests/081.out
> +++ b/tests/qemu-iotests/081.out
> @@ -25,12 +25,19 @@ wrote 10485760/10485760 bytes at offset 0
> read 10485760/10485760 bytes at offset 0
> 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
>
> +== using quorum rewrite corrupted mode ==
> +read 10485760/10485760 bytes at offset 0
> +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +
> +== checking that quorum has corrected the corrupted file ==
> +read 10485760/10485760 bytes at offset 0
> +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +
> == checking mixed reference/option specification ==
> Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
> QMP_VERSION
> {"return": {}}
> {"return": {}}
> -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "", "sectors-count": 20480, "sector-num": 0}}
> read 10485760/10485760 bytes at offset 0
> 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> {"return": ""}
> @@ -43,6 +50,8 @@ read 10485760/10485760 bytes at offset 0
> == breaking quorum ==
> wrote 10485760/10485760 bytes at offset 0
> 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +wrote 10485760/10485760 bytes at offset 0
> +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
>
> == checking that quorum is broken ==
> qemu-io: can't open device (null): Could not read image for determining its format: Input/output error
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum.
2014-03-11 19:58 ` Max Reitz
@ 2014-03-11 20:14 ` Benoît Canet
0 siblings, 0 replies; 9+ messages in thread
From: Benoît Canet @ 2014-03-11 20:14 UTC (permalink / raw)
To: Max Reitz; +Cc: Benoît Canet, kwolf, qemu-devel, stefanha
The Tuesday 11 Mar 2014 à 20:58:06 (+0100), Max Reitz wrote :
> On 11.03.2014 17:36, Benoît Canet wrote:
> >On read operations when this parameter is set and some replicas are corrupted
> >while quorum can be reached quorum will proceed to rewrite the correct version
> >of the data to fix the corrupted replicas.
> >
> >This will shine with SSD where the FTL will remap the same block at another
> >place on rewrite.
> >
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> > block/quorum.c | 97 +++++++++++++++++++++++++++++++++++++++++++---
> > qapi-schema.json | 5 ++-
> > tests/qemu-iotests/081 | 14 +++++++
> > tests/qemu-iotests/081.out | 11 +++++-
> > 4 files changed, 119 insertions(+), 8 deletions(-)
> >
> >diff --git a/block/quorum.c b/block/quorum.c
> >index bd997b7..af4fd3c 100644
> >--- a/block/quorum.c
> >+++ b/block/quorum.c
> >@@ -22,6 +22,7 @@
> > #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
> > #define QUORUM_OPT_BLKVERIFY "blkverify"
> >+#define QUORUM_OPT_REWRITE "rewrite-corrupted"
> > /* This union holds a vote hash value */
> > typedef union QuorumVoteValue {
> >@@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
> > * It is useful to debug other block drivers by
> > * comparing them with a reference one.
> > */
> >+ bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
> >+ * block if Quorum is reached.
> >+ */
> > } BDRVQuorumState;
> > typedef struct QuorumAIOCB QuorumAIOCB;
> >@@ -104,13 +108,17 @@ struct QuorumAIOCB {
> > int count; /* number of completed AIOCB */
> > int success_count; /* number of successfully completed AIOCB */
> >+ int rewrite_count; /* number of replica to rewrite: count down to
> >+ * zero once writes are fired
> >+ */
> >+
> > QuorumVotes votes;
> > bool is_read;
> > int vote_ret;
> > };
> >-static void quorum_vote(QuorumAIOCB *acb);
> >+static bool quorum_vote(QuorumAIOCB *acb);
> > static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
> > {
> >@@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> > acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
> > acb->count = 0;
> > acb->success_count = 0;
> >+ acb->rewrite_count = 0;
> > acb->votes.compare = quorum_sha256_compare;
> > QLIST_INIT(&acb->votes.vote_list);
> > acb->is_read = false;
> >@@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
> > return false;
> > }
> >+static void quorum_rewrite_aio_cb(void *opaque, int ret)
> >+{
> >+ QuorumAIOCB *acb = opaque;
> >+
> >+ /* one less rewrite to do */
> >+ acb->rewrite_count--;
> >+
> >+ /* wait until all rewrite callback have completed */
>
> *callbacks
>
> >+ if (acb->rewrite_count) {
> >+ return;
> >+ }
> >+
> >+ quorum_aio_finalize(acb);
> >+}
> >+
> > static void quorum_aio_cb(void *opaque, int ret)
> > {
> > QuorumChildRequest *sacb = opaque;
> > QuorumAIOCB *acb = sacb->parent;
> > BDRVQuorumState *s = acb->common.bs->opaque;
> >+ bool rewrite = false;
> > sacb->ret = ret;
> > acb->count++;
> >@@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
> > /* Do the vote on read */
> > if (acb->is_read) {
> >- quorum_vote(acb);
> >+ rewrite = quorum_vote(acb);
> > } else {
> > quorum_has_too_much_io_failed(acb);
> > }
> >- quorum_aio_finalize(acb);
> >+ /* if no rewrite is done the code will finish right away */
> >+ if (!rewrite) {
> >+ quorum_aio_finalize(acb);
> >+ }
> > }
> > static void quorum_report_bad_versions(BDRVQuorumState *s,
> >@@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
> > }
> > }
> >+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
> >+ QuorumVoteValue *value)
> >+{
> >+ QuorumVoteVersion *version;
> >+ QuorumVoteItem *item;
> >+ int count = 0;
> >+
> >+ /* first count the number of bad versions: done first to avoid concurency
>
> *concurrency
>
> >+ * issues.
> >+ */
> >+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+ if (acb->votes.compare(&version->value, value)) {
> >+ continue;
> >+ }
> >+ QLIST_FOREACH(item, &version->items, next) {
> >+ count++;
> >+ }
> >+ }
> >+
> >+ /* quorum_rewrite_aio_cb will count down this to zero */
> >+ acb->rewrite_count = count;
> >+
> >+ /* now fire the correcting rewrites */
> >+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+ if (acb->votes.compare(&version->value, value)) {
> >+ continue;
> >+ }
> >+ QLIST_FOREACH(item, &version->items, next) {
> >+ bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
> >+ acb->nb_sectors, quorum_rewrite_aio_cb, acb);
> >+ }
> >+ }
> >+
> >+ /* return true if any rewrite is done else false */
> >+ return count;
> >+}
> >+
> > static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> > {
> > int i;
> >@@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
> > return ret;
> > }
> >-static void quorum_vote(QuorumAIOCB *acb)
> >+static bool quorum_vote(QuorumAIOCB *acb)
> > {
> > bool quorum = true;
> >+ bool rewrite = false;
> > int i, j, ret;
> > QuorumVoteValue hash;
> > BDRVQuorumState *s = acb->common.bs->opaque;
> > QuorumVoteVersion *winner;
> > if (quorum_has_too_much_io_failed(acb)) {
> >- return;
> >+ return false;;
>
> Two semicolons here.
>
> > }
> > /* get the index of the first successful read */
> >@@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
> > /* Every successful read agrees */
> > if (quorum) {
> > quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
> >- return;
> >+ return false;;
>
> And here.
>
> > }
> > /* compute hashes for each successful read, also store indexes */
> >@@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
> > /* some versions are bad print them */
> > quorum_report_bad_versions(s, acb, &winner->value);
> >+ /* corruption correction is actived */
>
> I'd vote for "enabled" rather than "actived". But I like "corruption
> correction". :-)
>
> >+ if (s->rewrite_corrupted) {
> >+ rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
> >+ }
> >+
> > free_exit:
> > /* free lists */
> > quorum_free_vote_list(&acb->votes);
> >+ return rewrite;
> > }
> > static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> >@@ -709,6 +781,11 @@ static QemuOptsList quorum_runtime_opts = {
> > .type = QEMU_OPT_BOOL,
> > .help = "Trigger block verify mode if set",
> > },
> >+ {
> >+ .name = QUORUM_OPT_REWRITE,
> >+ .type = QEMU_OPT_BOOL,
> >+ .help = "Rewrite corrupted block on read quorum",
> >+ },
> > { /* end of list */ }
> > },
> > };
> >@@ -770,6 +847,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
> > "and using two files with vote_threshold=2\n");
> > }
> >+ s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
> >+ if (s->rewrite_corrupted && s->is_blkverify) {
> >+ error_setg(&local_err,
> >+ "rewrite-corrupted=on cannot be used with blkverify=on");
> >+ ret = -EINVAL;
> >+ goto exit;
> >+ }
> >+
> > /* allocate the children BlockDriverState array */
> > s->bs = g_new0(BlockDriverState *, s->num_children);
> > opened = g_new0(bool, s->num_children);
> >diff --git a/qapi-schema.json b/qapi-schema.json
> >index 6476d4a..f5a8767 100644
> >--- a/qapi-schema.json
> >+++ b/qapi-schema.json
> >@@ -4542,12 +4542,15 @@
> > #
> > # @vote-threshold: the vote limit under which a read will fail
> > #
> >+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
> >+# (Since 2.1)
> >+#
> > # Since: 2.0
> > ##
> > { 'type': 'BlockdevOptionsQuorum',
> > 'data': { '*blkverify': 'bool',
> > 'children': [ 'BlockdevRef' ],
> >- 'vote-threshold': 'int' } }
> >+ 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
> > ##
> > # @BlockdevOptions
> >diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
> >index b512d00..bb211d6 100755
> >--- a/tests/qemu-iotests/081
> >+++ b/tests/qemu-iotests/081
> >@@ -95,6 +95,18 @@ echo "== checking quorum correction =="
> > $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> > echo
> >+echo "== using quorum rewrite corrupted mode =="
> >+
> >+quorum="$quorum,file.rewrite-corrupted=on"
> >+
> >+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> >+
> >+echo
> >+echo "== checking that quorum has corrected the corrupted file =="
> >+
> >+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> >+
>
> I'd prefer it if you could put these new test cases below the
> "checking mixed reference/option specification" test, just because
> I'd like that test to output the quorum warning - especially there
> is no other point in the test where we can see that QMP message.
>
> But that's up to you and if you at least fix the double semicolons:
Ok I will do these changes.
Thanks for the review
Best regards
Benoît
>
> Reviewed-by: Max Reitz <mreitz@redhat.com>
>
> >+echo
> > echo "== checking mixed reference/option specification =="
> > run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
> >@@ -137,6 +149,8 @@ echo
> > echo "== breaking quorum =="
> > $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
> >+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> >+
> > echo
> > echo "== checking that quorum is broken =="
> >diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
> >index 84aeb0c..2d8b290 100644
> >--- a/tests/qemu-iotests/081.out
> >+++ b/tests/qemu-iotests/081.out
> >@@ -25,12 +25,19 @@ wrote 10485760/10485760 bytes at offset 0
> > read 10485760/10485760 bytes at offset 0
> > 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+== using quorum rewrite corrupted mode ==
> >+read 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+
> >+== checking that quorum has corrected the corrupted file ==
> >+read 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+
> > == checking mixed reference/option specification ==
> > Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
> > QMP_VERSION
> > {"return": {}}
> > {"return": {}}
> >-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "", "sectors-count": 20480, "sector-num": 0}}
> > read 10485760/10485760 bytes at offset 0
> > 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> > {"return": ""}
> >@@ -43,6 +50,8 @@ read 10485760/10485760 bytes at offset 0
> > == breaking quorum ==
> > wrote 10485760/10485760 bytes at offset 0
> > 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+wrote 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> > == checking that quorum is broken ==
> > qemu-io: can't open device (null): Could not read image for determining its format: Input/output error
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files.
2014-03-11 16:36 ` [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files Benoît Canet
@ 2014-03-11 21:00 ` Max Reitz
2014-03-11 21:17 ` Benoît Canet
0 siblings, 1 reply; 9+ messages in thread
From: Max Reitz @ 2014-03-11 21:00 UTC (permalink / raw)
To: Benoît Canet, qemu-devel; +Cc: kwolf, Benoit Canet, stefanha
On 11.03.2014 17:36, Benoît Canet wrote:
> When a quorum file is totally destroyed (broken filer) the user can start a
*file
> drive-mirror job on the quorum block backend and then replace the broken
> quorum file with drive-mirror-replace given it has a node-name.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
> block.c | 6 +--
> block/mirror.c | 115 ++++++++++++++++++++++++++++++++++++++++++++--
> blockdev.c | 27 +++++++++++
> include/block/block.h | 3 ++
> include/block/block_int.h | 15 ++++++
> qapi-schema.json | 33 +++++++++++++
> qmp-commands.hx | 5 ++
> trace-events | 1 +
> 8 files changed, 199 insertions(+), 6 deletions(-)
>
> diff --git a/block.c b/block.c
> index 9f2fe85..2354e5b 100644
> --- a/block.c
> +++ b/block.c
> @@ -787,9 +787,9 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
> return open_flags;
> }
>
> -static int bdrv_assign_node_name(BlockDriverState *bs,
> - const char *node_name,
> - Error **errp)
> +int bdrv_assign_node_name(BlockDriverState *bs,
> + const char *node_name,
> + Error **errp)
> {
Before this patch, there was only a single caller to this function, and
it (obviously, due to the "static") resided in block.c as well (it was
part of bdrv_open_common(), to be precise). This caller used this
function to assign a node name to a previously unnamed BDS. With this
patch, there is a new caller (mirror_activate_replace_target()) and
apparently, the BDS it uses this function on is unnamed as well (the
call to bdrv_open_backing_file() in mirror_complete() could have named
it, but there are not options given, thus, it will remain unnamed).
The point why I'm bringing this up is that bdrv_assign_node_name()
always adds the BDS to the graph_bdrv_states list if a new name is
given. Therefore, if the BDS was already named, it will be twice in that
list. This is now not an issue, as the BDS will have been unnamed in any
case before bdrv_assign_node_name() was called. I'd however prefer some
assertion in that latter function that the BDS will not be added twice
to the list; I guess a simple assertion that it is unnamed
(bs->node_name[0] == '\0') will suffice.
> if (!node_name) {
> return 0;
> diff --git a/block/mirror.c b/block/mirror.c
> index 6dc84e8..8133ca5 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -49,6 +49,15 @@ typedef struct MirrorBlockJob {
> unsigned long *in_flight_bitmap;
> int in_flight;
> int ret;
> + /* these four fields are used by drive-mirror-replace */
> + /* Must the code replace a target with the new mirror */
Hm, probably better "The code must/has to/should replace a target with
the new mirror".
> + bool must_replace;
> + /* The block BDS to replace */
> + BlockDriverState *to_replace;
> + /* the node-name of the new mirror BDS */
> + char *new_node_name;
> + /* Used to block operations on the drive-mirror-replace target. */
> + Error *replace_blocker;
> } MirrorBlockJob;
>
> typedef struct MirrorOp {
> @@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque)
> MirrorBlockJob *s = opaque;
> BlockDriverState *bs = s->common.bs;
> int64_t sector_num, end, sectors_per_chunk, length;
> + BlockDriverState *to_replace;
> uint64_t last_pause_ns;
> BlockDriverInfo bdi;
> char backing_filename[1024];
> @@ -477,11 +487,17 @@ immediate_exit:
> g_free(s->in_flight_bitmap);
> bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> bdrv_iostatus_disable(s->target);
> + /* Here we handle the drive-mirror-replace QMP command */
> + if (s->must_replace) {
> + to_replace = s->to_replace;
> + } else {
> + to_replace = s->common.bs;
> + }
> if (s->should_complete && ret == 0) {
> - if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> - bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> + bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> }
> - bdrv_swap(s->target, s->common.bs);
> + bdrv_swap(s->target, to_replace);
> if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> /* drop the bs loop chain formed by the swap: break the loop then
> * trigger the unref from the top one */
> @@ -491,6 +507,11 @@ immediate_exit:
> bdrv_unref(p);
> }
> }
> + if (s->must_replace) {
> + bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> + error_free(s->replace_blocker);
> + bdrv_unref(s->to_replace);
> + }
> bdrv_unref(s->target);
> block_job_completed(&s->common, ret);
> }
> @@ -513,6 +534,87 @@ static void mirror_iostatus_reset(BlockJob *job)
> bdrv_iostatus_reset(s->target);
> }
>
> +bool mirror_set_replace_target(BlockJob *job, const char *reference,
> + bool has_new_node_name,
> + const char *new_node_name, Error **errp)
> +{
> + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> + BlockDriverState *to_replace;
> +
> + /* We don't want to give too much power to the user as could result on
*as this could result in
> + * BlockDriverState loops if used with something other than sync=full.
> + */
> + if (s->is_none_mode || s->base) {
> + error_setg(errp, "Can only be used on a mirror done with sync=full");
> + return false;
> + }
> +
> + /* check target reference is not an empty string */
*check that the target reference
> + if (!reference[0]) {
> + error_setg(errp, "target-reference is an empty string");
> + return false;
> + }
> +
> + /* Get the to replace block driver state */
It's correct, but I find it hard to understand at the first read. I
think "Get the block driver state to be replaced" would be easier.
> + to_replace = bdrv_lookup_bs(reference, reference, errp);
> + if (!to_replace) {
> + return false;
> + }
> +
> + /* If the BDS to replace is a regular node we need a new node name */
Likewise, s/to replace/to be replaced/?
> + if (to_replace->node_name[0] && !has_new_node_name) {
> + error_setg(errp, "A new-node-name must be provided");
> + return false;
> + }
> +
> + /* Can only replace something else than the source of the mirror */
> + if (to_replace == job->bs) {
> + error_setg(errp, "Cannot replace the mirror source");
> + return false;
> + }
> +
> + /* are this bs replace operation blocked */
s/are/is/
> + if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) {
> + return false;
> + }
> +
> + /* save usefull infos for later */
*useful
> + s->to_replace = to_replace;
> + assert(has_new_node_name);
> + s->new_node_name = g_strdup(new_node_name);
> +
> + return true;
> +}
> +
> +static void mirror_activate_replace_target(BlockJob *job, Error **errp)
> +{
> + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> +
> + /* Set the new node name if the BDS to replace is a regular node
> + * of the graph.
> + */
> + if (s->to_replace->node_name[0]) {
> + assert(s->new_node_name);
> + bdrv_assign_node_name(s->target, s->new_node_name, errp);
> + g_free(s->new_node_name);
> + }
Is s->new_node_name leaked if !s->to_replace->node_name[0]?
> +
> + if (error_is_set(errp)) {
> + s->to_replace = NULL;
> + return;
> + }
> +
> + /* block all operations on the target bs */
> + error_setg(&s->replace_blocker,
> + "block device is in use by drive-mirror-replace");
> + bdrv_op_block_all(s->to_replace, s->replace_blocker);
> +
> + bdrv_ref(s->to_replace);
> +
> + /* activate the replacement operation */
> + s->must_replace = true;
> +}
> +
> static void mirror_complete(BlockJob *job, Error **errp)
> {
> MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> @@ -529,6 +631,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
> return;
> }
>
> + /* drive-mirror-replace is being called on this job so activate the
> + * replacement target
> + */
> + if (s->to_replace) {
> + mirror_activate_replace_target(job, errp);
> + }
> +
> s->should_complete = true;
> block_job_resume(job);
> }
> diff --git a/blockdev.c b/blockdev.c
> index 8a6ae0a..901a05d 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp)
> block_job_complete(job, errp);
> }
>
> +void qmp_drive_mirror_replace(const char *device, const char *target_reference,
> + bool has_new_node_name,
> + const char *new_node_name,
> + Error **errp)
> +{
> + BlockJob *job = find_block_job(device);
> +
> + if (!job) {
> + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
> + return;
> + }
> +
> + if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) {
> + error_setg(errp, "Can only be used on a drive-mirror block job");
> + return;
> + }
> +
> + if (!mirror_set_replace_target(job, target_reference, has_new_node_name,
> + new_node_name, errp)) {
> + return;
> + }
> +
> + trace_qmp_drive_mirror_replace(job, target_reference,
> + has_new_node_name ? new_node_name : "");
> + block_job_complete(job, errp);
> +}
> +
> void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
> {
> QmpOutputVisitor *ov = qmp_output_visitor_new();
> diff --git a/include/block/block.h b/include/block/block.h
> index ee1582d..a9d6ead 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -171,6 +171,7 @@ typedef enum BlockOpType {
> BLOCK_OP_TYPE_MIRROR,
> BLOCK_OP_TYPE_RESIZE,
> BLOCK_OP_TYPE_STREAM,
> + BLOCK_OP_TYPE_REPLACE,
> BLOCK_OP_TYPE_MAX,
> } BlockOpType;
>
> @@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
> QDict *options, const char *bdref_key, int flags,
> bool allow_none, Error **errp);
> void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
> +int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name,
> + Error **errp);
> int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
> int bdrv_open(BlockDriverState **pbs, const char *filename,
> const char *reference, QDict *options, int flags,
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 1f4f78b..ea281c8 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> void *opaque, Error **errp);
>
> /*
> + * mirror_set_replace_target:
> + * @job: An active mirroring block job started with sync=full.
> + * @reference: id or node-name of the BDS to replace when the mirror is done.
> + * @has_new_node_name: Set to true if new_node_name if provided
> + * @new_node_name: The optional new node name of the new mirror.
> + * @errp: Error object.
> + *
> + * Prepare a mirroring operation to replace a BDS pointed to by reference with
> + * the new mirror.
> + */
> +bool mirror_set_replace_target(BlockJob *job, const char *reference,
> + bool has_new_node_name,
> + const char *new_node_name, Error **errp);
> +
> +/*
> * backup_start:
> * @bs: Block device to operate on.
> * @target: Block device to write to.
> diff --git a/qapi-schema.json b/qapi-schema.json
> index f5a8767..33c5ab1 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -2716,6 +2716,39 @@
> { 'command': 'block-job-complete', 'data': { 'device': 'str' } }
>
> ##
> +# @drive-mirror-replace:
> +#
> +# Manually trigger completion of an active background drive-mirror operation
> +# and replace the target reference with the new mirror.
> +# This switch the device to write to the target path only.
*switches
> +# The ability to complete is signaled with a BLOCK_JOB_READY event.
> +#
> +# This command completes an active drive-mirror background operation
> +# synchronously and replace the target reference with the mirror.
*replaces
Max
> +# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
> +# is not defined. Note that if an I/O error occurs during the processing of
> +# this command: 1) the command itself will fail; 2) the error will be processed
> +# according to the rerror/werror arguments that were specified when starting
> +# the operation.
> +#
> +# A cancelled or paused drive-mirror job cannot be completed.
> +#
> +# @device: the device name
> +# @target-reference: the id or node name of the block driver state to replace
> +# @new-node-name: #optional set the node-name of the new block driver state
> +# needed the target reference point to a regular node of the
> +# graph
> +#
> +# Returns: Nothing on success
> +# If no background operation is active on this device, DeviceNotActive
> +#
> +# Since: 2.1
> +##
> +{ 'command': 'drive-mirror-replace',
> + 'data': { 'device': 'str', 'target-reference': 'str',
> + '*new-node-name': 'str' } }
> +
> +##
> # @ObjectTypeInfo:
> #
> # This structure describes a search result from @qom-list-types
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index dd336f7..3b5b382 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -1150,6 +1150,11 @@ EQMP
> .mhandler.cmd_new = qmp_marshal_input_block_job_complete,
> },
> {
> + .name = "drive-mirror-replace",
> + .args_type = "device:B,target-reference:s,new-node-name:s?",
> + .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace,
> + },
> + {
> .name = "transaction",
> .args_type = "actions:q",
> .mhandler.cmd_new = qmp_marshal_input_transaction,
> diff --git a/trace-events b/trace-events
> index aec4202..5282904 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p"
> qmp_block_job_pause(void *job) "job %p"
> qmp_block_job_resume(void *job) "job %p"
> qmp_block_job_complete(void *job) "job %p"
> +qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s"
> block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
> qmp_block_stream(void *bs, void *job) "bs %p job %p"
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files.
2014-03-11 21:00 ` Max Reitz
@ 2014-03-11 21:17 ` Benoît Canet
2014-03-13 19:12 ` Max Reitz
0 siblings, 1 reply; 9+ messages in thread
From: Benoît Canet @ 2014-03-11 21:17 UTC (permalink / raw)
To: Max Reitz; +Cc: Benoît Canet, kwolf, qemu-devel, stefanha
The Tuesday 11 Mar 2014 à 22:00:29 (+0100), Max Reitz wrote :
> On 11.03.2014 17:36, Benoît Canet wrote:
> >When a quorum file is totally destroyed (broken filer) the user can start a
>
> *file
I really meant filer as in file server: I will write NAS instead.
>
> >drive-mirror job on the quorum block backend and then replace the broken
> >quorum file with drive-mirror-replace given it has a node-name.
> >
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> > block.c | 6 +--
> > block/mirror.c | 115 ++++++++++++++++++++++++++++++++++++++++++++--
> > blockdev.c | 27 +++++++++++
> > include/block/block.h | 3 ++
> > include/block/block_int.h | 15 ++++++
> > qapi-schema.json | 33 +++++++++++++
> > qmp-commands.hx | 5 ++
> > trace-events | 1 +
> > 8 files changed, 199 insertions(+), 6 deletions(-)
> >
> >diff --git a/block.c b/block.c
> >index 9f2fe85..2354e5b 100644
> >--- a/block.c
> >+++ b/block.c
> >@@ -787,9 +787,9 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
> > return open_flags;
> > }
> >-static int bdrv_assign_node_name(BlockDriverState *bs,
> >- const char *node_name,
> >- Error **errp)
> >+int bdrv_assign_node_name(BlockDriverState *bs,
> >+ const char *node_name,
> >+ Error **errp)
> > {
>
> Before this patch, there was only a single caller to this function,
> and it (obviously, due to the "static") resided in block.c as well
> (it was part of bdrv_open_common(), to be precise). This caller used
> this function to assign a node name to a previously unnamed BDS.
> With this patch, there is a new caller
> (mirror_activate_replace_target()) and apparently, the BDS it uses
> this function on is unnamed as well (the call to
> bdrv_open_backing_file() in mirror_complete() could have named it,
> but there are not options given, thus, it will remain unnamed).
>
> The point why I'm bringing this up is that bdrv_assign_node_name()
> always adds the BDS to the graph_bdrv_states list if a new name is
> given. Therefore, if the BDS was already named, it will be twice in
> that list. This is now not an issue, as the BDS will have been
> unnamed in any case before bdrv_assign_node_name() was called. I'd
> however prefer some assertion in that latter function that the BDS
> will not be added twice to the list; I guess a simple assertion that
> it is unnamed (bs->node_name[0] == '\0') will suffice.
>
> > if (!node_name) {
> > return 0;
> >diff --git a/block/mirror.c b/block/mirror.c
> >index 6dc84e8..8133ca5 100644
> >--- a/block/mirror.c
> >+++ b/block/mirror.c
> >@@ -49,6 +49,15 @@ typedef struct MirrorBlockJob {
> > unsigned long *in_flight_bitmap;
> > int in_flight;
> > int ret;
> >+ /* these four fields are used by drive-mirror-replace */
> >+ /* Must the code replace a target with the new mirror */
>
> Hm, probably better "The code must/has to/should replace a target
> with the new mirror".
>
> >+ bool must_replace;
> >+ /* The block BDS to replace */
> >+ BlockDriverState *to_replace;
> >+ /* the node-name of the new mirror BDS */
> >+ char *new_node_name;
> >+ /* Used to block operations on the drive-mirror-replace target. */
> >+ Error *replace_blocker;
> > } MirrorBlockJob;
> > typedef struct MirrorOp {
> >@@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque)
> > MirrorBlockJob *s = opaque;
> > BlockDriverState *bs = s->common.bs;
> > int64_t sector_num, end, sectors_per_chunk, length;
> >+ BlockDriverState *to_replace;
> > uint64_t last_pause_ns;
> > BlockDriverInfo bdi;
> > char backing_filename[1024];
> >@@ -477,11 +487,17 @@ immediate_exit:
> > g_free(s->in_flight_bitmap);
> > bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> > bdrv_iostatus_disable(s->target);
> >+ /* Here we handle the drive-mirror-replace QMP command */
> >+ if (s->must_replace) {
> >+ to_replace = s->to_replace;
> >+ } else {
> >+ to_replace = s->common.bs;
> >+ }
> > if (s->should_complete && ret == 0) {
> >- if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> >- bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> >+ if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> >+ bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> > }
> >- bdrv_swap(s->target, s->common.bs);
> >+ bdrv_swap(s->target, to_replace);
> > if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> > /* drop the bs loop chain formed by the swap: break the loop then
> > * trigger the unref from the top one */
> >@@ -491,6 +507,11 @@ immediate_exit:
> > bdrv_unref(p);
> > }
> > }
> >+ if (s->must_replace) {
> >+ bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> >+ error_free(s->replace_blocker);
> >+ bdrv_unref(s->to_replace);
> >+ }
> > bdrv_unref(s->target);
> > block_job_completed(&s->common, ret);
> > }
> >@@ -513,6 +534,87 @@ static void mirror_iostatus_reset(BlockJob *job)
> > bdrv_iostatus_reset(s->target);
> > }
> >+bool mirror_set_replace_target(BlockJob *job, const char *reference,
> >+ bool has_new_node_name,
> >+ const char *new_node_name, Error **errp)
> >+{
> >+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> >+ BlockDriverState *to_replace;
> >+
> >+ /* We don't want to give too much power to the user as could result on
>
> *as this could result in
>
> >+ * BlockDriverState loops if used with something other than sync=full.
> >+ */
> >+ if (s->is_none_mode || s->base) {
> >+ error_setg(errp, "Can only be used on a mirror done with sync=full");
> >+ return false;
> >+ }
> >+
> >+ /* check target reference is not an empty string */
>
> *check that the target reference
>
> >+ if (!reference[0]) {
> >+ error_setg(errp, "target-reference is an empty string");
> >+ return false;
> >+ }
> >+
> >+ /* Get the to replace block driver state */
>
> It's correct, but I find it hard to understand at the first read. I
> think "Get the block driver state to be replaced" would be easier.
>
> >+ to_replace = bdrv_lookup_bs(reference, reference, errp);
> >+ if (!to_replace) {
> >+ return false;
> >+ }
> >+
> >+ /* If the BDS to replace is a regular node we need a new node name */
>
> Likewise, s/to replace/to be replaced/?
>
> >+ if (to_replace->node_name[0] && !has_new_node_name) {
> >+ error_setg(errp, "A new-node-name must be provided");
> >+ return false;
> >+ }
> >+
> >+ /* Can only replace something else than the source of the mirror */
> >+ if (to_replace == job->bs) {
> >+ error_setg(errp, "Cannot replace the mirror source");
> >+ return false;
> >+ }
> >+
> >+ /* are this bs replace operation blocked */
>
> s/are/is/
>
> >+ if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) {
> >+ return false;
> >+ }
> >+
> >+ /* save usefull infos for later */
>
> *useful
>
> >+ s->to_replace = to_replace;
> >+ assert(has_new_node_name);
> >+ s->new_node_name = g_strdup(new_node_name);
> >+
> >+ return true;
> >+}
> >+
> >+static void mirror_activate_replace_target(BlockJob *job, Error **errp)
> >+{
> >+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> >+
> >+ /* Set the new node name if the BDS to replace is a regular node
> >+ * of the graph.
> >+ */
> >+ if (s->to_replace->node_name[0]) {
> >+ assert(s->new_node_name);
> >+ bdrv_assign_node_name(s->target, s->new_node_name, errp);
> >+ g_free(s->new_node_name);
> >+ }
>
> Is s->new_node_name leaked if !s->to_replace->node_name[0]?
>
> >+
> >+ if (error_is_set(errp)) {
> >+ s->to_replace = NULL;
> >+ return;
> >+ }
> >+
> >+ /* block all operations on the target bs */
> >+ error_setg(&s->replace_blocker,
> >+ "block device is in use by drive-mirror-replace");
> >+ bdrv_op_block_all(s->to_replace, s->replace_blocker);
> >+
> >+ bdrv_ref(s->to_replace);
> >+
> >+ /* activate the replacement operation */
> >+ s->must_replace = true;
> >+}
> >+
> > static void mirror_complete(BlockJob *job, Error **errp)
> > {
> > MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> >@@ -529,6 +631,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
> > return;
> > }
> >+ /* drive-mirror-replace is being called on this job so activate the
> >+ * replacement target
> >+ */
> >+ if (s->to_replace) {
> >+ mirror_activate_replace_target(job, errp);
> >+ }
> >+
> > s->should_complete = true;
> > block_job_resume(job);
> > }
> >diff --git a/blockdev.c b/blockdev.c
> >index 8a6ae0a..901a05d 100644
> >--- a/blockdev.c
> >+++ b/blockdev.c
> >@@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp)
> > block_job_complete(job, errp);
> > }
> >+void qmp_drive_mirror_replace(const char *device, const char *target_reference,
> >+ bool has_new_node_name,
> >+ const char *new_node_name,
> >+ Error **errp)
> >+{
> >+ BlockJob *job = find_block_job(device);
> >+
> >+ if (!job) {
> >+ error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
> >+ return;
> >+ }
> >+
> >+ if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) {
> >+ error_setg(errp, "Can only be used on a drive-mirror block job");
> >+ return;
> >+ }
> >+
> >+ if (!mirror_set_replace_target(job, target_reference, has_new_node_name,
> >+ new_node_name, errp)) {
> >+ return;
> >+ }
> >+
> >+ trace_qmp_drive_mirror_replace(job, target_reference,
> >+ has_new_node_name ? new_node_name : "");
> >+ block_job_complete(job, errp);
> >+}
> >+
> > void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
> > {
> > QmpOutputVisitor *ov = qmp_output_visitor_new();
> >diff --git a/include/block/block.h b/include/block/block.h
> >index ee1582d..a9d6ead 100644
> >--- a/include/block/block.h
> >+++ b/include/block/block.h
> >@@ -171,6 +171,7 @@ typedef enum BlockOpType {
> > BLOCK_OP_TYPE_MIRROR,
> > BLOCK_OP_TYPE_RESIZE,
> > BLOCK_OP_TYPE_STREAM,
> >+ BLOCK_OP_TYPE_REPLACE,
> > BLOCK_OP_TYPE_MAX,
> > } BlockOpType;
> >@@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
> > QDict *options, const char *bdref_key, int flags,
> > bool allow_none, Error **errp);
> > void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
> >+int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name,
> >+ Error **errp);
> > int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
> > int bdrv_open(BlockDriverState **pbs, const char *filename,
> > const char *reference, QDict *options, int flags,
> >diff --git a/include/block/block_int.h b/include/block/block_int.h
> >index 1f4f78b..ea281c8 100644
> >--- a/include/block/block_int.h
> >+++ b/include/block/block_int.h
> >@@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> > void *opaque, Error **errp);
> > /*
> >+ * mirror_set_replace_target:
> >+ * @job: An active mirroring block job started with sync=full.
> >+ * @reference: id or node-name of the BDS to replace when the mirror is done.
> >+ * @has_new_node_name: Set to true if new_node_name if provided
> >+ * @new_node_name: The optional new node name of the new mirror.
> >+ * @errp: Error object.
> >+ *
> >+ * Prepare a mirroring operation to replace a BDS pointed to by reference with
> >+ * the new mirror.
> >+ */
> >+bool mirror_set_replace_target(BlockJob *job, const char *reference,
> >+ bool has_new_node_name,
> >+ const char *new_node_name, Error **errp);
> >+
> >+/*
> > * backup_start:
> > * @bs: Block device to operate on.
> > * @target: Block device to write to.
> >diff --git a/qapi-schema.json b/qapi-schema.json
> >index f5a8767..33c5ab1 100644
> >--- a/qapi-schema.json
> >+++ b/qapi-schema.json
> >@@ -2716,6 +2716,39 @@
> > { 'command': 'block-job-complete', 'data': { 'device': 'str' } }
> > ##
> >+# @drive-mirror-replace:
> >+#
> >+# Manually trigger completion of an active background drive-mirror operation
> >+# and replace the target reference with the new mirror.
> >+# This switch the device to write to the target path only.
>
> *switches
>
> >+# The ability to complete is signaled with a BLOCK_JOB_READY event.
> >+#
> >+# This command completes an active drive-mirror background operation
> >+# synchronously and replace the target reference with the mirror.
>
> *replaces
>
> Max
>
> >+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
> >+# is not defined. Note that if an I/O error occurs during the processing of
> >+# this command: 1) the command itself will fail; 2) the error will be processed
> >+# according to the rerror/werror arguments that were specified when starting
> >+# the operation.
> >+#
> >+# A cancelled or paused drive-mirror job cannot be completed.
> >+#
> >+# @device: the device name
> >+# @target-reference: the id or node name of the block driver state to replace
> >+# @new-node-name: #optional set the node-name of the new block driver state
> >+# needed the target reference point to a regular node of the
> >+# graph
> >+#
> >+# Returns: Nothing on success
> >+# If no background operation is active on this device, DeviceNotActive
> >+#
> >+# Since: 2.1
> >+##
> >+{ 'command': 'drive-mirror-replace',
> >+ 'data': { 'device': 'str', 'target-reference': 'str',
> >+ '*new-node-name': 'str' } }
> >+
> >+##
> > # @ObjectTypeInfo:
> > #
> > # This structure describes a search result from @qom-list-types
> >diff --git a/qmp-commands.hx b/qmp-commands.hx
> >index dd336f7..3b5b382 100644
> >--- a/qmp-commands.hx
> >+++ b/qmp-commands.hx
> >@@ -1150,6 +1150,11 @@ EQMP
> > .mhandler.cmd_new = qmp_marshal_input_block_job_complete,
> > },
> > {
> >+ .name = "drive-mirror-replace",
> >+ .args_type = "device:B,target-reference:s,new-node-name:s?",
> >+ .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace,
> >+ },
> >+ {
> > .name = "transaction",
> > .args_type = "actions:q",
> > .mhandler.cmd_new = qmp_marshal_input_transaction,
> >diff --git a/trace-events b/trace-events
> >index aec4202..5282904 100644
> >--- a/trace-events
> >+++ b/trace-events
> >@@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p"
> > qmp_block_job_pause(void *job) "job %p"
> > qmp_block_job_resume(void *job) "job %p"
> > qmp_block_job_complete(void *job) "job %p"
> >+qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s"
> > block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
> > qmp_block_stream(void *bs, void *job) "bs %p job %p"
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files.
2014-03-11 21:17 ` Benoît Canet
@ 2014-03-13 19:12 ` Max Reitz
0 siblings, 0 replies; 9+ messages in thread
From: Max Reitz @ 2014-03-13 19:12 UTC (permalink / raw)
To: Benoît Canet; +Cc: kwolf, qemu-devel, stefanha
On 11.03.2014 22:17, Benoît Canet wrote:
> The Tuesday 11 Mar 2014 à 22:00:29 (+0100), Max Reitz wrote :
>> On 11.03.2014 17:36, Benoît Canet wrote:
>>> When a quorum file is totally destroyed (broken filer) the user can start a
>> *file
> I really meant filer as in file server: I will write NAS instead.
Ah, my fault then. :-)
Max
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2014-03-13 20:07 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-03-11 16:36 [Qemu-devel] [PATCH 0/3] Quorum maintainance operations Benoît Canet
2014-03-11 16:36 ` [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum Benoît Canet
2014-03-11 19:58 ` Max Reitz
2014-03-11 20:14 ` Benoît Canet
2014-03-11 16:36 ` [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files Benoît Canet
2014-03-11 21:00 ` Max Reitz
2014-03-11 21:17 ` Benoît Canet
2014-03-13 19:12 ` Max Reitz
2014-03-11 16:36 ` [Qemu-devel] [PATCH 3/3] qemu-iotests: Add 088 new test for drive-mirror-replace Benoît Canet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).