From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:40504) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WuGRI-0002Um-5v for qemu-devel@nongnu.org; Tue, 10 Jun 2014 03:26:06 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1WuGQb-00029k-Oq for qemu-devel@nongnu.org; Tue, 10 Jun 2014 03:25:48 -0400 Received: from lputeaux-656-01-25-125.w80-12.abo.wanadoo.fr ([80.12.84.125]:46502 helo=paradis.irqsave.net) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WuGQb-000284-GZ for qemu-devel@nongnu.org; Tue, 10 Jun 2014 03:25:05 -0400 Date: Tue, 10 Jun 2014 09:25:04 +0200 From: =?iso-8859-1?Q?Beno=EEt?= Canet Message-ID: <20140610072504.GA32755@irqsave.net> References: <1402384906-335-1-git-send-email-benoit.canet@irqsave.net> <1402384906-335-2-git-send-email-benoit.canet@irqsave.net> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline In-Reply-To: <1402384906-335-2-git-send-email-benoit.canet@irqsave.net> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v8 1/4] quorum: Add the rewrite-corrupted parameter to quorum. List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: =?iso-8859-1?Q?Beno=EEt?= Canet Cc: kwolf@redhat.com, qemu-devel@nongnu.org, stefanha@redhat.com, mreitz@redhat.com The Tuesday 10 Jun 2014 =E0 09:21:43 (+0200), Beno=EEt Canet wrote : Oh i again submitted a commit subject ending with a dot I will remove the= ses. > On read operations when this parameter is set and some replicas are cor= rupted > while quorum can be reached quorum will proceed to rewrite the correct = version > of the data to fix the corrupted replicas. >=20 > This will shine with SSD where the FTL will remap the same block at ano= ther > place on rewrite. >=20 > Signed-off-by: Benoit Canet > --- > block/quorum.c | 97 ++++++++++++++++++++++++++++++++++++++= +++++--- > qapi/block-core.json | 5 ++- > tests/qemu-iotests/081 | 15 ++++++- > tests/qemu-iotests/081.out | 10 +++++ > 4 files changed, 119 insertions(+), 8 deletions(-) >=20 > diff --git a/block/quorum.c b/block/quorum.c > index 426077a..1a7a01e 100644 > --- a/block/quorum.c > +++ b/block/quorum.c > @@ -22,6 +22,7 @@ > =20 > #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold" > #define QUORUM_OPT_BLKVERIFY "blkverify" > +#define QUORUM_OPT_REWRITE "rewrite-corrupted" > =20 > /* This union holds a vote hash value */ > typedef union QuorumVoteValue { > @@ -69,6 +70,9 @@ typedef struct BDRVQuorumState { > * It is useful to debug other block driver= s by > * comparing them with a reference one. > */ > + bool rewrite_corrupted;/* true if the driver must rewrite-on-read = corrupted > + * block if Quorum is reached. > + */ > } BDRVQuorumState; > =20 > typedef struct QuorumAIOCB QuorumAIOCB; > @@ -104,13 +108,17 @@ struct QuorumAIOCB { > int count; /* number of completed AIOCB */ > int success_count; /* number of successfully completed AI= OCB */ > =20 > + int rewrite_count; /* number of replica to rewrite: count= down to > + * zero once writes are fired > + */ > + > QuorumVotes votes; > =20 > bool is_read; > int vote_ret; > }; > =20 > -static void quorum_vote(QuorumAIOCB *acb); > +static bool quorum_vote(QuorumAIOCB *acb); > =20 > static void quorum_aio_cancel(BlockDriverAIOCB *blockacb) > { > @@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState = *s, > acb->qcrs =3D g_new0(QuorumChildRequest, s->num_children); > acb->count =3D 0; > acb->success_count =3D 0; > + acb->rewrite_count =3D 0; > acb->votes.compare =3D quorum_sha256_compare; > QLIST_INIT(&acb->votes.vote_list); > acb->is_read =3D false; > @@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumA= IOCB *acb) > return false; > } > =20 > +static void quorum_rewrite_aio_cb(void *opaque, int ret) > +{ > + QuorumAIOCB *acb =3D opaque; > + > + /* one less rewrite to do */ > + acb->rewrite_count--; > + > + /* wait until all rewrite callbacks have completed */ > + if (acb->rewrite_count) { > + return; > + } > + > + quorum_aio_finalize(acb); > +} > + > static void quorum_aio_cb(void *opaque, int ret) > { > QuorumChildRequest *sacb =3D opaque; > QuorumAIOCB *acb =3D sacb->parent; > BDRVQuorumState *s =3D acb->common.bs->opaque; > + bool rewrite =3D false; > =20 > sacb->ret =3D ret; > acb->count++; > @@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret) > =20 > /* Do the vote on read */ > if (acb->is_read) { > - quorum_vote(acb); > + rewrite =3D quorum_vote(acb); > } else { > quorum_has_too_much_io_failed(acb); > } > =20 > - quorum_aio_finalize(acb); > + /* if no rewrite is done the code will finish right away */ > + if (!rewrite) { > + quorum_aio_finalize(acb); > + } > } > =20 > static void quorum_report_bad_versions(BDRVQuorumState *s, > @@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumS= tate *s, > } > } > =20 > +static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOC= B *acb, > + QuorumVoteValue *value) > +{ > + QuorumVoteVersion *version; > + QuorumVoteItem *item; > + int count =3D 0; > + > + /* first count the number of bad versions: done first to avoid con= currency > + * issues. > + */ > + QLIST_FOREACH(version, &acb->votes.vote_list, next) { > + if (acb->votes.compare(&version->value, value)) { > + continue; > + } > + QLIST_FOREACH(item, &version->items, next) { > + count++; > + } > + } > + > + /* quorum_rewrite_aio_cb will count down this to zero */ > + acb->rewrite_count =3D count; > + > + /* now fire the correcting rewrites */ > + QLIST_FOREACH(version, &acb->votes.vote_list, next) { > + if (acb->votes.compare(&version->value, value)) { > + continue; > + } > + QLIST_FOREACH(item, &version->items, next) { > + bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->= qiov, > + acb->nb_sectors, quorum_rewrite_aio_cb, ac= b); > + } > + } > + > + /* return true if any rewrite is done else false */ > + return count; > +} > + > static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) > { > int i; > @@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb) > return ret; > } > =20 > -static void quorum_vote(QuorumAIOCB *acb) > +static bool quorum_vote(QuorumAIOCB *acb) > { > bool quorum =3D true; > + bool rewrite =3D false; > int i, j, ret; > QuorumVoteValue hash; > BDRVQuorumState *s =3D acb->common.bs->opaque; > QuorumVoteVersion *winner; > =20 > if (quorum_has_too_much_io_failed(acb)) { > - return; > + return false; > } > =20 > /* get the index of the first successful read */ > @@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb) > /* Every successful read agrees */ > if (quorum) { > quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov); > - return; > + return false; > } > =20 > /* compute hashes for each successful read, also store indexes */ > @@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb) > /* some versions are bad print them */ > quorum_report_bad_versions(s, acb, &winner->value); > =20 > + /* corruption correction is enabled */ > + if (s->rewrite_corrupted) { > + rewrite =3D quorum_rewrite_bad_versions(s, acb, &winner->value= ); > + } > + > free_exit: > /* free lists */ > quorum_free_vote_list(&acb->votes); > + return rewrite; > } > =20 > static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs, > @@ -714,6 +786,11 @@ static QemuOptsList quorum_runtime_opts =3D { > .type =3D QEMU_OPT_BOOL, > .help =3D "Trigger block verify mode if set", > }, > + { > + .name =3D QUORUM_OPT_REWRITE, > + .type =3D QEMU_OPT_BOOL, > + .help =3D "Rewrite corrupted block on read quorum", > + }, > { /* end of list */ } > }, > }; > @@ -775,6 +852,14 @@ static int quorum_open(BlockDriverState *bs, QDict= *options, int flags, > "and using two files with vote_threshold=3D2\n"); > } > =20 > + s->rewrite_corrupted =3D qemu_opt_get_bool(opts, QUORUM_OPT_REWRIT= E, false); > + if (s->rewrite_corrupted && s->is_blkverify) { > + error_setg(&local_err, > + "rewrite-corrupted=3Don cannot be used with blkveri= fy=3Don"); > + ret =3D -EINVAL; > + goto exit; > + } > + > /* allocate the children BlockDriverState array */ > s->bs =3D g_new0(BlockDriverState *, s->num_children); > opened =3D g_new0(bool, s->num_children); > diff --git a/qapi/block-core.json b/qapi/block-core.json > index 33bd93f..eec881b 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -1333,12 +1333,15 @@ > # > # @vote-threshold: the vote limit under which a read will fail > # > +# @rewrite-corrupted: #optional rewrite corrupted data when quorum is = reached > +# (Since 2.1) > +# > # Since: 2.0 > ## > { 'type': 'BlockdevOptionsQuorum', > 'data': { '*blkverify': 'bool', > 'children': [ 'BlockdevRef' ], > - 'vote-threshold': 'int' } } > + 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } } > =20 > ## > # @BlockdevOptions > diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081 > index b512d00..7ae4be2 100755 > --- a/tests/qemu-iotests/081 > +++ b/tests/qemu-iotests/081 > @@ -134,15 +134,28 @@ run_qemu -drive "file=3D$TEST_DIR/2.raw,format=3D= $IMGFMT,if=3Dnone,id=3Ddrive2" < EOF > =20 > echo > +echo "=3D=3D using quorum rewrite corrupted mode =3D=3D" > + > +quorum=3D"$quorum,file.rewrite-corrupted=3Don" > + > +$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu= _io > + > +echo > +echo "=3D=3D checking that quorum has corrected the corrupted file =3D= =3D" > + > +$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io > + > +echo > echo "=3D=3D breaking quorum =3D=3D" > =20 > $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_i= o > +$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_i= o > + > echo > echo "=3D=3D checking that quorum is broken =3D=3D" > =20 > $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu= _io > =20 > - > # success, all done > echo "*** done" > rm -f $seq.full > diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out > index 84aeb0c..1629d2a 100644 > --- a/tests/qemu-iotests/081.out > +++ b/tests/qemu-iotests/081.out > @@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0 > {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "ev= ent": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": tru= e}} > =20 > =20 > +=3D=3D using quorum rewrite corrupted mode =3D=3D > +read 10485760/10485760 bytes at offset 0 > +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) > + > +=3D=3D checking that quorum has corrected the corrupted file =3D=3D > +read 10485760/10485760 bytes at offset 0 > +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) > + > =3D=3D breaking quorum =3D=3D > wrote 10485760/10485760 bytes at offset 0 > 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) > +wrote 10485760/10485760 bytes at offset 0 > +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) > =20 > =3D=3D checking that quorum is broken =3D=3D > qemu-io: can't open device (null): Could not read image for determinin= g its format: Input/output error > --=20 > 1.9.1 >=20