qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: "Benoît Canet" <benoit.canet@irqsave.net>, qemu-devel@nongnu.org
Cc: kwolf@redhat.com, "Benoît Canet" <benoit@irqsave.net>,
	stefanha@redhat.com
Subject: Re: [Qemu-devel] [PATCH V14 06/13] quorum: Add quorum mechanism.
Date: Mon, 03 Feb 2014 20:42:43 +0100	[thread overview]
Message-ID: <52EFF133.9060604@redhat.com> (raw)
In-Reply-To: <1391454712-14353-7-git-send-email-benoit.canet@irqsave.net>

On 03.02.2014 20:11, Benoît Canet wrote:
> From: Benoît Canet <benoit@irqsave.net>
>
> Use gnutls's SHA-256 to compare versions.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>   block/Makefile.objs       |   2 +-
>   block/quorum.c            | 401 +++++++++++++++++++++++++++++++++++++++++++---
>   configure                 |  36 +++++
>   docs/qmp/qmp-events.txt   |  33 ++++
>   include/monitor/monitor.h |   2 +
>   monitor.c                 |   2 +
>   6 files changed, 457 insertions(+), 19 deletions(-)
>
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index a2650b9..4ca9d43 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -3,7 +3,7 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c
>   block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>   block-obj-y += qed-check.o
>   block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
> -block-obj-y += quorum.o
> +block-obj-$(CONFIG_QUORUM) += quorum.o
>   block-obj-y += parallels.o blkdebug.o blkverify.o
>   block-obj-y += snapshot.o qapi.o
>   block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
> diff --git a/block/quorum.c b/block/quorum.c
> index 5b11ac1..56ad6a0 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -13,7 +13,43 @@
>    * See the COPYING file in the top-level directory.
>    */
>   
> +#include <gnutls/gnutls.h>
> +#include <gnutls/crypto.h>
>   #include "block/block_int.h"
> +#include "qapi/qmp/qjson.h"
> +
> +#define HASH_LENGTH 32
> +
> +/* This union holds a vote hash value */
> +typedef union QuorumVoteValue {
> +    char h[HASH_LENGTH];       /* SHA-256 hash */
> +    int64_t l;                 /* simpler 64 bits hash */
> +} QuorumVoteValue;
> +
> +/* A vote item */
> +typedef struct QuorumVoteItem {
> +    int index;
> +    QLIST_ENTRY(QuorumVoteItem) next;
> +} QuorumVoteItem;
> +
> +/* this structure is a vote version. A version is the set of votes sharing the
> + * same vote value.
> + * The set of votes will be tracked with the items field and its cardinality is
> + * vote_count.
> + */
> +typedef struct QuorumVoteVersion {
> +    QuorumVoteValue value;
> +    int index;
> +    int vote_count;
> +    QLIST_HEAD(, QuorumVoteItem) items;
> +    QLIST_ENTRY(QuorumVoteVersion) next;
> +} QuorumVoteVersion;
> +
> +/* this structure holds a group of vote versions together */
> +typedef struct QuorumVotes {
> +    QLIST_HEAD(, QuorumVoteVersion) vote_list;
> +    int (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
> +} QuorumVotes;
>   
>   /* the following structure holds the state of one quorum instance */
>   typedef struct {
> @@ -60,10 +96,14 @@ struct QuorumAIOCB {
>       int success_count;          /* number of successfully completed AIOCB */
>       bool *finished;             /* completion signal for cancel */
>   
> +    QuorumVotes votes;
> +
>       bool is_read;
>       int vote_ret;
>   };
>   
> +static void quorum_vote(QuorumAIOCB *acb);
> +
>   static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
>   {
>       QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
> @@ -81,29 +121,14 @@ static AIOCBInfo quorum_aiocb_info = {
>       .cancel             = quorum_aio_cancel,
>   };
>   
> -/* return the first error code get by each individual callbacks */
> -static int quorum_get_first_error(QuorumAIOCB *acb)
> -{
> -    BDRVQuorumState *s = acb->bqs;
> -    int i, ret = 0;
> -
> -    for (i = 0; i < s->total; i++) {
> -        ret = acb->aios[i].ret;
> -        if (ret) {
> -            return ret;
> -        }
> -    }
> -
> -    /* should not pass here */
> -    assert(false);
> -}
> +static int quorum_vote_error(QuorumAIOCB *acb);
>   
>   static void quorum_aio_finalize(QuorumAIOCB *acb)
>   {
>       BDRVQuorumState *s = acb->bqs;
>       int i, ret;
>   
> -    ret = s->threshold <= acb->success_count ? 0 : quorum_get_first_error(acb);
> +    ret = s->threshold <= acb->success_count ? 0 : quorum_vote_error(acb);
>   
>       for (i = 0; i < s->total; i++) {
>           qemu_vfree(acb->aios[i].buf);
> @@ -111,6 +136,10 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
>           acb->aios[i].ret = 0;
>       }
>   
> +    if (acb->vote_ret) {
> +        ret = acb->vote_ret;
> +    }
> +
>       acb->common.cb(acb->common.opaque, ret);
>       if (acb->finished) {
>           *acb->finished = true;
> @@ -122,6 +151,27 @@ static void quorum_aio_finalize(QuorumAIOCB *acb)
>       qemu_aio_release(acb);
>   }
>   
> +static int quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
> +{
> +    return memcmp(a->h, b->h, HASH_LENGTH);
> +}
> +
> +static int quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
> +{
> +    int64_t i = a->l;
> +    int64_t j = b->l;
> +
> +    if (i < j) {
> +        return -1;
> +    }
> +
> +    if (i > j) {
> +        return 1;
> +    }
> +
> +    return 0;
> +}
> +
>   static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
>                                      BlockDriverState *bs,
>                                      QEMUIOVector *qiov,
> @@ -141,6 +191,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
>       acb->count = 0;
>       acb->success_count = 0;
>       acb->finished = NULL;
> +    acb->votes.compare = quorum_sha256_compare;
>       acb->is_read = false;
>       acb->vote_ret = 0;
>   
> @@ -170,9 +221,323 @@ static void quorum_aio_cb(void *opaque, int ret)
>           return;
>       }
>   
> +    /* Do the vote on read */
> +    if (acb->is_read) {
> +        quorum_vote(acb);
> +    }
> +
>       quorum_aio_finalize(acb);
>   }
>   
> +static void quorum_report_bad(QuorumAIOCB *acb, char *node_name)
> +{
> +    QObject *data;
> +    data = qobject_from_jsonf("{ 'node-name': \"%s\""
> +                              ", 'sector-num': %" PRId64
> +                              ", 'sectors-count': %i }",
> +                              node_name,
> +                              acb->sector_num,
> +                              acb->nb_sectors);
> +    monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
> +    qobject_decref(data);
> +}
> +
> +static void quorum_report_failure(QuorumAIOCB *acb)
> +{
> +    QObject *data;
> +    data = qobject_from_jsonf("{ 'sector-num': %" PRId64
> +                              ", 'sectors-count': %i }",
> +                              acb->sector_num,
> +                              acb->nb_sectors);
> +    monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
> +    qobject_decref(data);
> +}
> +
> +static void quorum_report_bad_versions(BDRVQuorumState *s,
> +                                       QuorumAIOCB *acb,
> +                                       QuorumVoteValue *value)
> +{
> +    QuorumVoteVersion *version;
> +    QuorumVoteItem *item;
> +
> +    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> +        if (!acb->votes.compare(&version->value, value)) {
> +            continue;
> +        }
> +        QLIST_FOREACH(item, &version->items, next) {
> +            quorum_report_bad(acb, s->bs[item->index]->node_name);
> +        }
> +    }
> +}
> +
> +static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> +{
> +    int i;
> +    assert(dest->niov == source->niov);
> +    assert(dest->size == source->size);
> +    for (i = 0; i < source->niov; i++) {
> +        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
> +        memcpy(dest->iov[i].iov_base,
> +               source->iov[i].iov_base,
> +               source->iov[i].iov_len);
> +    }
> +}
> +
> +static void quorum_count_vote(QuorumVotes *votes,
> +                              QuorumVoteValue *value,
> +                              int index)
> +{
> +    QuorumVoteVersion *v = NULL, *version = NULL;
> +    QuorumVoteItem *item;
> +
> +    /* look if we have something with this hash */
> +    QLIST_FOREACH(v, &votes->vote_list, next) {
> +        if (!votes->compare(&v->value, value)) {
> +            version = v;
> +            break;
> +        }
> +    }
> +
> +    /* It's a version not yet in the list add it */
> +    if (!version) {
> +        version = g_new0(QuorumVoteVersion, 1);
> +        QLIST_INIT(&version->items);
> +        memcpy(&version->value, value, sizeof(version->value));
> +        version->index = index;
> +        version->vote_count = 0;
> +        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
> +    }
> +
> +    version->vote_count++;
> +
> +    item = g_new0(QuorumVoteItem, 1);
> +    item->index = index;
> +    QLIST_INSERT_HEAD(&version->items, item, next);
> +}
> +
> +static void quorum_free_vote_list(QuorumVotes *votes)
> +{
> +    QuorumVoteVersion *version, *next_version;
> +    QuorumVoteItem *item, *next_item;
> +
> +    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
> +        QLIST_REMOVE(version, next);
> +        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
> +            QLIST_REMOVE(item, next);
> +            g_free(item);
> +        }
> +        g_free(version);
> +    }
> +}
> +
> +static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
> +{
> +    int j, ret;
> +    gnutls_hash_hd_t dig;
> +    QEMUIOVector *qiov = &acb->aios[i].qiov;
> +
> +    ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
> +
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    for (j = 0; j < qiov->niov; j++) {
> +        ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
> +        if (ret < 0) {
> +            break;
> +        }
> +    }
> +
> +    gnutls_hash_deinit(dig, (void *) hash);
> +    return ret;
> +}
> +
> +static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
> +{
> +    int i = 0;
> +    QuorumVoteVersion *candidate, *winner = NULL;
> +
> +    QLIST_FOREACH(candidate, &votes->vote_list, next) {
> +        if (candidate->vote_count > i) {
> +            i = candidate->vote_count;
> +            winner = candidate;
> +        }
> +    }
> +
> +    return winner;
> +}
> +
> +static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
> +{
> +    int i;
> +    int result;
> +
> +    assert(a->niov == b->niov);
> +    for (i = 0; i < a->niov; i++) {
> +        assert(a->iov[i].iov_len == b->iov[i].iov_len);
> +        result = memcmp(a->iov[i].iov_base,
> +                        b->iov[i].iov_base,
> +                        a->iov[i].iov_len);
> +        if (result) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
> +                                          const char *fmt, ...)
> +{
> +    va_list ap;
> +
> +    va_start(ap, fmt);
> +    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
> +            acb->sector_num, acb->nb_sectors);
> +    vfprintf(stderr, fmt, ap);
> +    fprintf(stderr, "\n");
> +    va_end(ap);
> +    exit(1);
> +}
> +
> +static bool quorum_compare(QuorumAIOCB *acb,
> +                           QEMUIOVector *a,
> +                           QEMUIOVector *b)
> +{
> +    BDRVQuorumState *s = acb->bqs;
> +    ssize_t offset;
> +
> +    /* This driver will replace blkverify in this particular case */
> +    if (s->is_blkverify) {
> +        offset = qemu_iovec_compare(a, b);
> +        if (offset != -1) {
> +            quorum_err(acb, "contents mismatch in sector %" PRId64,
> +                       acb->sector_num +
> +                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
> +        }
> +        return true;
> +    }
> +
> +    return quorum_iovec_compare(a, b);
> +}
> +
> +/* Do a vote to get the error code */
> +static int quorum_vote_error(QuorumAIOCB *acb)
> +{
> +    BDRVQuorumState *s = acb->bqs;
> +    QuorumVoteVersion *winner = NULL;
> +    QuorumVotes error_votes;
> +    QuorumVoteValue result_value;
> +    int i, ret = 0;
> +    bool error = false;
> +
> +    QLIST_INIT(&error_votes.vote_list);
> +    error_votes.compare = quorum_64bits_compare;
> +
> +    for (i = 0; i < s->total; i++) {
> +        ret = acb->aios[i].ret;
> +        if (ret) {
> +            error = true;
> +            result_value.l = ret;
> +            quorum_count_vote(&error_votes, &result_value, i);
> +        }
> +    }
> +
> +    if (error) {
> +        winner = quorum_get_vote_winner(&error_votes);
> +        ret = winner->value.l;
> +    }
> +
> +    quorum_free_vote_list(&error_votes);
> +
> +    return ret;
> +}
> +
> +static void quorum_vote(QuorumAIOCB *acb)
> +{
> +    bool quorum = false;
> +    int i, j, ret;
> +    QuorumVoteValue hash;
> +    BDRVQuorumState *s = acb->bqs;
> +    QuorumVoteVersion *winner;
> +
> +    QLIST_INIT(&acb->votes.vote_list);
> +
> +    /* if we don't get enough successful read use the first error code */

This comment should be updated, since there is now a vote.

> +    if (acb->success_count < s->threshold) {
> +        acb->vote_ret = quorum_vote_error(acb);
> +        quorum_report_failure(acb);
> +        return;
> +    }

With this change (the if condition), I'm fine with leaving giving the 
Quorum error code precedence in quorum_aio_finalize(). (If this hadn't 
been changed, a non-zero success count below the threshold would have 
led this code to set acb->vote_ret to -EIO and subsequently using this 
as the return code in quorum_aio_finalize() due to the precedence. With 
this change, we'll get a more meaningful return code in acb->vote_ret 
and subsequently in quorum_aio_finalize().)


Aside from the comment:

Reviewed-by: Max Reitz <mreitz@redhat.com>

  reply	other threads:[~2014-02-03 19:40 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-03 19:11 [Qemu-devel] [PATCH V14 00/13] Quorum block filter Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 01/13] quorum: Create quorum.c, add QuorumSingleAIOCB and QuorumAIOCB Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 02/13] quorum: Create BDRVQuorumState and BlkDriver and do init Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 03/13] quorum: Add quorum_aio_writev and its dependencies Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 04/13] blkverify: Extract qemu_iovec_clone() and qemu_iovec_compare() from blkverify Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 05/13] quorum: Add quorum_aio_readv Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 06/13] quorum: Add quorum mechanism Benoît Canet
2014-02-03 19:42   ` Max Reitz [this message]
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 07/13] quorum: Add quorum_getlength() Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 08/13] quorum: Add quorum_invalidate_cache() Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 09/13] quorum: Add quorum_co_get_block_status Benoît Canet
2014-02-03 19:46   ` Max Reitz
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 10/13] quorum: Add quorum_co_flush() Benoît Canet
2014-02-03 19:51   ` Max Reitz
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 11/13] quorum: Implement recursive .bdrv_recurse_is_first_non_filter in quorum Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 12/13] quorum: Add quorum_open() and quorum_close() Benoît Canet
2014-02-03 20:22   ` Max Reitz
2014-02-03 21:19     ` Benoît Canet
2014-02-03 19:11 ` [Qemu-devel] [PATCH V14 13/13] quorum: Add unit test Benoît Canet
2014-02-03 20:23   ` Max Reitz
2014-02-03 21:10 ` [Qemu-devel] [PATCH V14 00/13] Quorum block filter Jeff Cody
2014-02-03 22:14   ` Benoît Canet
2014-02-04 10:35 ` Kevin Wolf
2014-02-04 21:54   ` Benoît Canet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52EFF133.9060604@redhat.com \
    --to=mreitz@redhat.com \
    --cc=benoit.canet@irqsave.net \
    --cc=benoit@irqsave.net \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).