From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43754) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WAIWj-0002Ho-M6 for qemu-devel@nongnu.org; Mon, 03 Feb 2014 07:21:30 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1WAIWe-0005wd-Ho for qemu-devel@nongnu.org; Mon, 03 Feb 2014 07:21:25 -0500 Received: from paradis.irqsave.net ([62.212.105.220]:48918) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WAIWd-0005wR-QQ for qemu-devel@nongnu.org; Mon, 03 Feb 2014 07:21:20 -0500 Date: Mon, 3 Feb 2014 13:21:18 +0100 From: =?iso-8859-1?Q?Beno=EEt?= Canet Message-ID: <20140203122118.GE3078@irqsave.net> References: <1390927974-31325-1-git-send-email-benoit.canet@irqsave.net> <1390927974-31325-13-git-send-email-benoit.canet@irqsave.net> <52EED028.2090104@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <52EED028.2090104@redhat.com> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH V10 12/13] quorum: Add quorum_open() and quorum_close(). List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Max Reitz Cc: =?iso-8859-1?Q?Beno=EEt?= Canet , kwolf@redhat.com, qemu-devel@nongnu.org, stefanha@redhat.com Le Monday 03 Feb 2014 =C3=A0 00:09:28 (+0100), Max Reitz a =C3=A9crit : > On 28.01.2014 17:52, Beno=C3=AEt Canet wrote: > >From: Beno=C3=AEt Canet > > > >Example of command line: > >-drive if=3Dvirtio,file.driver=3Dquorum,\ > >file.children.0.file.filename=3D1.raw,\ > >file.children.0.node-name=3D1.raw,\ > >file.children.0.driver=3Draw,\ > >file.children.1.file.filename=3D2.raw,\ > >file.children.1.node-name=3D2.raw,\ > >file.children.1.driver=3Draw,\ > >file.children.2.file.filename=3D3.raw,\ > >file.children.2.node-name=3D3.raw,\ > >file.children.2.driver=3Draw,\ > >file.vote_threshold=3D2 > > > >file.blkverify=3Don with file.vote_threshold=3D2 and two files can be = passed to > >emulated blkverify. > > > >Signed-off-by: Benoit Canet > >--- > > block/quorum.c | 308 +++++++++++++++++++++++++++++++++++++++++++++= ++++++++++ > > qapi-schema.json | 21 +++- > > 2 files changed, 328 insertions(+), 1 deletion(-) > > > >diff --git a/block/quorum.c b/block/quorum.c > >index e7b2090..0c0d630 100644 > >--- a/block/quorum.c > >+++ b/block/quorum.c > >@@ -17,8 +17,12 @@ > > #include > > #include "block/block_int.h" > > #include "qapi/qmp/qjson.h" > >+#include "qapi/qmp/types.h" > >+#include "qemu-common.h" > > #define HASH_LENGTH 32 > >+#define KEY_PREFIX "children." > >+#define KEY_FILENAME_SUFFIX ".file.filename" > > /* This union holds a vote hash value */ > > typedef union QuorumVoteValue { > >@@ -702,12 +706,316 @@ static bool quorum_recurse_is_first_non_filter(= BlockDriverState *bs, > > return false; > > } >=20 > Perhaps you could make use of qdict_extract_subqdict() and > qdict_array_split() functions here...? That is, > qdict_extract_subqdict(..., "children.") and then > qdict_array_split() on the result? >=20 > >+static int quorum_match_key(const char *key, > >+ char **key_prefix) > >+{ > >+ const char *start; > >+ char *next; > >+ unsigned long long idx; > >+ int ret; > >+ > >+ *key_prefix =3D NULL; > >+ > >+ /* the following code is a translation of the following pseudo co= de: > >+ * match =3D key.match('(^children\.(\d+)\.)$suffix) > >+ * if not match: > >+ * return -1; > >+ * key_prefix =3D match.outer_match() > >+ * idx =3D match.inner_match() > >+ * > >+ * note: we also match the .file suffix to avoid futher checkings > >+ */ > >+ > >+ /* this key is not a child */ > >+ if (strncmp(key, KEY_PREFIX, strlen(KEY_PREFIX))) { > >+ return -1; > >+ } > >+ > >+ /* take first char after prefix */ > >+ start =3D key + strlen(KEY_PREFIX); > >+ > >+ /* if the string end here -> scan fail */ > >+ if (start[0] =3D=3D '\0') { > >+ return -1; > >+ } > >+ > >+ /* try to retrieve the index */ > >+ ret =3D parse_uint(start, &idx, &next, 10); > >+ > >+ /* no int found -> scan fail */ > >+ if (ret < 0) { > >+ return -1; > >+ } > >+ > >+ /* we are taking a reference via QMP */ > >+ if (next - key =3D=3D strlen(key)) { >=20 > "if (!*next)" would probably accomplish the same (or "if next[0] =3D=3D > '\0')" to keep your coding style) without having to call strlen(). >=20 > >+ *key_prefix =3D g_strdup(key); > >+ return idx; > >+ } > >+ > >+ /* match the suffix to avoid matching the same idx > >+ * multiple times and be required to do more checks later > >+ */ > >+ if (strncmp(next, KEY_FILENAME_SUFFIX, strlen(KEY_FILENAME_SUFFIX= ))) { >=20 > As stated in my review from October, you may use sizeof() to avoid strl= en(). >=20 > >+ return -1; > >+ } > >+ > >+ /* do not include '.' */ > >+ int len =3D next - key; > >+ *key_prefix =3D g_strndup(key, len); > >+ > >+ return idx; > >+} > >+ > >+static QDict *quorum_get_children_idx(const QDict *options) > >+{ > >+ const QDictEntry *ent; > >+ QDict *result; > >+ char *key_prefix; > >+ int idx; > >+ > >+ result =3D qdict_new(); > >+ > >+ for (ent =3D qdict_first(options); ent; ent =3D qdict_next(option= s, ent)) { > >+ const char *key =3D qdict_entry_key(ent); > >+ idx =3D quorum_match_key(key, > >+ &key_prefix); > >+ > >+ /* if the result zero or positive we got a key */ > >+ if (idx < 0) { > >+ continue; > >+ } > >+ > >+ qdict_put(result, key_prefix, qint_from_int(idx)); > >+ } > >+ > >+ return result; > >+} > >+ > >+static int quorum_fill_validation_array(bool *array, > >+ const QDict *dict, > >+ int total, > >+ Error **errp) > >+{ > >+ const QDictEntry *ent; > >+ > >+ /* fill the checking array with children indexes */ > >+ for (ent =3D qdict_first(dict); ent; ent =3D qdict_next(dict, ent= )) { > >+ const char *key =3D qdict_entry_key(ent); > >+ int idx =3D qdict_get_int(dict, key); > >+ > >+ if (idx < 0 || idx >=3D total) { > >+ error_setg(errp, > >+ "Children index must be between 0 and children count = -1"); >=20 > Quote from October: I'd prefer "- 1" instead of "-1" (or even "Child > index must be an unsigned integer smaller than the children count"). >=20 > >+ return -ERANGE; > >+ } > >+ > >+ array[idx] =3D true; > >+ } > >+ > >+ return 0; > >+} > >+ > >+static int quorum_valid_indexes(const bool *array, int total, Error *= *errp) > >+{ > >+ int i; > >+ > >+ for (i =3D 0; i < total; i++) { > >+ if (array[i] =3D=3D true) { > >+ continue; > >+ } > >+ > >+ error_setg(errp, > >+ "All child indexes between 0 and children count -1 must = be " > >+ " used"); >=20 > Again, I'd prefer "- 1"; then, there are two spaces to the left of > "must" and two spaces after "be". >=20 > >+ return -ERANGE; > >+ } > >+ > >+ return 0; > >+} > >+ > >+static int quorum_valid_children_indexes(const QDict *dict, > >+ int total, > >+ Error **errp) > >+{ > >+ bool *array; > >+ int ret =3D 0;; > >+ > >+ /* allocate indexes checking array and put false in it */ > >+ array =3D g_new0(bool, total); > >+ > >+ ret =3D quorum_fill_validation_array(array, dict, total, errp); > >+ if (ret < 0) { > >+ goto free_exit; > >+ } > >+ > >+ ret =3D quorum_valid_indexes(array, total, errp); > >+free_exit: > >+ g_free(array); > >+ return ret; > >+} > >+ > >+static int quorum_valid_threshold(int threshold, > >+ int total, > >+ Error **errp) > >+{ > >+ > >+ if (threshold < 1) { > >+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, > >+ "vote-threshold", "value >=3D 1"); > >+ return -ERANGE; > >+ } > >+ > >+ if (threshold > total) { > >+ error_setg(errp, "threshold <=3D children count must be true"= ); >=20 > Quote from October: Well, while this might technically be true, I'd > rather go for "threshold may not exceed children count" instead. ;-) >=20 > >+ return -ERANGE; > >+ } > >+ > >+ return 0; > >+} > >+ > >+static int quorum_open(BlockDriverState *bs, > >+ QDict *options, > >+ int flags, > >+ Error **errp) > >+{ > >+ BDRVQuorumState *s =3D bs->opaque; > >+ Error *local_err =3D NULL; > >+ const QDictEntry *ent; > >+ QDict *idx_dict; > >+ bool *opened; > >+ const char *value; > >+ char *next; > >+ int i; > >+ int ret =3D 0; > >+ > >+ /* get a dict of children indexes for validation */ > >+ idx_dict =3D quorum_get_children_idx(options); > >+ > >+ /* count how many different children indexes are present and vali= date */ > >+ s->total =3D qdict_size(idx_dict); > >+ if (s->total < 2) { > >+ error_setg(&local_err, > >+ "Number of provided children must be greater than = 1"); > >+ ret =3D -EINVAL; > >+ goto exit; > >+ } > >+ > >+ /* validate that the set of index is coherent */ > >+ ret =3D quorum_valid_children_indexes(idx_dict, s->total, &local_= err); > >+ if (ret < 0) { > >+ goto exit; > >+ } > >+ > >+ ret =3D qdict_get_try_int(options, "vote-threshold", -1); > >+ /* from QMP */ > >+ if (ret !=3D -1) { > >+ qdict_del(options, "vote-threshold"); > >+ s->threshold =3D ret; > >+ /* from command line */ > >+ } else { > >+ /* retrieve the threshold option from the command line */ > >+ value =3D qdict_get_try_str(options, "vote_threshold"); > >+ if (!value) { > >+ error_setg(&local_err, > >+ "vote_threshold must be provided"); > >+ ret =3D -EINVAL; > >+ goto exit; > >+ } > >+ qdict_del(options, "vote_threshold"); > >+ > >+ ret =3D parse_uint(value, (unsigned long long *) &s->threshol= d, &next, 10); >=20 > I don't like this cast at all... >=20 > >+ > >+ /* no int found -> scan fail */ > >+ if (ret < 0) { > >+ error_setg(&local_err, > >+ "invalid voter_threshold specified"); >=20 > *vote_threshold >=20 > >+ ret =3D -EINVAL; > >+ goto exit; > >+ } > >+ } > >+ > >+ /* and validate it againts s->total */ >=20 > Still *against >=20 > >+ ret =3D quorum_valid_threshold(s->threshold, s->total, &local_err= ); > >+ if (ret < 0) { > >+ goto exit; > >+ } > >+ > >+ /* is the driver in blkverify mode */ > >+ value =3D qdict_get_try_str(options, "blkverify"); > >+ if (value && !strcmp(value, "on") && > >+ s->total =3D=3D 2 && s->threshold =3D=3D 2) { > >+ s->is_blkverify =3D true; > >+ } >=20 > Quote from October: If the user specifies anything different from > "on" or if he does and s->total or s->threshold is not 2, quorum > will silently work in non-blkverify mode without telling the user. > So I'd emit a message here if blkverify has been specified and its > value is different from "off" but s->is_blkverify remains false > (i.e., =E2=80=9Celse if (value && strcmp(value, "off")) { fprintf(stder= r, > "[Some warning]"); }=E2=80=9D). >=20 > >+ qdict_del(options, "blkverify"); > >+ > >+ /* allocate the children BlockDriverState array */ > >+ s->bs =3D g_new0(BlockDriverState *, s->total); > >+ opened =3D g_new0(bool, s->total); > >+ > >+ /* open children bs */ > >+ for (ent =3D qdict_first(idx_dict); > >+ ent; ent =3D qdict_next(idx_dict, ent)) { >=20 > This condition fits on a single line. >=20 > >+ const char *key =3D qdict_entry_key(ent); > >+ int idx =3D qdict_get_int(idx_dict, key); > >+ ret =3D bdrv_open_image(&s->bs[idx], > >+ NULL, > >+ options, > >+ key, > >+ flags, > >+ false, > >+ &local_err); >=20 > This takes two, but definitely not seven. >=20 > >+ if (ret < 0) { > >+ goto close_exit; > >+ } > >+ opened[idx] =3D true; > >+ } > >+ > >+ g_free(opened); > >+ goto exit; > >+ > >+close_exit: > >+ /* cleanup on error */ > >+ for (i =3D 0; i < s->total; i++) { > >+ if (!opened[i]) { > >+ continue; > >+ } > >+ bdrv_close(s->bs[i]); >=20 > You'll probably want bdrv_unref() here. I don't think so because to simplify memory management s->bs is an array containing all the allocated bs and is freed at once. So should bdrv_unref still be used ? >=20 > >+ } > >+ g_free(s->bs); > >+ g_free(opened); > >+exit: > >+ /* propagate error */ > >+ if (error_is_set(&local_err)) { > >+ error_propagate(errp, local_err); > >+ } > >+ return ret; > >+} > >+ > >+static void quorum_close(BlockDriverState *bs) > >+{ > >+ BDRVQuorumState *s =3D bs->opaque; > >+ int i; > >+ > >+ for (i =3D 0; i < s->total; i++) { > >+ /* Ensure writes reach stable storage */ > >+ bdrv_flush(s->bs[i]); > >+ /* close manually because we'll free s->bs */ > >+ bdrv_close(s->bs[i]); >=20 > Again, bdrv_unref(). >=20 > >+ } > >+ > >+ g_free(s->bs); > >+} > >+ > > static BlockDriver bdrv_quorum =3D { > > .format_name =3D "quorum", > > .protocol_name =3D "quorum", > > .instance_size =3D sizeof(BDRVQuorumState), > >+ .bdrv_file_open =3D quorum_open, > >+ .bdrv_close =3D quorum_close, > >+ > > .bdrv_co_flush_to_disk =3D quorum_co_flush, > > .bdrv_getlength =3D quorum_getlength, > >diff --git a/qapi-schema.json b/qapi-schema.json > >index 05ced9d..903a3a0 100644 > >--- a/qapi-schema.json > >+++ b/qapi-schema.json > >@@ -4352,6 +4352,24 @@ > > 'raw': 'BlockdevRef' } } > > ## > >+# @BlockdevOptionsQuorum > >+# > >+# Driver specific block device options for Quorum > >+# > >+# @blkverify: #optional true if the driver must print content mi= smatch > >+# > >+# @children: the children block device to use > >+# > >+# @vote_threshold: the vote limit under which a read will fail > >+# > >+# Since: 2.0 > >+## > >+{ 'type': 'BlockdevOptionsQuorum', > >+ 'data': { '*blkverify': 'bool', > >+ 'children': [ 'BlockdevRef' ], > >+ 'vote-threshold': 'int' } } > >+ > >+## > > # @BlockdevOptions > > # > > # Options for creating a block device. > >@@ -4389,7 +4407,8 @@ > > 'vdi': 'BlockdevOptionsGenericFormat', > > 'vhdx': 'BlockdevOptionsGenericFormat', > > 'vmdk': 'BlockdevOptionsGenericCOWFormat', > >- 'vpc': 'BlockdevOptionsGenericFormat' > >+ 'vpc': 'BlockdevOptionsGenericFormat', > >+ 'quorum': 'BlockdevOptionsQuorum' > > } } > > ## >=20 > As I've said before, I'd really like it if you could abandon all > those functions for parsing the incoming options QDict in favor of > qdict_extract_subqdict() and qdict_array_split() which will most > likely do the job just fine; and, in fact, I have written > qdict_array_split() with Quorum in mind. ;-) >=20 > Max >=20