From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45168) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1acGhg-0003y2-9B for qemu-devel@nongnu.org; Sat, 05 Mar 2016 13:13:25 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1acGhe-0003FO-R8 for qemu-devel@nongnu.org; Sat, 05 Mar 2016 13:13:24 -0500 References: <1455615450-15138-1-git-send-email-xiecl.fnst@cn.fujitsu.com> <1455615450-15138-3-git-send-email-xiecl.fnst@cn.fujitsu.com> From: Max Reitz Message-ID: <56DB21B7.7050104@redhat.com> Date: Sat, 5 Mar 2016 19:13:11 +0100 MIME-Version: 1.0 In-Reply-To: <1455615450-15138-3-git-send-email-xiecl.fnst@cn.fujitsu.com> Content-Type: multipart/signed; micalg=pgp-sha256; protocol="application/pgp-signature"; boundary="VIvnALD8HlAMH14QRj8WPMfJU7FOvn1Ps" Subject: Re: [Qemu-devel] [PATCH v10 2/3] quorum: implement bdrv_add_child() and bdrv_del_child() List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Changlong Xie , qemu devel , Eric Blake , Alberto Garcia , Kevin Wolf , Stefan Hajnoczi Cc: qemu block , Jiang Yunhong , Dong Eddie , Markus Armbruster , "Dr. David Alan Gilbert" , Gonglei , zhanghailiang This is an OpenPGP/MIME signed message (RFC 4880 and 3156) --VIvnALD8HlAMH14QRj8WPMfJU7FOvn1Ps Content-Type: multipart/mixed; boundary="kkGdNSxNBr868XNGgL16uSvuiBx12lf92" From: Max Reitz To: Changlong Xie , qemu devel , Eric Blake , Alberto Garcia , Kevin Wolf , Stefan Hajnoczi Cc: Markus Armbruster , "Dr. 
David Alan Gilbert" , Dong Eddie , Jiang Yunhong , Wen Congyang , qemu block , zhanghailiang , Gonglei Message-ID: <56DB21B7.7050104@redhat.com> Subject: Re: [PATCH v10 2/3] quorum: implement bdrv_add_child() and bdrv_del_child() References: <1455615450-15138-1-git-send-email-xiecl.fnst@cn.fujitsu.com> <1455615450-15138-3-git-send-email-xiecl.fnst@cn.fujitsu.com> In-Reply-To: <1455615450-15138-3-git-send-email-xiecl.fnst@cn.fujitsu.com> --kkGdNSxNBr868XNGgL16uSvuiBx12lf92 Content-Type: text/plain; charset=iso-8859-15 Content-Transfer-Encoding: quoted-printable On 16.02.2016 10:37, Changlong Xie wrote: > From: Wen Congyang >=20 > Signed-off-by: Wen Congyang > Signed-off-by: zhanghailiang > Signed-off-by: Gonglei > Signed-off-by: Changlong Xie > --- > block.c | 8 ++-- > block/quorum.c | 122 ++++++++++++++++++++++++++++++++++++++++++= +++++++- > include/block/block.h | 4 ++ > 3 files changed, 128 insertions(+), 6 deletions(-) >=20 > diff --git a/block.c b/block.c > index 08aa979..c3c9dc0 100644 > --- a/block.c > +++ b/block.c > @@ -1198,10 +1198,10 @@ static int bdrv_fill_options(QDict **options, c= onst char *filename, > return 0; > } > =20 > -static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, > - BlockDriverState *child_bs, > - const char *child_name, > - const BdrvChildRole *child_role) > +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, > + BlockDriverState *child_bs, > + const char *child_name, > + const BdrvChildRole *child_role) > { > BdrvChild *child =3D g_new(BdrvChild, 1); > *child =3D (BdrvChild) { > diff --git a/block/quorum.c b/block/quorum.c > index a5ae4b8..e5a7e4f 100644 > --- a/block/quorum.c > +++ b/block/quorum.c > @@ -24,6 +24,7 @@ > #include "qapi/qmp/qstring.h" > #include "qapi-event.h" > #include "crypto/hash.h" > +#include "qemu/bitmap.h" > =20 > #define HASH_LENGTH 32 > =20 > @@ -81,6 +82,8 @@ typedef struct BDRVQuorumState { > bool rewrite_corrupted;/* true if the driver must rewrite-on-read = corrupted > * block if 
Quorum is reached. > */ > + unsigned long *index_bitmap; > + int bsize; > =20 > QuorumReadPattern read_pattern; > } BDRVQuorumState; > @@ -876,9 +879,9 @@ static int quorum_open(BlockDriverState *bs, QDict = *options, int flags, > ret =3D -EINVAL; > goto exit; > } > - if (s->num_children < 2) { > + if (s->num_children < 1) { > error_setg(&local_err, > - "Number of provided children must be greater than 1= "); > + "Number of provided children must be 1 or more"); Side note: Actually, we could work with 0 children, too. Quorum would then need to implement bdrv_is_inserted() and return false if there are no children. But that is something that can be implemented later on if the need arises= =2E > ret =3D -EINVAL; > goto exit; > } > @@ -927,6 +930,7 @@ static int quorum_open(BlockDriverState *bs, QDict = *options, int flags, > /* allocate the children array */ > s->children =3D g_new0(BdrvChild *, s->num_children); > opened =3D g_new0(bool, s->num_children); > + s->index_bitmap =3D bitmap_new(s->num_children); > =20 > for (i =3D 0; i < s->num_children; i++) { > char indexstr[32]; > @@ -942,6 +946,8 @@ static int quorum_open(BlockDriverState *bs, QDict = *options, int flags, > =20 > opened[i] =3D true; > } > + bitmap_set(s->index_bitmap, 0, s->num_children); > + s->bsize =3D s->num_children; > =20 > g_free(opened); > goto exit; > @@ -998,6 +1004,115 @@ static void quorum_attach_aio_context(BlockDrive= rState *bs, > } > } > =20 > +static int get_new_child_index(BDRVQuorumState *s) > +{ > + int index; > + > + index =3D find_next_zero_bit(s->index_bitmap, s->bsize, 0); > + if (index < s->bsize) { > + return index; > + } > + > + if ((s->bsize % BITS_PER_LONG) =3D=3D 0) { > + s->index_bitmap =3D bitmap_zero_extend(s->index_bitmap, s->bsi= ze, > + s->bsize + 1); I think this function needs to be called unconditionally. 
Looking into its implementation, its call to g_realloc() will not do anything (and it will probably be pretty quick at that), but the following bitmap_clear() will only clear the bits from old_nbits (s->bsize) to new_nbits (s->bsize + 1). Thus, if you only call this function every 32nd/64th child, only that child's bit will be initialized to zero. All the rest is undefined. You probably didn't notice because bitmap_new() returns a zero-initialized bitmap, and thus you'd have to create around 64 children (on an x64 machine) to notice. > + } > + > + return s->bsize++; > +} > + > +static void remove_child_index(BDRVQuorumState *s, int index) > +{ > + int last_index; > + long new_len; size_t would be the more appropriate type. > + > + assert(index < s->bsize); > + > + clear_bit(index, s->index_bitmap); > + if (index < s->bsize - 1) { > + /* > + * The last bit is always set, and we don't clear s/don't/didn't/ > + * the last bit. > + */ > + return; > + } > + > + last_index =3D find_last_bit(s->index_bitmap, s->bsize); An assert(last_index < s->bsize); here wouldn't hurt. (last_index == s->bsize would be the case if no bit is set in s->index_bitmap anymore, which should be impossible.) > + s->bsize =3D last_index + 1; > + if (BITS_TO_LONGS(last_index + 1) =3D=3D BITS_TO_LONGS(s->bsize)) = { > + return; > + } > + > + new_len =3D BITS_TO_LONGS(last_index + 1) * sizeof(unsigned long);= s/last_index + 1/s->bsize/ looks better to me. 
> + s->index_bitmap =3D g_realloc(s->index_bitmap, new_len); > +} > + > +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *c= hild_bs, > + Error **errp) > +{ > + BDRVQuorumState *s =3D bs->opaque; > + BdrvChild *child; > + char indexstr[32]; > + int index, ret; > + > + index =3D get_new_child_index(s); > + ret =3D snprintf(indexstr, 32, "children.%d", index); > + if (ret < 0 || ret >=3D 32) { > + error_setg(errp, "cannot generate child name"); > + return; > + } > + > + bdrv_drain(bs); > + > + assert(s->num_children <=3D INT_MAX / sizeof(BdrvChild *)); > + if (s->num_children =3D=3D INT_MAX / sizeof(BdrvChild *)) { > + error_setg(errp, "Too many children"); > + return; > + } > + s->children =3D g_renew(BdrvChild *, s->children, s->num_children = + 1); > + > + bdrv_ref(child_bs); > + child =3D bdrv_attach_child(bs, child_bs, indexstr, &child_format)= ; > + s->children[s->num_children++] =3D child; > + set_bit(index, s->index_bitmap); > +} > + > +static void quorum_del_child(BlockDriverState *bs, BlockDriverState *c= hild_bs, > + Error **errp) > +{ > + BDRVQuorumState *s =3D bs->opaque; > + BdrvChild *child; > + int i, index; > + > + for (i =3D 0; i < s->num_children; i++) { > + if (s->children[i]->bs =3D=3D child_bs) { > + break; > + } > + } > + > + /* we have checked it in bdrv_del_child() */ > + assert(i < s->num_children); > + child =3D s->children[i]; > + > + if (s->num_children <=3D s->threshold) { > + error_setg(errp, > + "The number of children cannot be lower than the vote thre= shold %d", > + s->threshold); > + return; > + } > + > + /* child->name is "children.%d" */ Optional: assert(!strncmp(child->name, "children.", 9)); > + index =3D atoi(child->name + 9); Optional: Assert absence of an error: unsigned long index; char *endptr; index =3D strtoul(child->name + 9, &endptr, 10); assert(index >=3D 0 && !*endptr); Max > + > + bdrv_drain(bs); > + /* We can safely remove this child now */ > + memmove(&s->children[i], &s->children[i + 1], > + 
(s->num_children - i - 1) * sizeof(void *)); > + s->children =3D g_renew(BdrvChild *, s->children, --s->num_childre= n); > + remove_child_index(s, index); > + bdrv_unref_child(bs, child); > +} > + > static void quorum_refresh_filename(BlockDriverState *bs, QDict *optio= ns) > { > BDRVQuorumState *s =3D bs->opaque; > @@ -1053,6 +1168,9 @@ static BlockDriver bdrv_quorum =3D { > .bdrv_detach_aio_context =3D quorum_detach_aio_context,= > .bdrv_attach_aio_context =3D quorum_attach_aio_context,= > =20 > + .bdrv_add_child =3D quorum_add_child, > + .bdrv_del_child =3D quorum_del_child, > + > .is_filter =3D true, > .bdrv_recurse_is_first_non_filter =3D quorum_recurse_is_first_no= n_filter, > }; > diff --git a/include/block/block.h b/include/block/block.h > index ecde190..4b787d2 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -517,6 +517,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *b= s); > void bdrv_ref(BlockDriverState *bs); > void bdrv_unref(BlockDriverState *bs); > void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); > +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, > + BlockDriverState *child_bs, > + const char *child_name, > + const BdrvChildRole *child_role); > =20 > bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **= errp); > void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason= ); >=20 --kkGdNSxNBr868XNGgL16uSvuiBx12lf92-- --VIvnALD8HlAMH14QRj8WPMfJU7FOvn1Ps Content-Type: application/pgp-signature; name="signature.asc" Content-Description: OpenPGP digital signature Content-Disposition: attachment; filename="signature.asc" -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQEcBAEBCAAGBQJW2yG3AAoJEDuxQgLoOKytgC8H/jYBVCsPDqTGr3mQyU9J9eX9 3Heghq/yPDIip9Sqz4y26P+BjoRNFBUSMPvA1XvJ6qSP48gfNqeEhTg1PHIGxcxm UE08sclLKC6fqWR02jZ77b33604CHYacy3OPJbyiWRETpn0u3HX3FIri+XySlfDr z5j9Rc8p6K89JTFxATsNOX+1RzggI/CWLHHcQBlgIj5iq5JPCrp8wdWxgh/fCDNB 
RlSRoIXDSj1v50JwaqKWWdoZrJT8uKI6a7lU58i+k/ZkSx0xBaKJkxDyi7h+rMte SQKP+O5KRslcIE3lx36p6Iijvdw7rarcNCZeXq3erbXsqHgGbLCFJjG2+h7+6tA= =un5R -----END PGP SIGNATURE----- --VIvnALD8HlAMH14QRj8WPMfJU7FOvn1Ps--