From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KyOBY-0007cN-K0 for qemu-devel@nongnu.org; Fri, 07 Nov 2008 05:03:24 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KyOBX-0007c7-FY for qemu-devel@nongnu.org; Fri, 07 Nov 2008 05:03:23 -0500 Received: from [199.232.76.173] (port=44784 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KyOBX-0007c4-Ae for qemu-devel@nongnu.org; Fri, 07 Nov 2008 05:03:23 -0500 Received: from ecfrec.frec.bull.fr ([129.183.4.8]:41249) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1KyOBW-0000m1-1Y for qemu-devel@nongnu.org; Fri, 07 Nov 2008 05:03:23 -0500 Subject: Re: [Qemu-devel] [PATCH 2/4] qcow2: Allow update_cluster_refcount() to update several clusters refcount. From: Laurent Vivier In-Reply-To: <4913336E.3060309@suse.de> References: <20081106165212.380421945@bull.net> <1225990558.6576.12.camel@frecb07144> <4913336E.3060309@suse.de> Content-Type: text/plain; charset=utf-8 Date: Fri, 07 Nov 2008 11:03:14 +0100 Message-Id: <1226052194.4046.21.camel@frecb07144> Mime-Version: 1.0 Content-Transfer-Encoding: quoted-printable Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Kevin Wolf Cc: qemu-devel@nongnu.org Le jeudi 06 novembre 2008 =C3=A0 19:11 +0100, Kevin Wolf a =C3=A9crit : > Laurent Vivier schrieb: > > pi=C3=A8ce jointe document texte brut > > (0002-Allow-update_cluster_refcount-to-update-several-cl.patch) > > To improve performance when the qcow2 file is empty, this patch > > allows update_cluster_refcount() to update refcount of > > several clusters. > >=20 > > Signed-off-by: Laurent Vivier > > --- > > block-qcow2.c | 107 ++++++++++++++++++++++++++++++++++++-----------= ----------- > > 1 file changed, 68 insertions(+), 39 deletions(-) > >=20 > > Index: qemu/block-qcow2.c > > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D > > --- qemu.orig/block-qcow2.c 2008-11-06 16:40:44.000000000 +0100 > > +++ qemu/block-qcow2.c 2008-11-06 16:40:45.000000000 +0100 > > @@ -159,6 +159,7 @@ static void refcount_close(BlockDriverSt > > static int get_refcount(BlockDriverState *bs, int64_t cluster_index)= ; > > static int update_cluster_refcount(BlockDriverState *bs, > > int64_t cluster_index, > > + int nb_clusters, > > int addend); > > static void update_refcount(BlockDriverState *bs, > > int64_t offset, int64_t length, > > @@ -1711,7 +1712,7 @@ static int update_snapshot_refcount(Bloc > > refcount =3D 2; > > } else { > > if (addend !=3D 0) { > > - refcount =3D update_cluster_refcount(bs,= offset >> s->cluster_bits, addend); > > + refcount =3D update_cluster_refcount(bs,= offset >> s->cluster_bits, 1, addend); > > } else { > > refcount =3D get_refcount(bs, offset >> = s->cluster_bits); > > } > > @@ -1733,7 +1734,7 @@ static int update_snapshot_refcount(Bloc > > } > > =20 > > if (addend !=3D 0) { > > - refcount =3D update_cluster_refcount(bs, l2_offset >= > s->cluster_bits, addend); > > + refcount =3D update_cluster_refcount(bs, l2_offset >= > s->cluster_bits, 1, addend); > > } else { > > refcount =3D get_refcount(bs, l2_offset >> s->cluste= r_bits); > > } > > @@ -2292,14 +2293,14 @@ static int64_t alloc_bytes(BlockDriverSt > > if (free_in_cluster =3D=3D 0) > > s->free_byte_offset =3D 0; > > if ((offset & (s->cluster_size - 1)) !=3D 0) > > - update_cluster_refcount(bs, offset >> s->cluster_bits, 1= ); > > + update_cluster_refcount(bs, offset >> s->cluster_bits, 1= , 1); > > } else { > > offset =3D alloc_clusters(bs, s->cluster_size); > > cluster_offset =3D s->free_byte_offset & ~(s->cluster_size -= 1); > > if ((cluster_offset + s->cluster_size) =3D=3D offset) { > > /* we are lucky: contiguous data */ > > offset =3D s->free_byte_offset; > > - update_cluster_refcount(bs, offset >> s->cluster_bits, 1= ); > > + update_cluster_refcount(bs, offset >> s->cluster_bits, 1= , 1); > > s->free_byte_offset +=3D size; > > } else { > > s->free_byte_offset =3D offset; > > @@ -2389,46 +2390,77 @@ static int grow_refcount_table(BlockDriv > > /* XXX: cache several refcount block clusters ? */ > > static int update_cluster_refcount(BlockDriverState *bs, > > int64_t cluster_index, > > + int nb_clusters, > > int addend) > > { > > BDRVQcowState *s =3D bs->opaque; > > int64_t refcount_block_offset; > > - int ret, refcount_table_index, block_index, refcount; > > + int ret, refcount_table_index, refcount_table_last_index, block_= index, refcount; > > + int nb_block_index; > > + int refcount_cache_size; > > =20 > > - refcount_table_index =3D cluster_index >> (s->cluster_bits - REF= COUNT_SHIFT); > > - if (refcount_table_index >=3D s->refcount_table_size) { > > + if (nb_clusters =3D=3D 0) > > + return 0; > > + > > + refcount_table_last_index =3D (cluster_index + nb_clusters - 1) = >> > > + (s->cluster_bits - REFCOUNT_SHIFT); > > + > > + /* grow the refcount table if needed */ > > + > > + if (refcount_table_last_index >=3D s->refcount_table_size) { > > if (addend < 0) > > return -EINVAL; > > - ret =3D grow_refcount_table(bs, refcount_table_index + 1); > > + ret =3D grow_refcount_table(bs, refcount_table_last_index + = 1); > > if (ret < 0) > > return ret; > > } > > - refcount_block_offset =3D s->refcount_table[refcount_table_index= ]; > > - if (!refcount_block_offset) { > > - if (addend < 0) > > - return -EINVAL; > > - refcount_block_offset =3D alloc_refcount_block(bs, refcount_= table_index); > > - if (refcount_block_offset < 0) > > - return -EINVAL; > > - } else { > > - if (load_refcount_block(bs, refcount_block_offset) < 0) > > + > > + while (nb_clusters) { > > + refcount_table_index =3D cluster_index >> > > + (s->cluster_bits - REFCOUNT_SHIFT); > > + refcount_block_offset =3D s->refcount_table[refcount_table_i= ndex]; >=20 > I guess (comment? ;-)) this outer loop is for handling requests which > span multiple refcount blocks? If so, am I missing something or is Yes, > refcount_block_offset the same for each iteration because cluster_index > never changes? You're right, there is a missing "cluster_index++" at the end of this loop. > > + > > + if (!refcount_block_offset) { > > + if (addend < 0) > > + return -EINVAL; > > + refcount_block_offset =3D alloc_refcount_block(bs, refco= unt_table_index); > > + if (refcount_block_offset < 0) > > + return -EINVAL; > > + } else { > > + if (load_refcount_block(bs, refcount_block_offset) < 0) > > + return -EIO; > > + } > > + > > + /* we can update the count and save it */ > > + > > + refcount_cache_size =3D 1 << (s->cluster_bits - REFCOUNT_SHI= FT); > > + nb_block_index =3D 0; >=20 > I have to admit that nb_block_index is a name where I can't image what > the variable might be for. Seems to be a counter for the changed > refcounts in the current refcount block, and block_index seems to be th= e > first index to be touched in this block. What about first_index and > cur_refcount or something like that? (I don't like these suggestions to= o > much, either. Maybe someone has better names.) >=20 > > + block_index =3D cluster_index & (refcount_cache_size - 1); > > + refcount =3D 0; > > + while (nb_clusters && > > + block_index + nb_block_index < refcount_cache_size) { > > + > > + refcount =3D be16_to_cpu( > > + s->refcount_block_cache[block_index + nb_bl= ock_index]); > > + refcount +=3D addend; > > + if (refcount < 0 || refcount > 0xffff) > > + return -EINVAL; >=20 > Here we possibly abort in the middle of the operation. If it fails > somewhere in the fifth refcount block, what happens with the four > already updated blocks? Yes, you're right. I think we have at least to save refcount we have updated. >=20 > > + if (refcount =3D=3D 0 && > > + cluster_index + nb_block_index < s->free_cluster_ind= ex) { > > + s->free_cluster_index =3D cluster_index + nb_block_i= ndex; > > + } > > + s->refcount_block_cache[block_index + nb_block_index] =3D > > + cpu_to_be1= 6(refcount); > > + nb_block_index++; > > + nb_clusters--; > > + } > > + if (bdrv_pwrite(s->hd, > > + refcount_block_offset + (block_index << REFC= OUNT_SHIFT), > > + s->refcount_block_cache + block_index, > > + nb_block_index * sizeof(uint16_t)) !=3D > > + nb_block_index * sizeof(uint16_t)) > > return -EIO; >=20 > Same here. I think we are not worst than the original behavior. I don't know how to manage this better. > > } > > - /* we can update the count and save it */ > > - block_index =3D cluster_index & > > - ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); > > - refcount =3D be16_to_cpu(s->refcount_block_cache[block_index]); > > - refcount +=3D addend; > > - if (refcount < 0 || refcount > 0xffff) > > - return -EINVAL; > > - if (refcount =3D=3D 0 && cluster_index < s->free_cluster_index) = { > > - s->free_cluster_index =3D cluster_index; > > - } > > - s->refcount_block_cache[block_index] =3D cpu_to_be16(refcount); > > - if (bdrv_pwrite(s->hd, > > - refcount_block_offset + (block_index << REFCOUNT= _SHIFT), > > - &s->refcount_block_cache[block_index], 2) !=3D 2= ) > > - return -EIO; > > return refcount; > > } > > =20 > > @@ -2437,7 +2469,7 @@ static void update_refcount(BlockDriverS > > int addend) > > { > > BDRVQcowState *s =3D bs->opaque; > > - int64_t start, last, cluster_offset; > > + int64_t start, last; > > =20 > > #ifdef DEBUG_ALLOC2 > > printf("update_refcount: offset=3D%lld size=3D%lld addend=3D%d\n= ", > > @@ -2445,12 +2477,9 @@ static void update_refcount(BlockDriverS > > #endif > > if (length <=3D 0) > > return; > > - start =3D offset & ~(s->cluster_size - 1); > > - last =3D (offset + length - 1) & ~(s->cluster_size - 1); > > - for(cluster_offset =3D start; cluster_offset <=3D last; > > - cluster_offset +=3D s->cluster_size) { > > - update_cluster_refcount(bs, cluster_offset >> s->cluster_bit= s, addend); > > - } > > + start =3D offset >> s->cluster_bits; > > + last =3D (offset + length) >> s->cluster_bits; >=20 > Off by one for length % cluster_size =3D=3D 0? Explain, please (but notice the "<=3D" in the "for (...)"). > > + update_cluster_refcount(bs, start, last - start + 1, addend); > > } > > =20 > > #ifdef DEBUG_ALLOC > >=20 >=20 > Kevin >=20 --=20 ------------------ Laurent.Vivier@bull.net ------------------ "Tout ce qui est impossible reste =C3=A0 accomplir" Jules Verne "Things are only impossible until they're not" Jean-Luc Picard