From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46020) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XMi65-0001i7-Te for qemu-devel@nongnu.org; Wed, 27 Aug 2014 14:37:36 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1XMi5z-0005o0-Ds for qemu-devel@nongnu.org; Wed, 27 Aug 2014 14:37:29 -0400 Received: from mx1.redhat.com ([209.132.183.28]:61893) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XMi5z-0005nm-3L for qemu-devel@nongnu.org; Wed, 27 Aug 2014 14:37:23 -0400 Message-ID: <53FE2553.5030605@redhat.com> Date: Wed, 27 Aug 2014 20:37:07 +0200 From: Max Reitz MIME-Version: 1.0 References: <1408725104-17176-1-git-send-email-mreitz@redhat.com> <1408725104-17176-8-git-send-email-mreitz@redhat.com> <20140825174039.GC18202@nodalink.com> In-Reply-To: <20140825174039.GC18202@nodalink.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v3 07/10] qcow2: Rebuild refcount structure during check List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: =?windows-1252?Q?Beno=EEt_Canet?= Cc: Kevin Wolf , qemu-devel@nongnu.org, Stefan Hajnoczi On 25.08.2014 19:40, Beno=EEt Canet wrote: > On Fri, Aug 22, 2014 at 06:31:41PM +0200, Max Reitz wrote: >> The previous commit introduced the "rebuild" variable to qcow2's >> implementation of the image consistency check. Now make use of this by >> adding a function which creates a completely new refcount structure >> based solely on the in-memory information gathered before. >> >> The old refcount structure will be leaked, however. >> >> Signed-off-by: Max Reitz >> --- >> block/qcow2-refcount.c | 265 +++++++++++++++++++++++++++++++++++++++= +++++++++- >> 1 file changed, 262 insertions(+), 3 deletions(-) >> >> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c >> index 242a20c..59cab65 100644 >> --- a/block/qcow2-refcount.c >> +++ b/block/qcow2-refcount.c >> @@ -1591,6 +1591,245 @@ static void compare_refcounts(BlockDriverState= *bs, BdrvCheckResult *res, >> } >> =20 >> /* >> + * Allocates a cluster using an in-memory refcount table (IMRT) in co= ntrast to >> + * the on-disk refcount structures. >> + * >> + * *first_free_cluster does not necessarily point to the first free c= luster, but >> + * may point to one cluster as close as possible before it. The offse= t returned >> + * will never be before that cluster. >> + * >> + * Note that *first_free_cluster is a cluster index whereas the retur= n value is >> + * an offset. >> + */ >> +static int64_t alloc_clusters_imrt(BlockDriverState *bs, >> + int cluster_count, >> + uint16_t **refcount_table, >> + int64_t *nb_clusters, >> + int64_t *first_free_cluster) >> +{ >> + BDRVQcowState *s =3D bs->opaque; >> + int64_t cluster =3D *first_free_cluster, i; >> + bool first_gap =3D true; >> + int contiguous_clusters; >> + > >> + for (contiguous_clusters =3D 0; >> + cluster < *nb_clusters && contiguous_clusters < cluster_coun= t; >> + cluster++) >> + { >> + if (!(*refcount_table)[cluster]) { >> + contiguous_clusters++; >> + if (first_gap) { >> + *first_free_cluster =3D cluster; >> + first_gap =3D false; >> + } >> + } else if (contiguous_clusters) { >> + contiguous_clusters =3D 0; >> + } >> + } > s/contignuous_clusters/contiguous_free_clusters/ > > I think I understood the above block better than the first time but I i= t would > worth a comment on it's rather complex purpose. > >> + >> + if (contiguous_clusters < cluster_count) { >> + int64_t old_nb_clusters =3D *nb_clusters; >> + >> + *nb_clusters =3D cluster + cluster_count - contiguous_cluster= s; >> + *refcount_table =3D g_try_realloc(*refcount_table, >> + *nb_clusters * sizeof(uint16_= t)); >> + if (!*refcount_table) { >> + return -ENOMEM; >> + } >> + >> + memset(*refcount_table + old_nb_clusters, 0, >> + (*nb_clusters - old_nb_clusters) * sizeof(uint16_t)); >> + } >> + >> + cluster -=3D contiguous_clusters; >> + for (i =3D 0; i < cluster_count; i++) { >> + (*refcount_table)[cluster + i] =3D 1; >> + } >> + >> + return cluster << s->cluster_bits; >> +} >> + >> +/* >> + * Creates a new refcount structure based solely on the in-memory inf= ormation >> + * given through *refcount_table. All necessary allocations will be r= eflected >> + * in that array. >> + * >> + * On success, the old refcount structure is leaked (it will be cover= ed by the >> + * new refcount structure). >> + */ >> +static int rebuild_refcount_structure(BlockDriverState *bs, >> + BdrvCheckResult *res, >> + uint16_t **refcount_table, >> + int64_t *nb_clusters) >> +{ >> + BDRVQcowState *s =3D bs->opaque; >> + int64_t first_free_cluster =3D 0, rt_ofs =3D -1, cluster =3D 0; > /* rb stands for refcount block and rt stands for refcount table */ > >> + int64_t rb_ofs, rb_start, rb_index; >> + uint32_t reftable_size =3D 0; >> + uint64_t *reftable =3D NULL; >> + uint16_t *on_disk_rb; >> + uint8_t rt_offset_and_clusters[sizeof(uint64_t) + sizeof(uint32_t= )]; >> + int i, ret =3D 0; >> + >> + qcow2_cache_empty(bs, s->refcount_block_cache); >> + >> +write_refblocks: >> + for (; cluster < *nb_clusters; cluster++) { >> + if (!(*refcount_table)[cluster]) { >> + continue; >> + } >> + >> + rb_index =3D cluster >> (s->cluster_bits - 1); >> + rb_start =3D rb_index << (s->cluster_bits - 1); >> + >> + /* Don't allocate a cluster in a refblock already written to = disk */ >> + if (first_free_cluster < rb_start) { >> + first_free_cluster =3D rb_start; >> + } >> + rb_ofs =3D alloc_clusters_imrt(bs, 1, refcount_table, nb_clus= ters, >> + &first_free_cluster); >> + if (rb_ofs < 0) { >> + fprintf(stderr, "ERROR allocating refblock: %s\n", strerr= or(-ret)); >> + res->check_errors++; >> + ret =3D rb_ofs; >> + goto fail; >> + } >> + >> + if (reftable_size <=3D rb_index) { >> + uint32_t old_rt_size =3D reftable_size; >> + reftable_size =3D ROUND_UP((rb_index + 1) * sizeof(uint64= _t), >> + s->cluster_size) / sizeof(uint64= _t); >> + reftable =3D g_try_realloc(reftable, >> + reftable_size * sizeof(uint64_t)= ); >> + if (!reftable) { >> + res->check_errors++; >> + ret =3D -ENOMEM; >> + goto fail; >> + } >> + >> + memset(reftable + old_rt_size, 0, >> + (reftable_size - old_rt_size) * sizeof(uint64_t)); >> + >> + /* The offset we have for the reftable is now no longer v= alid; >> + * this will leak that range, but we can easily fix that = by running >> + * a leak-fixing check after this rebuild operation */ >> + rt_ofs =3D -1; >> + } >> + reftable[rb_index] =3D rb_ofs; >> + >> + /* If this is apparently the last refblock (for now), try to = squeeze the >> + * reftable in */ >> + if (rb_index =3D=3D (*nb_clusters - 1) >> (s->cluster_bits - = 1) > ^ = I don't understand the - 1 here. >> && Oh, sorry, I missed your remark here. The 1 is log2(sizeof(uint16_t)),=20 that is, log2(size of a refcount block entry). I'll try to make it more=20 obvious in v4 (and I'll add comments addressing for alloc_clusters_imrt()= ). Max >> + rt_ofs < 0) >> + { >> + rt_ofs =3D alloc_clusters_imrt(bs, size_to_clusters(s, re= ftable_size * >> + sizeof(= uint64_t)), >> + refcount_table, nb_clusters, >> + &first_free_cluster); >> + if (rt_ofs < 0) { >> + fprintf(stderr, "ERROR allocating reftable: %s\n", >> + strerror(-ret)); >> + res->check_errors++; >> + ret =3D rt_ofs; >> + goto fail; >> + } >> + } >> + >> + ret =3D qcow2_pre_write_overlap_check(bs, 0, rb_ofs, s->clust= er_size); >> + if (ret < 0) { >> + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(= -ret)); >> + goto fail; >> + } >> + >> + on_disk_rb =3D g_malloc0(s->cluster_size); >> + for (i =3D 0; i < s->cluster_size / sizeof(uint16_t) && >> + rb_start + i < *nb_clusters; i++) >> + { >> + on_disk_rb[i] =3D cpu_to_be16((*refcount_table)[rb_start = + i]); >> + } >> + >> + ret =3D bdrv_write(bs->file, rb_ofs / BDRV_SECTOR_SIZE, >> + (void *)on_disk_rb, s->cluster_sectors); >> + g_free(on_disk_rb); >> + if (ret < 0) { >> + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(= -ret)); >> + goto fail; >> + } >> + >> + /* Go to the end of this refblock */ >> + cluster =3D rb_start + s->cluster_size / sizeof(uint16_t) - 1= ; >> + } >> + >> + if (rt_ofs < 0) { >> + int64_t post_rb_start =3D ROUND_UP(*nb_clusters, >> + s->cluster_size / sizeof(uin= t16_t)); >> + >> + /* Not pretty but simple */ >> + if (first_free_cluster < post_rb_start) { >> + first_free_cluster =3D post_rb_start; >> + } >> + rt_ofs =3D alloc_clusters_imrt(bs, size_to_clusters(s, reftab= le_size * >> + sizeof(uint= 64_t)), >> + refcount_table, nb_clusters, >> + &first_free_cluster); >> + if (rt_ofs < 0) { >> + fprintf(stderr, "ERROR allocating reftable: %s\n", strerr= or(-ret)); >> + res->check_errors++; >> + ret =3D rt_ofs; >> + goto fail; >> + } >> + >> + goto write_refblocks; >> + } >> + >> + assert(reftable); >> + >> + for (rb_index =3D 0; rb_index < reftable_size; rb_index++) { >> + cpu_to_be64s(&reftable[rb_index]); >> + } >> + >> + ret =3D qcow2_pre_write_overlap_check(bs, 0, rt_ofs, >> + reftable_size * sizeof(uint64= _t)); >> + if (ret < 0) { >> + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret= )); >> + goto fail; >> + } >> + >> + ret =3D bdrv_write(bs->file, rt_ofs / BDRV_SECTOR_SIZE, (void *)r= eftable, >> + reftable_size * sizeof(uint64_t) / BDRV_SECTOR_S= IZE); >> + if (ret < 0) { >> + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret= )); >> + goto fail; >> + } >> + >> + /* Enter new reftable into the image header */ >> + cpu_to_be64w((uint64_t *)&rt_offset_and_clusters[0], rt_ofs); >> + cpu_to_be32w((uint32_t *)&rt_offset_and_clusters[sizeof(uint64_t)= ], >> + size_to_clusters(s, reftable_size * sizeof(uint64_t)= )); >> + ret =3D bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, >> + refcount_table_offset), >> + rt_offset_and_clusters, >> + sizeof(rt_offset_and_clusters)); >> + if (ret < 0) { >> + fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret= )); >> + goto fail; >> + } >> + >> + for (rb_index =3D 0; rb_index < reftable_size; rb_index++) { >> + be64_to_cpus(&reftable[rb_index]); >> + } >> + s->refcount_table =3D reftable; >> + s->refcount_table_offset =3D rt_ofs; >> + s->refcount_table_size =3D reftable_size; >> + >> + return 0; >> + >> +fail: >> + g_free(reftable); >> + return ret; >> +} >> + >> +/* >> * Checks an image for refcount consistency. >> * >> * Returns 0 if no errors are found, the number of errors in case th= e image is >> @@ -1600,6 +1839,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, = BdrvCheckResult *res, >> BdrvCheckMode fix) >> { >> BDRVQcowState *s =3D bs->opaque; >> + BdrvCheckResult pre_compare_res; >> int64_t size, highest_cluster, nb_clusters; >> uint16_t *refcount_table =3D NULL; >> bool rebuild =3D false; >> @@ -1626,11 +1866,30 @@ int qcow2_check_refcounts(BlockDriverState *bs= , BdrvCheckResult *res, >> goto fail; >> } >> =20 >> - compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refco= unt_table, >> + /* In case we don't need to rebuild the refcount structure (but w= ant to fix >> + * something), this function is immediately called again, in whic= h case the >> + * result should be ignored */ >> + pre_compare_res =3D *res; >> + compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcoun= t_table, >> nb_clusters); >> =20 >> - if (rebuild) { >> - fprintf(stderr, "ERROR need to rebuild refcount structures\n"= ); >> + if (rebuild && (fix & BDRV_FIX_ERRORS)) { >> + fprintf(stderr, "Rebuilding refcount structure\n"); >> + ret =3D rebuild_refcount_structure(bs, res, &refcount_table, >> + &nb_clusters); >> + if (ret < 0) { >> + goto fail; >> + } >> + } else if (fix) { >> + if (rebuild) { >> + fprintf(stderr, "ERROR need to rebuild refcount structure= s\n"); >> + } >> + >> + if (res->leaks || res->corruptions) { >> + *res =3D pre_compare_res; >> + compare_refcounts(bs, res, fix, &rebuild, &highest_cluste= r, >> + refcount_table, nb_clusters); >> + } >> } >> =20 >> /* check OFLAG_COPIED */ >> --=20 >> 2.0.4 >>