From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:57275) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XIGqB-0002WK-HF for qemu-devel@nongnu.org; Fri, 15 Aug 2014 08:42:49 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1XIGq5-0006AU-2L for qemu-devel@nongnu.org; Fri, 15 Aug 2014 08:42:43 -0400 Received: from mx1.redhat.com ([209.132.183.28]:43933) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XIGq4-0006AQ-PU for qemu-devel@nongnu.org; Fri, 15 Aug 2014 08:42:36 -0400 Message-ID: <53EE0034.2050901@redhat.com> Date: Fri, 15 Aug 2014 14:42:28 +0200 From: Max Reitz MIME-Version: 1.0 References: <1407963710-4942-1-git-send-email-mreitz@redhat.com> <1407963710-4942-5-git-send-email-mreitz@redhat.com> <20140814123313.GI2009@irqsave.net> In-Reply-To: <20140814123313.GI2009@irqsave.net> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: =?windows-1252?Q?Beno=EEt_Canet?= Cc: Kevin Wolf , qemu-devel@nongnu.org, Stefan Hajnoczi On 14.08.2014 14:33, Beno=EEt Canet wrote: > The Wednesday 13 Aug 2014 =E0 23:01:46 (+0200), Max Reitz wrote : >> If a referenced cluster has a refcount of 0, increasing its refcount m= ay >> result in clusters being allocated for the refcount structures. This m= ay >> overwrite the referenced cluster, therefore we cannot simply increase >> the refcount then. >> >> In such cases, we can either try to replicate all the refcount >> operations solely for the check operation, basing the allocations on t= he >> in-memory refcount table; or we can simply rebuild the whole refcount >> structure based on the in-memory refcount table. Since the latter will >> be much easier, do that. >> >> To prepare for this, introduce a "rebuild" boolean which should be set >> to true whenever a fix is rather dangerous or too complicated using th= e >> current refcount structures. Another example for this is refcount bloc= ks >> being referenced more than once. >> >> Signed-off-by: Max Reitz >> --- >> block/qcow2-refcount.c | 191 +++++++--------------------------------= ---------- >> 1 file changed, 27 insertions(+), 164 deletions(-) >> >> diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c >> index a1d93e5..6400840 100644 >> --- a/block/qcow2-refcount.c >> +++ b/block/qcow2-refcount.c >> @@ -1381,127 +1381,12 @@ fail: >> } >> =20 >> /* >> - * Writes one sector of the refcount table to the disk >> - */ >> -#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t)) >> -static int write_reftable_entry(BlockDriverState *bs, int rt_index) >> -{ >> - BDRVQcowState *s =3D bs->opaque; >> - uint64_t buf[RT_ENTRIES_PER_SECTOR]; >> - int rt_start_index; >> - int i, ret; >> - >> - rt_start_index =3D rt_index & ~(RT_ENTRIES_PER_SECTOR - 1); >> - for (i =3D 0; i < RT_ENTRIES_PER_SECTOR; i++) { >> - buf[i] =3D cpu_to_be64(s->refcount_table[rt_start_index + i])= ; >> - } >> - >> - ret =3D qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE= , >> - s->refcount_table_offset + rt_start_index * sizeof(uint64= _t), >> - sizeof(buf)); >> - if (ret < 0) { >> - return ret; >> - } >> - >> - BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); >> - ret =3D bdrv_pwrite_sync(bs->file, s->refcount_table_offset + >> - rt_start_index * sizeof(uint64_t), buf, sizeof(buf)); >> - if (ret < 0) { >> - return ret; >> - } >> - >> - return 0; >> -} >> - >> -/* >> - * Allocates a new cluster for the given refcount block (represented = by its >> - * offset in the image file) and copies the current content there. Th= is function >> - * does _not_ decrement the reference count for the currently occupie= d cluster. >> - * >> - * This function prints an informative message to stderr on error (an= d returns >> - * -errno); on success, the offset of the newly allocated cluster is = returned. >> - */ >> -static int64_t realloc_refcount_block(BlockDriverState *bs, int refta= ble_index, >> - uint64_t offset) >> -{ >> - BDRVQcowState *s =3D bs->opaque; >> - int64_t new_offset =3D 0; >> - void *refcount_block =3D NULL; >> - int ret; >> - >> - /* allocate new refcount block */ >> - new_offset =3D qcow2_alloc_clusters(bs, s->cluster_size); >> - if (new_offset < 0) { >> - fprintf(stderr, "Could not allocate new cluster: %s\n", >> - strerror(-new_offset)); >> - ret =3D new_offset; >> - goto done; >> - } >> - >> - /* fetch current refcount block content */ >> - ret =3D qcow2_cache_get(bs, s->refcount_block_cache, offset, &ref= count_block); >> - if (ret < 0) { >> - fprintf(stderr, "Could not fetch refcount block: %s\n", strer= ror(-ret)); >> - goto fail_free_cluster; >> - } >> - >> - /* new block has not yet been entered into refcount table, theref= ore it is >> - * no refcount block yet (regarding this check) */ >> - ret =3D qcow2_pre_write_overlap_check(bs, 0, new_offset, s->clust= er_size); >> - if (ret < 0) { >> - fprintf(stderr, "Could not write refcount block; metadata ove= rlap " >> - "check failed: %s\n", strerror(-ret)); >> - /* the image will be marked corrupt, so don't even attempt on= freeing >> - * the cluster */ >> - goto done; >> - } >> - >> - /* write to new block */ >> - ret =3D bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refco= unt_block, >> - s->cluster_sectors); >> - if (ret < 0) { >> - fprintf(stderr, "Could not write refcount block: %s\n", strer= ror(-ret)); >> - goto fail_free_cluster; >> - } >> - >> - /* update refcount table */ >> - assert(!offset_into_cluster(s, new_offset)); >> - s->refcount_table[reftable_index] =3D new_offset; >> - ret =3D write_reftable_entry(bs, reftable_index); >> - if (ret < 0) { >> - fprintf(stderr, "Could not update refcount table: %s\n", >> - strerror(-ret)); >> - goto fail_free_cluster; >> - } >> - >> - goto done; >> - >> -fail_free_cluster: >> - qcow2_free_clusters(bs, new_offset, s->cluster_size, QCOW2_DISCAR= D_OTHER); >> - >> -done: >> - if (refcount_block) { >> - /* This should never fail, as it would only do so if the give= n refcount >> - * block cannot be found in the cache. As this is impossible = as long as >> - * there are no bugs, assert the success. */ >> - int tmp =3D qcow2_cache_put(bs, s->refcount_block_cache, &ref= count_block); >> - assert(tmp =3D=3D 0); >> - } >> - >> - if (ret < 0) { >> - return ret; >> - } >> - >> - return new_offset; >> -} >> - >> -/* >> * Checks consistency of refblocks and accounts for each refblock in >> * *refcount_table. >> */ >> static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *re= s, >> - BdrvCheckMode fix, uint16_t **refcount_tab= le, >> - int64_t *nb_clusters) >> + BdrvCheckMode fix, bool *rebuild, >> + uint16_t **refcount_table, int64_t *nb_clu= sters) >> { >> BDRVQcowState *s =3D bs->opaque; >> int64_t i, size; >> @@ -1517,6 +1402,7 @@ static int check_refblocks(BlockDriverState *bs,= BdrvCheckResult *res, >> fprintf(stderr, "ERROR refcount block %" PRId64 " is not= " >> "cluster aligned; refcount table entry corrupted= \n", i); >> res->corruptions++; >> + *rebuild =3D true; >> continue; >> } >> =20 >> @@ -1571,47 +1457,12 @@ resize_fail: >> if (offset !=3D 0) { >> inc_refcounts(bs, res, *refcount_table, *nb_clusters, >> offset, s->cluster_size); >> - if ((*refcount_table)[cluster] !=3D 1) { >> - fprintf(stderr, "%s refcount block %" PRId64 >> - " refcount=3D%d\n", >> - fix & BDRV_FIX_ERRORS ? "Repairing" : >> - "ERROR", >> - i, (*refcount_table)[cluster]); >> - >> - if (fix & BDRV_FIX_ERRORS) { >> - int64_t new_offset; >> - >> - new_offset =3D realloc_refcount_block(bs, i, offs= et); >> - if (new_offset < 0) { >> - res->corruptions++; >> - continue; >> - } >> - >> - /* update refcounts */ >> - if ((new_offset >> s->cluster_bits) >=3D *nb_clus= ters) { >> - /* increase refcount_table size if necessary = */ >> - int old_nb_clusters =3D *nb_clusters; >> - *nb_clusters =3D (new_offset >> s->cluster_bi= ts) + 1; >> - >> - *refcount_table =3D g_try_realloc(*refcount_t= able, >> - *nb_clusters * sizeof(uint16_t)); >> - if (!*refcount_table) { >> - res->check_errors++; >> - return -ENOMEM; >> - } >> - >> - memset(&(*refcount_table)[old_nb_clusters], 0= , >> - (*nb_clusters - old_nb_clusters) * >> - sizeof(uint16_t)); >> - } >> - (*refcount_table)[cluster]--; >> - inc_refcounts(bs, res, *refcount_table, *nb_clust= ers, >> - new_offset, s->cluster_size); >> =20 >> - res->corruptions_fixed++; >> - } else { >> - res->corruptions++; >> - } >> + if ((*refcount_table)[cluster] !=3D 1) { >> + fprintf(stderr, "ERROR refcount block %" PRId64 >> + " refcount=3D%d\n", i, (*refcount_table)[clus= ter]); >> + res->corruptions++; >> + *rebuild =3D true; >> } >> } >> } >> @@ -1623,8 +1474,8 @@ resize_fail: >> * Calculates an in-memory refcount table. >> */ >> static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult= *res, >> - BdrvCheckMode fix, uint16_t **refcount= _table, >> - int64_t *nb_clusters) >> + BdrvCheckMode fix, bool *rebuild, >> + uint16_t **refcount_table, int64_t *nb= _clusters) >> { >> BDRVQcowState *s =3D bs->opaque; >> QCowSnapshot *sn; >> @@ -1667,7 +1518,7 @@ static int calculate_refcounts(BlockDriverState = *bs, BdrvCheckResult *res, >> s->refcount_table_offset, >> s->refcount_table_size * sizeof(uint64_t)); >> =20 >> - return check_refblocks(bs, res, fix, refcount_table, nb_clusters)= ; >> + return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_= clusters); >> } >> =20 >> /* >> @@ -1675,7 +1526,8 @@ static int calculate_refcounts(BlockDriverState = *bs, BdrvCheckResult *res, >> * refcount as reported by the refcount structures on-disk. >> */ >> static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult = *res, >> - BdrvCheckMode fix, int64_t *highest_clu= ster, >> + BdrvCheckMode fix, bool *rebuild, >> + int64_t *highest_cluster, >> uint16_t *refcount_table, int64_t nb_c= lusters) >> { >> BDRVQcowState *s =3D bs->opaque; >> @@ -1702,10 +1554,15 @@ static void compare_refcounts(BlockDriverState= *bs, BdrvCheckResult *res, >> int *num_fixed =3D NULL; >> if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) { >> num_fixed =3D &res->leaks_fixed; >> - } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERROR= S)) { >> + } else if (refcount1 > 0 && refcount1 < refcount2 && >> + (fix & BDRV_FIX_ERRORS)) { >> num_fixed =3D &res->corruptions_fixed; >> } >> =20 >> + if (refcount1 =3D=3D 0) { >> + *rebuild =3D true; >> + } >> + >> fprintf(stderr, "%s cluster %" PRId64 " refcount=3D%d re= ference=3D%d\n", >> num_fixed !=3D NULL ? "Repairing" : >> refcount1 < refcount2 ? "ERROR" : >> @@ -1744,6 +1601,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, = BdrvCheckResult *res, >> BDRVQcowState *s =3D bs->opaque; >> int64_t size, highest_cluster, nb_clusters; >> uint16_t *refcount_table =3D NULL; >> + bool rebuild =3D false; >> int ret; >> =20 >> size =3D bdrv_getlength(bs->file); >> @@ -1761,14 +1619,19 @@ int qcow2_check_refcounts(BlockDriverState *bs= , BdrvCheckResult *res, >> res->bfi.total_clusters =3D >> size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE); >> =20 >> - ret =3D calculate_refcounts(bs, res, fix, &refcount_table, &nb_cl= usters); >> + ret =3D calculate_refcounts(bs, res, fix, &rebuild, &refcount_tab= le, >> + &nb_clusters); >> if (ret < 0) { >> goto fail; >> } >> =20 >> - compare_refcounts(bs, res, fix, &highest_cluster, refcount_table, >> + compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refco= unt_table, >> nb_clusters); >> =20 >> + if (rebuild) { >> + fprintf(stderr, "ERROR need to rebuild refcount structures\n"= ); >> + } > Aren't you afraid that introducing rebuild in this way could > break git bisect ? > > Why not introducing the new rebuild code in the previous patch and > use it in this one ? Because it's a static function and an unused static function really=20 breaks git bisect. The worst this patch does is that it prevents qemu from repairing files=20 in a way which could actually break the file. It does not fix the repair=20 operation (that's the next patch), but it at least prevents it, which is=20 in my opinion not really breaking things (because things are already=20 broken). If one runs some tests against some expected output (like the=20 qemu-iotests do), bisect is broken anyway because the output from after=20 this series differs from before (that's why patch 7 exists). The best I could probably do is to pull patch 5 before this one (like=20 you suggest) while creating the rebuild function as a non-static one in=20 that patch and making it static here so that gcc does not complain. Max > Best regards > > Beno=EEt > >> + >> /* check OFLAG_COPIED */ >> ret =3D check_oflag_copied(bs, res, fix); >> if (ret < 0) { >> --=20 >> 2.0.3 >> >>