From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:42542) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XHuEN-0003I7-De for qemu-devel@nongnu.org; Thu, 14 Aug 2014 08:34:17 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1XHuEH-0007gu-6T for qemu-devel@nongnu.org; Thu, 14 Aug 2014 08:34:11 -0400 Received: from lputeaux-656-01-25-125.w80-12.abo.wanadoo.fr ([80.12.84.125]:35847 helo=paradis.irqsave.net) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XHuEG-0007gk-Rc for qemu-devel@nongnu.org; Thu, 14 Aug 2014 08:34:05 -0400 Date: Thu, 14 Aug 2014 14:33:13 +0200 From: =?iso-8859-1?Q?Beno=EEt?= Canet Message-ID: <20140814123313.GI2009@irqsave.net> References: <1407963710-4942-1-git-send-email-mreitz@redhat.com> <1407963710-4942-5-git-send-email-mreitz@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline In-Reply-To: <1407963710-4942-5-git-send-email-mreitz@redhat.com> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH 4/8] qcow2: Do not perform potentially damaging repairs List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Max Reitz Cc: Kevin Wolf , qemu-devel@nongnu.org, Stefan Hajnoczi The Wednesday 13 Aug 2014 =E0 23:01:46 (+0200), Max Reitz wrote : > If a referenced cluster has a refcount of 0, increasing its refcount ma= y > result in clusters being allocated for the refcount structures. This ma= y > overwrite the referenced cluster, therefore we cannot simply increase > the refcount then. >=20 > In such cases, we can either try to replicate all the refcount > operations solely for the check operation, basing the allocations on th= e > in-memory refcount table; or we can simply rebuild the whole refcount > structure based on the in-memory refcount table. Since the latter will > be much easier, do that. >=20 > To prepare for this, introduce a "rebuild" boolean which should be set > to true whenever a fix is rather dangerous or too complicated using the > current refcount structures. Another example for this is refcount block= s > being referenced more than once. >=20 > Signed-off-by: Max Reitz > --- > block/qcow2-refcount.c | 191 +++++++----------------------------------= -------- > 1 file changed, 27 insertions(+), 164 deletions(-) >=20 > diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c > index a1d93e5..6400840 100644 > --- a/block/qcow2-refcount.c > +++ b/block/qcow2-refcount.c > @@ -1381,127 +1381,12 @@ fail: > } > =20 > /* > - * Writes one sector of the refcount table to the disk > - */ > -#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t)) > -static int write_reftable_entry(BlockDriverState *bs, int rt_index) > -{ > - BDRVQcowState *s =3D bs->opaque; > - uint64_t buf[RT_ENTRIES_PER_SECTOR]; > - int rt_start_index; > - int i, ret; > - > - rt_start_index =3D rt_index & ~(RT_ENTRIES_PER_SECTOR - 1); > - for (i =3D 0; i < RT_ENTRIES_PER_SECTOR; i++) { > - buf[i] =3D cpu_to_be64(s->refcount_table[rt_start_index + i]); > - } > - > - ret =3D qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE, > - s->refcount_table_offset + rt_start_index * sizeof(uint64_= t), > - sizeof(buf)); > - if (ret < 0) { > - return ret; > - } > - > - BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); > - ret =3D bdrv_pwrite_sync(bs->file, s->refcount_table_offset + > - rt_start_index * sizeof(uint64_t), buf, sizeof(buf)); > - if (ret < 0) { > - return ret; > - } > - > - return 0; > -} > - > -/* > - * Allocates a new cluster for the given refcount block (represented b= y its > - * offset in the image file) and copies the current content there. Thi= s function > - * does _not_ decrement the reference count for the currently occupied= cluster. > - * > - * This function prints an informative message to stderr on error (and= returns > - * -errno); on success, the offset of the newly allocated cluster is r= eturned. > - */ > -static int64_t realloc_refcount_block(BlockDriverState *bs, int reftab= le_index, > - uint64_t offset) > -{ > - BDRVQcowState *s =3D bs->opaque; > - int64_t new_offset =3D 0; > - void *refcount_block =3D NULL; > - int ret; > - > - /* allocate new refcount block */ > - new_offset =3D qcow2_alloc_clusters(bs, s->cluster_size); > - if (new_offset < 0) { > - fprintf(stderr, "Could not allocate new cluster: %s\n", > - strerror(-new_offset)); > - ret =3D new_offset; > - goto done; > - } > - > - /* fetch current refcount block content */ > - ret =3D qcow2_cache_get(bs, s->refcount_block_cache, offset, &refc= ount_block); > - if (ret < 0) { > - fprintf(stderr, "Could not fetch refcount block: %s\n", strerr= or(-ret)); > - goto fail_free_cluster; > - } > - > - /* new block has not yet been entered into refcount table, therefo= re it is > - * no refcount block yet (regarding this check) */ > - ret =3D qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluste= r_size); > - if (ret < 0) { > - fprintf(stderr, "Could not write refcount block; metadata over= lap " > - "check failed: %s\n", strerror(-ret)); > - /* the image will be marked corrupt, so don't even attempt on = freeing > - * the cluster */ > - goto done; > - } > - > - /* write to new block */ > - ret =3D bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcou= nt_block, > - s->cluster_sectors); > - if (ret < 0) { > - fprintf(stderr, "Could not write refcount block: %s\n", strerr= or(-ret)); > - goto fail_free_cluster; > - } > - > - /* update refcount table */ > - assert(!offset_into_cluster(s, new_offset)); > - s->refcount_table[reftable_index] =3D new_offset; > - ret =3D write_reftable_entry(bs, reftable_index); > - if (ret < 0) { > - fprintf(stderr, "Could not update refcount table: %s\n", > - strerror(-ret)); > - goto fail_free_cluster; > - } > - > - goto done; > - > -fail_free_cluster: > - qcow2_free_clusters(bs, new_offset, s->cluster_size, QCOW2_DISCARD= _OTHER); > - > -done: > - if (refcount_block) { > - /* This should never fail, as it would only do so if the given= refcount > - * block cannot be found in the cache. As this is impossible a= s long as > - * there are no bugs, assert the success. */ > - int tmp =3D qcow2_cache_put(bs, s->refcount_block_cache, &refc= ount_block); > - assert(tmp =3D=3D 0); > - } > - > - if (ret < 0) { > - return ret; > - } > - > - return new_offset; > -} > - > -/* > * Checks consistency of refblocks and accounts for each refblock in > * *refcount_table. > */ > static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, > - BdrvCheckMode fix, uint16_t **refcount_tabl= e, > - int64_t *nb_clusters) > + BdrvCheckMode fix, bool *rebuild, > + uint16_t **refcount_table, int64_t *nb_clus= ters) > { > BDRVQcowState *s =3D bs->opaque; > int64_t i, size; > @@ -1517,6 +1402,7 @@ static int check_refblocks(BlockDriverState *bs, = BdrvCheckResult *res, > fprintf(stderr, "ERROR refcount block %" PRId64 " is not " > "cluster aligned; refcount table entry corrupted\n= ", i); > res->corruptions++; > + *rebuild =3D true; > continue; > } > =20 > @@ -1571,47 +1457,12 @@ resize_fail: > if (offset !=3D 0) { > inc_refcounts(bs, res, *refcount_table, *nb_clusters, > offset, s->cluster_size); > - if ((*refcount_table)[cluster] !=3D 1) { > - fprintf(stderr, "%s refcount block %" PRId64 > - " refcount=3D%d\n", > - fix & BDRV_FIX_ERRORS ? "Repairing" : > - "ERROR", > - i, (*refcount_table)[cluster]); > - > - if (fix & BDRV_FIX_ERRORS) { > - int64_t new_offset; > - > - new_offset =3D realloc_refcount_block(bs, i, offse= t); > - if (new_offset < 0) { > - res->corruptions++; > - continue; > - } > - > - /* update refcounts */ > - if ((new_offset >> s->cluster_bits) >=3D *nb_clust= ers) { > - /* increase refcount_table size if necessary *= / > - int old_nb_clusters =3D *nb_clusters; > - *nb_clusters =3D (new_offset >> s->cluster_bit= s) + 1; > - > - *refcount_table =3D g_try_realloc(*refcount_ta= ble, > - *nb_clusters * sizeof(uint16_t)); > - if (!*refcount_table) { > - res->check_errors++; > - return -ENOMEM; > - } > - > - memset(&(*refcount_table)[old_nb_clusters], 0, > - (*nb_clusters - old_nb_clusters) * > - sizeof(uint16_t)); > - } > - (*refcount_table)[cluster]--; > - inc_refcounts(bs, res, *refcount_table, *nb_cluste= rs, > - new_offset, s->cluster_size); > =20 > - res->corruptions_fixed++; > - } else { > - res->corruptions++; > - } > + if ((*refcount_table)[cluster] !=3D 1) { > + fprintf(stderr, "ERROR refcount block %" PRId64 > + " refcount=3D%d\n", i, (*refcount_table)[clust= er]); > + res->corruptions++; > + *rebuild =3D true; > } > } > } > @@ -1623,8 +1474,8 @@ resize_fail: > * Calculates an in-memory refcount table. > */ > static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *= res, > - BdrvCheckMode fix, uint16_t **refcount_= table, > - int64_t *nb_clusters) > + BdrvCheckMode fix, bool *rebuild, > + uint16_t **refcount_table, int64_t *nb_= clusters) > { > BDRVQcowState *s =3D bs->opaque; > QCowSnapshot *sn; > @@ -1667,7 +1518,7 @@ static int calculate_refcounts(BlockDriverState *= bs, BdrvCheckResult *res, > s->refcount_table_offset, > s->refcount_table_size * sizeof(uint64_t)); > =20 > - return check_refblocks(bs, res, fix, refcount_table, nb_clusters); > + return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_c= lusters); > } > =20 > /* > @@ -1675,7 +1526,8 @@ static int calculate_refcounts(BlockDriverState *= bs, BdrvCheckResult *res, > * refcount as reported by the refcount structures on-disk. > */ > static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *r= es, > - BdrvCheckMode fix, int64_t *highest_clus= ter, > + BdrvCheckMode fix, bool *rebuild, > + int64_t *highest_cluster, > uint16_t *refcount_table, int64_t nb_clu= sters) > { > BDRVQcowState *s =3D bs->opaque; > @@ -1702,10 +1554,15 @@ static void compare_refcounts(BlockDriverState = *bs, BdrvCheckResult *res, > int *num_fixed =3D NULL; > if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) { > num_fixed =3D &res->leaks_fixed; > - } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS= )) { > + } else if (refcount1 > 0 && refcount1 < refcount2 && > + (fix & BDRV_FIX_ERRORS)) { > num_fixed =3D &res->corruptions_fixed; > } > =20 > + if (refcount1 =3D=3D 0) { > + *rebuild =3D true; > + } > + > fprintf(stderr, "%s cluster %" PRId64 " refcount=3D%d refe= rence=3D%d\n", > num_fixed !=3D NULL ? "Repairing" : > refcount1 < refcount2 ? "ERROR" : > @@ -1744,6 +1601,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, B= drvCheckResult *res, > BDRVQcowState *s =3D bs->opaque; > int64_t size, highest_cluster, nb_clusters; > uint16_t *refcount_table =3D NULL; > + bool rebuild =3D false; > int ret; > =20 > size =3D bdrv_getlength(bs->file); > @@ -1761,14 +1619,19 @@ int qcow2_check_refcounts(BlockDriverState *bs,= BdrvCheckResult *res, > res->bfi.total_clusters =3D > size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE); > =20 > - ret =3D calculate_refcounts(bs, res, fix, &refcount_table, &nb_clu= sters); > + ret =3D calculate_refcounts(bs, res, fix, &rebuild, &refcount_tabl= e, > + &nb_clusters); > if (ret < 0) { > goto fail; > } > =20 > - compare_refcounts(bs, res, fix, &highest_cluster, refcount_table, > + compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, refcou= nt_table, > nb_clusters); > =20 > + if (rebuild) { > + fprintf(stderr, "ERROR need to rebuild refcount structures\n")= ; > + } Aren't you afraid that introducing rebuild in this way could break git bisect ? Why not introducing the new rebuild code in the previous patch and use it in this one ? Best regards Beno=EEt > + > /* check OFLAG_COPIED */ > ret =3D check_oflag_copied(bs, res, fix); > if (ret < 0) { > --=20 > 2.0.3 >=20 >=20