From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59802) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VGOnN-0005Tn-Cd for qemu-devel@nongnu.org; Mon, 02 Sep 2013 03:43:39 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1VGOnH-0000Qy-6G for qemu-devel@nongnu.org; Mon, 02 Sep 2013 03:43:33 -0400 Received: from mx1.redhat.com ([209.132.183.28]:33032) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VGOnG-0000Qs-Rl for qemu-devel@nongnu.org; Mon, 02 Sep 2013 03:43:27 -0400 Received: from int-mx10.intmail.prod.int.phx2.redhat.com (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id r827hQg1010953 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Mon, 2 Sep 2013 03:43:26 -0400 Message-ID: <5224419B.30805@redhat.com> Date: Mon, 02 Sep 2013 09:43:23 +0200 From: Max Reitz MIME-Version: 1.0 References: <1377858440-4004-1-git-send-email-mreitz@redhat.com> <1377858440-4004-3-git-send-email-mreitz@redhat.com> <20130902034307.GA10844@T430s.nay.redhat.com> In-Reply-To: <20130902034307.GA10844@T430s.nay.redhat.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v3 2/3] qcow2: Implement bdrv_amend_options List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: famz@redhat.com Cc: Kevin Wolf , qemu-devel@nongnu.org, Stefan Hajnoczi Am 02.09.2013 05:43, schrieb Fam Zheng: > On Fri, 08/30 12:27, Max Reitz wrote: >> Implement bdrv_amend_options for compat, size, backing_file, backing_f= mt >> and lazy_refcounts. >> >> Downgrading images from compat=3D1.1 to compat=3D0.10 is achieved thro= ugh >> handling all incompatible flags accordingly, clearing all compatible a= nd >> autoclear flags and expanding all zero clusters. >> >> Signed-off-by: Max Reitz >> --- >> block/qcow2-cluster.c | 165 ++++++++++++++++++++++++++++++++++++++++= ++ >> block/qcow2.c | 194 ++++++++++++++++++++++++++++++++++++++++= +++++++++- >> block/qcow2.h | 3 + >> 3 files changed, 361 insertions(+), 1 deletion(-) >> >> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c >> index cca76d4..e0ca104 100644 >> --- a/block/qcow2-cluster.c >> +++ b/block/qcow2-cluster.c >> @@ -1476,3 +1476,168 @@ fail: >> =20 >> return ret; >> } >> + >> +/* >> + * Expands all zero clusters in a specific L1 table (or deallocates t= hem, for >> + * non-backed non-pre-allocated zero clusters). >> + */ >> +static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t = *l1_table, >> + int l1_size) >> +{ >> + BDRVQcowState *s =3D bs->opaque; >> + bool is_active_l1 =3D (l1_table =3D=3D s->l1_table); >> + uint64_t *l2_table; >> + int ret; >> + int i, j; >> + >> + if (!is_active_l1) { >> + /* inactive L2 tables require a buffer to be stored in when l= oading >> + * them from disk */ >> + l2_table =3D qemu_blockalign(bs, s->cluster_size); >> + } >> + >> + for (i =3D 0; i < l1_size; i++) { >> + uint64_t l2_offset =3D l1_table[i] & L1E_OFFSET_MASK; >> + bool l2_dirty =3D false; >> + >> + if (!l2_offset) { >> + /* unallocated */ >> + continue; >> + } >> + >> + if (is_active_l1) { >> + /* get active L2 tables from cache */ >> + ret =3D qcow2_cache_get(bs, s->l2_table_cache, l2_offset, >> + (void **)&l2_table); >> + } else { >> + /* load inactive L2 tables from disk */ >> + ret =3D bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE, >> + (void *)l2_table, s->cluster_sectors); >> + } >> + if (ret < 0) { >> + goto fail; >> + } >> + >> + for (j =3D 0; j < s->l2_size; j++) { >> + uint64_t l2_entry =3D be64_to_cpu(l2_table[j]); >> + int64_t offset; >> + >> + if (qcow2_get_cluster_type(l2_entry) !=3D QCOW2_CLUSTER_Z= ERO) { >> + continue; >> + } >> + >> + offset =3D l2_entry & L2E_OFFSET_MASK; >> + if (!offset) { >> + /* not preallocated */ >> + if (!bs->backing_hd) { >> + /* not backed; therefore we can simply deallocate= the >> + * cluster */ >> + l2_table[j] =3D 0; >> + l2_dirty =3D true; >> + continue; >> + } >> + >> + offset =3D qcow2_alloc_clusters(bs, s->cluster_size); >> + if (offset < 0) { >> + ret =3D offset; >> + goto fail; >> + } >> + } >> + >> + ret =3D bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_= SIZE, >> + s->cluster_sectors); >> + if (ret < 0) { >> + qcow2_free_clusters(bs, offset, s->cluster_size, >> + QCOW2_DISCARD_ALWAYS); >> + goto fail; >> + } >> + >> + l2_table[j] =3D cpu_to_be64(offset | QCOW_OFLAG_COPIED); >> + l2_dirty =3D true; >> + } >> + >> + if (is_active_l1) { >> + if (l2_dirty) { >> + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_ta= ble); >> + qcow2_cache_depends_on_flush(s->l2_table_cache); >> + } >> + ret =3D qcow2_cache_put(bs, s->l2_table_cache, (void **)&= l2_table); >> + if (ret < 0) { >> + l2_table =3D NULL; >> + goto fail; >> + } >> + } else { >> + if (l2_dirty) { >> + ret =3D bdrv_write(bs->file, l2_offset / BDRV_SECTOR_= SIZE, >> + (void *)l2_table, s->cluster_sectors); >> + if (ret < 0) { >> + goto fail; >> + } >> + } >> + } >> + } >> + >> + ret =3D 0; >> + >> +fail: >> + if (l2_table) { >> + if (!is_active_l1) { >> + qemu_vfree(l2_table); >> + } else { >> + if (ret < 0) { >> + qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_t= able); >> + } else { >> + ret =3D qcow2_cache_put(bs, s->l2_table_cache, >> + (void **)&l2_table); >> + } >> + } >> + } >> + return ret; >> +} >> + >> +/* >> + * For backed images, expands all zero clusters on the image. For non= -backed >> + * images, deallocates all non-pre-allocated zero clusters (and claim= s the >> + * allocation for pre-allocated ones). This is important for downgrad= ing to a >> + * qcow2 version which doesn't yet support metadata zero clusters. >> + */ >> +int qcow2_expand_zero_clusters(BlockDriverState *bs) >> +{ >> + BDRVQcowState *s =3D bs->opaque; >> + uint64_t *l1_table =3D NULL; >> + int ret; >> + int i, j; >> + >> + ret =3D expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size); >> + if (ret < 0) { >> + goto fail; >> + } >> + >> + for (i =3D 0; i < s->nb_snapshots; i++) { >> + int l1_sectors =3D (s->snapshots[i].l1_size * sizeof(uint64_t= ) + >> + BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE; >> + >> + l1_table =3D g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZ= E); >> + >> + ret =3D bdrv_read(bs->file, s->snapshots[i].l1_table_offset / >> + BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors); >> + if (ret < 0) { >> + goto fail; >> + } >> + >> + for (j =3D 0; j < s->snapshots[i].l1_size; j++) { >> + be64_to_cpus(&l1_table[j]); >> + } >> + >> + ret =3D expand_zero_clusters_in_l1(bs, l1_table, s->snapshots= [i].l1_size); >> + if (ret < 0) { >> + goto fail; >> + } >> + } >> + >> + ret =3D 0; >> + >> +fail: >> + g_free(l1_table); >> + return ret; >> +} >> diff --git a/block/qcow2.c b/block/qcow2.c >> index 78097e5..a8eaf45 100644 >> --- a/block/qcow2.c >> +++ b/block/qcow2.c >> @@ -409,6 +409,7 @@ static int qcow2_open(BlockDriverState *bs, QDict = *options, int flags) >> ret =3D -ENOTSUP; >> goto fail; >> } >> + s->refcount_order =3D header.refcount_order; >> =20 >> if (header.cluster_bits < MIN_CLUSTER_BITS || >> header.cluster_bits > MAX_CLUSTER_BITS) { >> @@ -1076,7 +1077,7 @@ int qcow2_update_header(BlockDriverState *bs) >> .incompatible_features =3D cpu_to_be64(s->incompatible_feat= ures), >> .compatible_features =3D cpu_to_be64(s->compatible_featur= es), >> .autoclear_features =3D cpu_to_be64(s->autoclear_feature= s), >> - .refcount_order =3D cpu_to_be32(3 + REFCOUNT_SHIFT), >> + .refcount_order =3D cpu_to_be32(s->refcount_order), >> .header_length =3D cpu_to_be32(header_length), >> }; >> =20 >> @@ -1735,6 +1736,196 @@ static int qcow2_load_vmstate(BlockDriverState= *bs, uint8_t *buf, >> return ret; >> } >> =20 >> +/* >> + * Downgrades an image's version. To achieve this, any incompatible f= eatures >> + * have to be removed. >> + */ >> +static int qcow2_downgrade(BlockDriverState *bs, int target_version) >> +{ >> + BDRVQcowState *s =3D bs->opaque; >> + int current_version =3D s->qcow_version; >> + int ret; >> + >> + if (target_version =3D=3D current_version) { >> + return 0; >> + } else if (target_version > current_version) { >> + return -EINVAL; >> + } else if (target_version !=3D 2) { >> + return -EINVAL; >> + } >> + >> + if (s->refcount_order !=3D 4) { >> + /* we would have to convert the image to a refcount_order =3D= =3D 4 image >> + * here; however, since qemu (at the time of writing this) do= es not >> + * support anything different than 4 anyway, there is no poin= t in doing >> + * so right now; however, we should error out (if qemu suppor= ts this in >> + * the future and this code has not been adapted) */ >> + error_report("qcow2_downgrade: Image refcount orders other th= an 4 are" >> + "currently not supported."); >> + return -ENOTSUP; >> + } >> + >> + /* clear incompatible features */ >> + if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { >> + ret =3D qcow2_mark_clean(bs); >> + if (ret < 0) { >> + return ret; >> + } >> + } >> + >> + if (s->incompatible_features) { >> + return -ENOTSUP; >> + } >> + >> + /* since we can ignore compatible features, we can set them to 0 = as well */ >> + s->compatible_features =3D 0; >> + /* if lazy refcounts have been used, they have already been fixed= through >> + * clearing the dirty flag */ >> + >> + /* clearing autoclear features is trivial */ >> + s->autoclear_features =3D 0; >> + >> + ret =3D qcow2_expand_zero_clusters(bs); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + s->qcow_version =3D target_version; >> + ret =3D qcow2_update_header(bs); >> + if (ret < 0) { >> + s->qcow_version =3D current_version; >> + return ret; >> + } >> + return 0; >> +} >> + >> +static int qcow2_amend_options(BlockDriverState *bs, >> + QEMUOptionParameter *options) >> +{ >> + BDRVQcowState *s =3D bs->opaque; >> + int old_version =3D s->qcow_version, new_version =3D old_version; >> + uint64_t new_size =3D 0; >> + const char *backing_file =3D NULL, *backing_format =3D NULL; >> + bool lazy_refcounts =3D s->use_lazy_refcounts; >> + int ret; >> + int i; >> + >> + for (i =3D 0; options[i].name; i++) >> + { >> + if (!strcmp(options[i].name, "compat")) { >> + if (!options[i].value.s) { >> + /* preserve default */ >> + } else if (!strcmp(options[i].value.s, "0.10")) { >> + new_version =3D 2; >> + } else if (!strcmp(options[i].value.s, "1.1")) { >> + new_version =3D 3; >> + } else { >> + fprintf(stderr, "Unknown compatibility level %s.\n", >> + options[i].value.s); >> + return -EINVAL; >> + } >> + } else if (!strcmp(options[i].name, "preallocation")) { >> + if (options[i].assigned) { > For encryption flag and cluster_size, you checked the original value an= d only > error out on actual change, should check the original preallocation val= ue here > as well? As fas as I know, there is no way to check the original preallocation val= ue. >> + fprintf(stderr, "Cannot change preallocation mode.\n"= ); >> + return -ENOTSUP; >> + } >> + } else if (!strcmp(options[i].name, "size")) { >> + new_size =3D options[i].value.n; >> + } else if (!strcmp(options[i].name, "backing_file")) { >> + backing_file =3D options[i].value.s; >> + } else if (!strcmp(options[i].name, "backing_fmt")) { >> + backing_format =3D options[i].value.s; >> + } else if (!strcmp(options[i].name, "encryption")) { >> + if (options[i].assigned && >> + (options[i].value.n !=3D !!s->crypt_method)) { >> + fprintf(stderr, "Changing the encryption flag is not = " >> + "supported.\n"); >> + return -ENOTSUP; >> + } >> + } else if (!strcmp(options[i].name, "cluster_size")) { >> + if (options[i].assigned && (options[i].value.n !=3D s->cl= uster_size)) >> + { >> + fprintf(stderr, "Changing the cluster size is not " >> + "supported.\n"); >> + return -ENOTSUP; >> + } >> + } else if (!strcmp(options[i].name, "lazy_refcounts")) { >> + if (options[i].assigned) { >> + lazy_refcounts =3D options[i].value.n; >> + } >> + } else { >> + /* if this assertion fails, this probably means a new opt= ion was >> + * added without having it covered here */ >> + assert(false); > A unknown option reported as -ENOTSUP with a proper message is good eno= ugh, > it's not that critical for an assert. Well, Kevin suggested the assert(false), and since this code would be=20 indeed probably reached only because of a programming error, I=20 personally think it to be appropriate, too =96 although I'll gladly chang= e=20 it if the majority decides otherwise. ;-) >> + } >> + } >> + >> + if (new_version !=3D old_version) { >> + if (new_version > old_version) { >> + /* Upgrade */ >> + s->qcow_version =3D new_version; >> + ret =3D qcow2_update_header(bs); >> + if (ret < 0) { >> + s->qcow_version =3D old_version; >> + return ret; >> + } >> + } else { >> + ret =3D qcow2_downgrade(bs, new_version); >> + if (ret < 0) { >> + return ret; >> + } >> + } >> + } >> + >> + if (new_size) { >> + ret =3D qcow2_truncate(bs, new_size); >> + if (ret < 0) { >> + return ret; >> + } >> + } >> + >> + if (backing_file || backing_format) { >> + ret =3D qcow2_change_backing_file(bs, backing_file ?: bs->bac= king_file, >> + backing_format ?: bs->backing= _format); >> + if (ret < 0) { >> + return ret; >> + } >> + } >> + >> + if (s->use_lazy_refcounts !=3D lazy_refcounts) { >> + if (lazy_refcounts) { >> + if (s->qcow_version < 3) { >> + fprintf(stderr, "Lazy refcounts only supported with c= ompatibility " >> + "level 1.1 and above (use compat=3D1.1 or gre= ater)\n"); >> + return -EINVAL; >> + } >> + s->compatible_features |=3D QCOW2_COMPAT_LAZY_REFCOUNTS; >> + ret =3D qcow2_update_header(bs); >> + if (ret < 0) { >> + s->compatible_features &=3D ~QCOW2_COMPAT_LAZY_REFCOU= NTS; >> + return ret; >> + } >> + s->use_lazy_refcounts =3D true; >> + } else { >> + /* make image clean first */ >> + ret =3D qcow2_mark_clean(bs); >> + if (ret < 0) { >> + return ret; >> + } >> + /* now disallow lazy refcounts */ >> + s->compatible_features &=3D ~QCOW2_COMPAT_LAZY_REFCOUNTS; >> + ret =3D qcow2_update_header(bs); >> + if (ret < 0) { >> + s->compatible_features |=3D QCOW2_COMPAT_LAZY_REFCOUN= TS; >> + return ret; >> + } >> + s->use_lazy_refcounts =3D false; >> + } >> + } >> + >> + return 0; >> +} >> + >> static QEMUOptionParameter qcow2_create_options[] =3D { >> { >> .name =3D BLOCK_OPT_SIZE, >> @@ -1818,6 +2009,7 @@ static BlockDriver bdrv_qcow2 =3D { >> =20 >> .create_options =3D qcow2_create_options, >> .bdrv_check =3D qcow2_check, >> + .bdrv_amend_options =3D qcow2_amend_options, >> }; >> =20 >> static void bdrv_qcow2_init(void) >> diff --git a/block/qcow2.h b/block/qcow2.h >> index dba9771..ad3fd21 100644 >> --- a/block/qcow2.h >> +++ b/block/qcow2.h >> @@ -196,6 +196,7 @@ typedef struct BDRVQcowState { >> int flags; >> int qcow_version; >> bool use_lazy_refcounts; >> + int refcount_order; >> =20 >> bool discard_passthrough[QCOW2_DISCARD_MAX]; >> =20 >> @@ -408,6 +409,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, u= int64_t offset, >> int nb_sectors); >> int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int n= b_sectors); >> =20 >> +int qcow2_expand_zero_clusters(BlockDriverState *bs); >> + >> /* qcow2-snapshot.c functions */ >> int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn= _info); >> int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_i= d); >> --=20 >> 1.8.3.1 >> >> Max