From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:36569) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VFLvX-000734-Ma for qemu-devel@nongnu.org; Fri, 30 Aug 2013 06:27:47 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1VFLvQ-00076f-V6 for qemu-devel@nongnu.org; Fri, 30 Aug 2013 06:27:39 -0400 Received: from mx1.redhat.com ([209.132.183.28]:15552) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VFLvQ-00076Y-KH for qemu-devel@nongnu.org; Fri, 30 Aug 2013 06:27:32 -0400 Received: from int-mx09.intmail.prod.int.phx2.redhat.com (int-mx09.intmail.prod.int.phx2.redhat.com [10.5.11.22]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id r7UARWMn011471 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Fri, 30 Aug 2013 06:27:32 -0400 From: Max Reitz Date: Fri, 30 Aug 2013 12:27:19 +0200 Message-Id: <1377858440-4004-3-git-send-email-mreitz@redhat.com> In-Reply-To: <1377858440-4004-1-git-send-email-mreitz@redhat.com> References: <1377858440-4004-1-git-send-email-mreitz@redhat.com> Subject: [Qemu-devel] [PATCH v3 2/3] qcow2: Implement bdrv_amend_options List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: Kevin Wolf , Stefan Hajnoczi , Max Reitz Implement bdrv_amend_options for compat, size, backing_file, backing_fmt and lazy_refcounts. Downgrading images from compat=1.1 to compat=0.10 is achieved through handling all incompatible flags accordingly, clearing all compatible and autoclear flags and expanding all zero clusters. Signed-off-by: Max Reitz --- block/qcow2-cluster.c | 165 ++++++++++++++++++++++++++++++++++++++++++ block/qcow2.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++++- block/qcow2.h | 3 + 3 files changed, 361 insertions(+), 1 deletion(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index cca76d4..e0ca104 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1476,3 +1476,168 @@ fail: return ret; } + +/* + * Expands all zero clusters in a specific L1 table (or deallocates them, for + * non-backed non-pre-allocated zero clusters). + */ +static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, + int l1_size) +{ + BDRVQcowState *s = bs->opaque; + bool is_active_l1 = (l1_table == s->l1_table); + uint64_t *l2_table; + int ret; + int i, j; + + if (!is_active_l1) { + /* inactive L2 tables require a buffer to be stored in when loading + * them from disk */ + l2_table = qemu_blockalign(bs, s->cluster_size); + } + + for (i = 0; i < l1_size; i++) { + uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; + bool l2_dirty = false; + + if (!l2_offset) { + /* unallocated */ + continue; + } + + if (is_active_l1) { + /* get active L2 tables from cache */ + ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, + (void **)&l2_table); + } else { + /* load inactive L2 tables from disk */ + ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE, + (void *)l2_table, s->cluster_sectors); + } + if (ret < 0) { + goto fail; + } + + for (j = 0; j < s->l2_size; j++) { + uint64_t l2_entry = be64_to_cpu(l2_table[j]); + int64_t offset; + + if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) { + continue; + } + + offset = l2_entry & L2E_OFFSET_MASK; + if (!offset) { + /* not preallocated */ + if (!bs->backing_hd) { + /* not backed; therefore we can simply deallocate the + * cluster */ + l2_table[j] = 0; + l2_dirty = true; + continue; + } + + offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (offset < 0) { + ret = offset; + goto fail; + } + } + + ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE, + s->cluster_sectors); + if (ret < 0) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + goto fail; + } + + l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED); + l2_dirty = true; + } + + if (is_active_l1) { + if (l2_dirty) { + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); + qcow2_cache_depends_on_flush(s->l2_table_cache); + } + ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); + if (ret < 0) { + l2_table = NULL; + goto fail; + } + } else { + if (l2_dirty) { + ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE, + (void *)l2_table, s->cluster_sectors); + if (ret < 0) { + goto fail; + } + } + } + } + + ret = 0; + +fail: + if (l2_table) { + if (!is_active_l1) { + qemu_vfree(l2_table); + } else { + if (ret < 0) { + qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table); + } else { + ret = qcow2_cache_put(bs, s->l2_table_cache, + (void **)&l2_table); + } + } + } + return ret; +} + +/* + * For backed images, expands all zero clusters on the image. For non-backed + * images, deallocates all non-pre-allocated zero clusters (and claims the + * allocation for pre-allocated ones). This is important for downgrading to a + * qcow2 version which doesn't yet support metadata zero clusters. + */ +int qcow2_expand_zero_clusters(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + uint64_t *l1_table = NULL; + int ret; + int i, j; + + ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size); + if (ret < 0) { + goto fail; + } + + for (i = 0; i < s->nb_snapshots; i++) { + int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) + + BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE; + + l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE); + + ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset / + BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors); + if (ret < 0) { + goto fail; + } + + for (j = 0; j < s->snapshots[i].l1_size; j++) { + be64_to_cpus(&l1_table[j]); + } + + ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size); + if (ret < 0) { + goto fail; + } + } + + ret = 0; + +fail: + g_free(l1_table); + return ret; +} diff --git a/block/qcow2.c b/block/qcow2.c index 78097e5..a8eaf45 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -409,6 +409,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags) ret = -ENOTSUP; goto fail; } + s->refcount_order = header.refcount_order; if (header.cluster_bits < MIN_CLUSTER_BITS || header.cluster_bits > MAX_CLUSTER_BITS) { @@ -1076,7 +1077,7 @@ int qcow2_update_header(BlockDriverState *bs) .incompatible_features = cpu_to_be64(s->incompatible_features), .compatible_features = cpu_to_be64(s->compatible_features), .autoclear_features = cpu_to_be64(s->autoclear_features), - .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT), + .refcount_order = cpu_to_be32(s->refcount_order), .header_length = cpu_to_be32(header_length), }; @@ -1735,6 +1736,196 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, return ret; } +/* + * Downgrades an image's version. To achieve this, any incompatible features + * have to be removed. + */ +static int qcow2_downgrade(BlockDriverState *bs, int target_version) +{ + BDRVQcowState *s = bs->opaque; + int current_version = s->qcow_version; + int ret; + + if (target_version == current_version) { + return 0; + } else if (target_version > current_version) { + return -EINVAL; + } else if (target_version != 2) { + return -EINVAL; + } + + if (s->refcount_order != 4) { + /* we would have to convert the image to a refcount_order == 4 image + * here; however, since qemu (at the time of writing this) does not + * support anything different than 4 anyway, there is no point in doing + * so right now; however, we should error out (if qemu supports this in + * the future and this code has not been adapted) */ + error_report("qcow2_downgrade: Image refcount orders other than 4 are" + "currently not supported."); + return -ENOTSUP; + } + + /* clear incompatible features */ + if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { + ret = qcow2_mark_clean(bs); + if (ret < 0) { + return ret; + } + } + + if (s->incompatible_features) { + return -ENOTSUP; + } + + /* since we can ignore compatible features, we can set them to 0 as well */ + s->compatible_features = 0; + /* if lazy refcounts have been used, they have already been fixed through + * clearing the dirty flag */ + + /* clearing autoclear features is trivial */ + s->autoclear_features = 0; + + ret = qcow2_expand_zero_clusters(bs); + if (ret < 0) { + return ret; + } + + s->qcow_version = target_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = current_version; + return ret; + } + return 0; +} + +static int qcow2_amend_options(BlockDriverState *bs, + QEMUOptionParameter *options) +{ + BDRVQcowState *s = bs->opaque; + int old_version = s->qcow_version, new_version = old_version; + uint64_t new_size = 0; + const char *backing_file = NULL, *backing_format = NULL; + bool lazy_refcounts = s->use_lazy_refcounts; + int ret; + int i; + + for (i = 0; options[i].name; i++) + { + if (!strcmp(options[i].name, "compat")) { + if (!options[i].value.s) { + /* preserve default */ + } else if (!strcmp(options[i].value.s, "0.10")) { + new_version = 2; + } else if (!strcmp(options[i].value.s, "1.1")) { + new_version = 3; + } else { + fprintf(stderr, "Unknown compatibility level %s.\n", + options[i].value.s); + return -EINVAL; + } + } else if (!strcmp(options[i].name, "preallocation")) { + if (options[i].assigned) { + fprintf(stderr, "Cannot change preallocation mode.\n"); + return -ENOTSUP; + } + } else if (!strcmp(options[i].name, "size")) { + new_size = options[i].value.n; + } else if (!strcmp(options[i].name, "backing_file")) { + backing_file = options[i].value.s; + } else if (!strcmp(options[i].name, "backing_fmt")) { + backing_format = options[i].value.s; + } else if (!strcmp(options[i].name, "encryption")) { + if (options[i].assigned && + (options[i].value.n != !!s->crypt_method)) { + fprintf(stderr, "Changing the encryption flag is not " + "supported.\n"); + return -ENOTSUP; + } + } else if (!strcmp(options[i].name, "cluster_size")) { + if (options[i].assigned && (options[i].value.n != s->cluster_size)) + { + fprintf(stderr, "Changing the cluster size is not " + "supported.\n"); + return -ENOTSUP; + } + } else if (!strcmp(options[i].name, "lazy_refcounts")) { + if (options[i].assigned) { + lazy_refcounts = options[i].value.n; + } + } else { + /* if this assertion fails, this probably means a new option was + * added without having it covered here */ + assert(false); + } + } + + if (new_version != old_version) { + if (new_version > old_version) { + /* Upgrade */ + s->qcow_version = new_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = old_version; + return ret; + } + } else { + ret = qcow2_downgrade(bs, new_version); + if (ret < 0) { + return ret; + } + } + } + + if (new_size) { + ret = qcow2_truncate(bs, new_size); + if (ret < 0) { + return ret; + } + } + + if (backing_file || backing_format) { + ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file, + backing_format ?: bs->backing_format); + if (ret < 0) { + return ret; + } + } + + if (s->use_lazy_refcounts != lazy_refcounts) { + if (lazy_refcounts) { + if (s->qcow_version < 3) { + fprintf(stderr, "Lazy refcounts only supported with compatibility " + "level 1.1 and above (use compat=1.1 or greater)\n"); + return -EINVAL; + } + s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; + return ret; + } + s->use_lazy_refcounts = true; + } else { + /* make image clean first */ + ret = qcow2_mark_clean(bs); + if (ret < 0) { + return ret; + } + /* now disallow lazy refcounts */ + s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; + return ret; + } + s->use_lazy_refcounts = false; + } + } + + return 0; +} + static QEMUOptionParameter qcow2_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1818,6 +2009,7 @@ static BlockDriver bdrv_qcow2 = { .create_options = qcow2_create_options, .bdrv_check = qcow2_check, + .bdrv_amend_options = qcow2_amend_options, }; static void bdrv_qcow2_init(void) diff --git a/block/qcow2.h b/block/qcow2.h index dba9771..ad3fd21 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -196,6 +196,7 @@ typedef struct BDRVQcowState { int flags; int qcow_version; bool use_lazy_refcounts; + int refcount_order; bool discard_passthrough[QCOW2_DISCARD_MAX]; @@ -408,6 +409,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); +int qcow2_expand_zero_clusters(BlockDriverState *bs); + /* qcow2-snapshot.c functions */ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id); -- 1.8.3.1