qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/5] vmdk: zeroed-grain GTE support
@ 2013-04-22  2:07 Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 1/5] vmdk: named return code Fam Zheng
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-22  2:07 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Added support for zeroed-grain GTE to VMDK according to VMDK Spec 5.0[1].

[1] Virtual Disk Format 5.0 - VMware,
    http://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf?src=vmdk

Changes since v1:
 - all: fix From: field
 - 1/5: squash one line of ret code macro change from 2/5
 - 2/5: change VMDK4_FLAG_ZG to VMDK4_FLAG_ZERO_GRAIN
 - 3/5: move BLOCK_OPT_ZEROED_GRAIN defination from block_int.h to vmdk.c
 - 5/5: fix metadata update issue, unit test with cases 033 034

Fam Zheng (5):
  vmdk: named return code.
  vmdk: add support for “zeroed‐grain” GTE
  vmdk: Add option to create zeroed-grain image
  vmdk: change magic number to macro
  vmdk: add bdrv_co_write_zeroes

 block/vmdk.c | 185 ++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 131 insertions(+), 54 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v2 1/5] vmdk: named return code.
  2013-04-22  2:07 [Qemu-devel] [PATCH v2 0/5] vmdk: zeroed-grain GTE support Fam Zheng
@ 2013-04-22  2:07 ` Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 2/5] vmdk: add support for “zeroed‐grain” GTE Fam Zheng
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-22  2:07 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Internal routines in vmdk.c previously return -1 on error and 0 on
success. More return values are useful for future changes such as
zeroed-grain GTE. Change all the magic `return 0` and `return -1` to
macro names:

 * VMDK_OK      0
 * VMDK_ERROR   (-1)
 * VMDK_UNALLOC (-2)
 * VMDK_ZEROED  (-3)

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 60 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 7bad757..16aa29c 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -37,6 +37,14 @@
 #define VMDK4_FLAG_MARKER (1 << 17)
 #define VMDK4_GD_AT_END 0xffffffffffffffffULL
 
+
+/* VMDK internal error codes */
+#define VMDK_OK      0
+#define VMDK_ERROR   (-1)
+/* Cluster not allocated */
+#define VMDK_UNALLOC (-2)
+#define VMDK_ZEROED  (-3)
+
 typedef struct {
     uint32_t version;
     uint32_t flags;
@@ -578,22 +586,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
 
     opt_pos = strstr(desc, opt_name);
     if (!opt_pos) {
-        return -1;
+        return VMDK_ERROR;
     }
     /* Skip "=\"" following opt_name */
     opt_pos += strlen(opt_name) + 2;
     if (opt_pos >= end) {
-        return -1;
+        return VMDK_ERROR;
     }
     opt_end = opt_pos;
     while (opt_end < end && *opt_end != '"') {
         opt_end++;
     }
     if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
-        return -1;
+        return VMDK_ERROR;
     }
     pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
-    return 0;
+    return VMDK_OK;
 }
 
 /* Open an extent file and append to bs array */
@@ -772,7 +780,7 @@ static int get_whole_cluster(BlockDriverState *bs,
         int ret;
 
         if (!vmdk_is_cid_valid(bs)) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         /* floor offset to cluster */
@@ -780,17 +788,17 @@ static int get_whole_cluster(BlockDriverState *bs,
         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
                 extent->cluster_sectors);
         if (ret < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         /* Write grain only into the active image */
         ret = bdrv_write(extent->file, cluster_offset, whole_grain,
                 extent->cluster_sectors);
         if (ret < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
     }
-    return 0;
+    return VMDK_OK;
 }
 
 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
@@ -803,7 +811,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
                 &(m_data->offset),
                 sizeof(m_data->offset)
             ) < 0) {
-        return -1;
+        return VMDK_ERROR;
     }
     /* update backup L2 table */
     if (extent->l1_backup_table_offset != 0) {
@@ -814,11 +822,11 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
                         + (m_data->l2_index * sizeof(m_data->offset)),
                     &(m_data->offset), sizeof(m_data->offset)
                 ) < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
     }
 
-    return 0;
+    return VMDK_OK;
 }
 
 static int get_cluster_offset(BlockDriverState *bs,
@@ -837,17 +845,17 @@ static int get_cluster_offset(BlockDriverState *bs,
     }
     if (extent->flat) {
         *cluster_offset = extent->flat_start_offset;
-        return 0;
+        return VMDK_OK;
     }
 
     offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
     l1_index = (offset >> 9) / extent->l1_entry_sectors;
     if (l1_index >= extent->l1_size) {
-        return -1;
+        return VMDK_ERROR;
     }
     l2_offset = extent->l1_table[l1_index];
     if (!l2_offset) {
-        return -1;
+        return VMDK_UNALLOC;
     }
     for (i = 0; i < L2_CACHE_SIZE; i++) {
         if (l2_offset == extent->l2_cache_offsets[i]) {
@@ -877,7 +885,7 @@ static int get_cluster_offset(BlockDriverState *bs,
                 l2_table,
                 extent->l2_size * sizeof(uint32_t)
             ) != extent->l2_size * sizeof(uint32_t)) {
-        return -1;
+        return VMDK_ERROR;
     }
 
     extent->l2_cache_offsets[min_index] = l2_offset;
@@ -888,7 +896,7 @@ static int get_cluster_offset(BlockDriverState *bs,
 
     if (!*cluster_offset) {
         if (!allocate) {
-            return -1;
+            return VMDK_UNALLOC;
         }
 
         /* Avoid the L2 tables update for the images that have snapshots. */
@@ -911,7 +919,7 @@ static int get_cluster_offset(BlockDriverState *bs,
          */
         if (get_whole_cluster(
                 bs, extent, *cluster_offset, offset, allocate) == -1) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         if (m_data) {
@@ -923,7 +931,7 @@ static int get_cluster_offset(BlockDriverState *bs,
         }
     }
     *cluster_offset <<= 9;
-    return 0;
+    return VMDK_OK;
 }
 
 static VmdkExtent *find_extent(BDRVVmdkState *s,
@@ -1173,7 +1181,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                                 sector_num << 9, !extent->compressed,
                                 &cluster_offset);
         if (extent->compressed) {
-            if (ret == 0) {
+            if (ret == VMDK_OK) {
                 /* Refuse write to allocated cluster for streamOptimized */
                 fprintf(stderr,
                         "VMDK: can't write to allocated cluster"
@@ -1357,7 +1365,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
 
     if (filename == NULL || !strlen(filename)) {
         fprintf(stderr, "Vmdk: no filename provided.\n");
-        return -1;
+        return VMDK_ERROR;
     }
     p = strrchr(filename, '/');
     if (p == NULL) {
@@ -1369,7 +1377,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
     if (p != NULL) {
         p++;
         if (p - filename >= buf_len) {
-            return -1;
+            return VMDK_ERROR;
         }
         pstrcpy(path, p - filename + 1, filename);
     } else {
@@ -1382,12 +1390,12 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
         postfix[0] = '\0';
     } else {
         if (q - p >= buf_len) {
-            return -1;
+            return VMDK_ERROR;
         }
         pstrcpy(prefix, q - p + 1, p);
         pstrcpy(postfix, buf_len, q);
     }
-    return 0;
+    return VMDK_OK;
 }
 
 static int relative_path(char *dest, int dest_size,
@@ -1403,11 +1411,11 @@ static int relative_path(char *dest, int dest_size,
 #endif
 
     if (!(dest && base && target)) {
-        return -1;
+        return VMDK_ERROR;
     }
     if (path_is_absolute(target)) {
         pstrcpy(dest, dest_size, target);
-        return 0;
+        return VMDK_OK;
     }
     while (base[i] == target[i]) {
         i++;
@@ -1426,7 +1434,7 @@ static int relative_path(char *dest, int dest_size,
         pstrcat(dest, dest_size, sep);
     }
     pstrcat(dest, dest_size, q);
-    return 0;
+    return VMDK_OK;
 }
 
 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v2 2/5] vmdk: add support for “zeroed‐grain” GTE
  2013-04-22  2:07 [Qemu-devel] [PATCH v2 0/5] vmdk: zeroed-grain GTE support Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 1/5] vmdk: named return code Fam Zheng
@ 2013-04-22  2:07 ` Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image Fam Zheng
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-22  2:07 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Introduced support for zeroed-grain GTE, as specified in Virtual Disk
Format 5.0[1].

    Recent VMware hosted platform products support a new “zeroed‐grain”
    grain table entry (GTE). The zeroed‐grain GTE returns all zeros on
    read.  In other words, the zeroed‐grain GTE indicates that a grain
    in the child disk is zero‐filled but does not actually occupy space
    in storage.  A sparse extent with zeroed‐grain GTE has the following
    in its header:

     * SparseExtentHeader.version = 2
     * SparseExtentHeader.flags has bit 2 set

    Other than the new flag and the possibly zeroed‐grain GTE, version 2
    sparse extents are identical to version 1.  Also, a zeroed‐grain GTE
    has value 0x1 in the GT table.

[1] Virtual Disk Format 5.0, http://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf?src=vmdk
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 16aa29c..7e07c0f 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -33,10 +33,13 @@
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
 #define VMDK4_COMPRESSION_DEFLATE 1
 #define VMDK4_FLAG_RGD (1 << 1)
+/* Zeroed-grain enable bit */
+#define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
 #define VMDK4_FLAG_COMPRESS (1 << 16)
 #define VMDK4_FLAG_MARKER (1 << 17)
 #define VMDK4_GD_AT_END 0xffffffffffffffffULL
 
+#define VMDK_GTE_ZEROED 0x1
 
 /* VMDK internal error codes */
 #define VMDK_OK      0
@@ -81,6 +84,8 @@ typedef struct VmdkExtent {
     bool flat;
     bool compressed;
     bool has_marker;
+    bool has_zero_grain;
+    int version;
     int64_t sectors;
     int64_t end_sector;
     int64_t flat_start_offset;
@@ -569,6 +574,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     extent->compressed =
         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
     extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
+    extent->version = le32_to_cpu(header.version);
+    extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
     ret = vmdk_init_tables(bs, extent);
     if (ret) {
         /* free extent allocated by vmdk_add_extent */
@@ -839,6 +846,7 @@ static int get_cluster_offset(BlockDriverState *bs,
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
     uint32_t min_count, *l2_table, tmp = 0;
+    bool zeroed = false;
 
     if (m_data) {
         m_data->valid = 0;
@@ -894,9 +902,13 @@ static int get_cluster_offset(BlockDriverState *bs,
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     *cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
-    if (!*cluster_offset) {
+    if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
+        zeroed = true;
+    }
+
+    if (!*cluster_offset || zeroed) {
         if (!allocate) {
-            return VMDK_UNALLOC;
+            return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
         }
 
         /* Avoid the L2 tables update for the images that have snapshots. */
@@ -967,8 +979,8 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
     ret = get_cluster_offset(bs, extent, NULL,
                             sector_num * 512, 0, &offset);
     qemu_co_mutex_unlock(&s->lock);
-    /* get_cluster_offset returning 0 means success */
-    ret = !ret;
+
+    ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
 
     index_in_cluster = sector_num % extent->cluster_sectors;
     n = extent->cluster_sectors - index_in_cluster;
@@ -1111,9 +1123,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
         if (n > nb_sectors) {
             n = nb_sectors;
         }
-        if (ret) {
+        if (ret != VMDK_OK) {
             /* if not allocated, try to read from parent image, if exist */
-            if (bs->backing_hd) {
+            if (bs->backing_hd && ret != VMDK_ZEROED) {
                 if (!vmdk_is_cid_valid(bs)) {
                     return -EINVAL;
                 }
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image
  2013-04-22  2:07 [Qemu-devel] [PATCH v2 0/5] vmdk: zeroed-grain GTE support Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 1/5] vmdk: named return code Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 2/5] vmdk: add support for “zeroed‐grain” GTE Fam Zheng
@ 2013-04-22  2:07 ` Fam Zheng
  2013-04-22 14:06   ` Stefan Hajnoczi
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 4/5] vmdk: change magic number to macro Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 5/5] vmdk: add bdrv_co_write_zeroes Fam Zheng
  4 siblings, 1 reply; 10+ messages in thread
From: Fam Zheng @ 2013-04-22  2:07 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Add image create option "zeroed-grain" to enable zeroed-grain GTE
feature of vmdk sparse extents. When this option is on, header version
of newly created extent will be 2 and VMDK4_FLAG_ZG flag bit will be
set.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 7e07c0f..d8c6c70 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -48,6 +48,8 @@
 #define VMDK_UNALLOC (-2)
 #define VMDK_ZEROED  (-3)
 
+#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
+
 typedef struct {
     uint32_t version;
     uint32_t flags;
@@ -1262,7 +1264,7 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
 
 
 static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress)
+                              bool flat, bool compress, bool zeroed_grain)
 {
     int ret, i;
     int fd = 0;
@@ -1284,9 +1286,10 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     }
     magic = cpu_to_be32(VMDK4_MAGIC);
     memset(&header, 0, sizeof(header));
-    header.version = 1;
-    header.flags =
-        3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
+    header.version = zeroed_grain ? 2 : 1;
+    header.flags = 3
+                   | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
+                   | (zeroed_grain ? VMDK4_FLAG_ZG : 0);
     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
     header.capacity = filesize / 512;
     header.granularity = 128;
@@ -1467,6 +1470,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     char parent_desc_line[BUF_SIZE] = "";
     uint32_t parent_cid = 0xffffffff;
     uint32_t number_heads = 16;
+    bool zeroed_grain = false;
     const char desc_template[] =
         "# Disk DescriptorFile\n"
         "version=1\n"
@@ -1502,6 +1506,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
             flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
         } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
             fmt = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
+            zeroed_grain |= options->value.n;
         }
         options++;
     }
@@ -1588,7 +1594,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
         snprintf(ext_filename, sizeof(ext_filename), "%s%s",
                 path, desc_filename);
 
-        if (vmdk_create_extent(ext_filename, size, flat, compress)) {
+        if (vmdk_create_extent(ext_filename, size,
+                               flat, compress, zeroed_grain)) {
             return -EINVAL;
         }
         filesize -= size;
@@ -1714,6 +1721,11 @@ static QEMUOptionParameter vmdk_create_options[] = {
             "VMDK flat extent format, can be one of "
             "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
     },
+    {
+        .name = BLOCK_OPT_ZEROED_GRAIN,
+        .type = OPT_FLAG,
+        .help = "Enable efficient zero writes using the zeroed-grain GTE feature"
+    },
     { NULL }
 };
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v2 4/5] vmdk: change magic number to macro
  2013-04-22  2:07 [Qemu-devel] [PATCH v2 0/5] vmdk: zeroed-grain GTE support Fam Zheng
                   ` (2 preceding siblings ...)
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image Fam Zheng
@ 2013-04-22  2:07 ` Fam Zheng
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 5/5] vmdk: add bdrv_co_write_zeroes Fam Zheng
  4 siblings, 0 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-22  2:07 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Two hard coded flag bits are changed to macros.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index d8c6c70..632689b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -32,6 +32,7 @@
 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
 #define VMDK4_COMPRESSION_DEFLATE 1
+#define VMDK4_FLAG_NL_DETECT (1 << 0)
 #define VMDK4_FLAG_RGD (1 << 1)
 /* Zeroed-grain enable bit */
 #define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
@@ -1287,7 +1288,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     magic = cpu_to_be32(VMDK4_MAGIC);
     memset(&header, 0, sizeof(header));
     header.version = zeroed_grain ? 2 : 1;
-    header.flags = 3
+    header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
                    | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
                    | (zeroed_grain ? VMDK4_FLAG_ZG : 0);
     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v2 5/5] vmdk: add bdrv_co_write_zeroes
  2013-04-22  2:07 [Qemu-devel] [PATCH v2 0/5] vmdk: zeroed-grain GTE support Fam Zheng
                   ` (3 preceding siblings ...)
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 4/5] vmdk: change magic number to macro Fam Zheng
@ 2013-04-22  2:07 ` Fam Zheng
  2013-04-22 14:49   ` Stefan Hajnoczi
  4 siblings, 1 reply; 10+ messages in thread
From: Fam Zheng @ 2013-04-22  2:07 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Use special offset to write zeroes efficiently, when zeroed-grain GTE is
available. If zero-write an allocated cluster, cluster is leaked because
its offset pointer is overwritten by "0x1".

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 63 insertions(+), 19 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 632689b..7475090 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -814,6 +814,7 @@ static int get_whole_cluster(BlockDriverState *bs,
 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
 {
     /* update L2 table */
+    m_data->l2_offset = extent->l1_table[m_data->l1_index];
     if (bdrv_pwrite_sync(
                 extent->file,
                 ((int64_t)m_data->l2_offset * 512)
@@ -905,6 +906,12 @@ static int get_cluster_offset(BlockDriverState *bs,
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     *cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
+    if (m_data) {
+        m_data->valid = 1;
+        m_data->l1_index = l1_index;
+        m_data->l2_index = l2_index;
+        m_data->offset = cpu_to_le32(*cluster_offset);
+    }
     if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
         zeroed = true;
     }
@@ -939,10 +946,6 @@ static int get_cluster_offset(BlockDriverState *bs,
 
         if (m_data) {
             m_data->offset = tmp;
-            m_data->l1_index = l1_index;
-            m_data->l2_index = l2_index;
-            m_data->l2_offset = l2_offset;
-            m_data->valid = 1;
         }
     }
     *cluster_offset <<= 9;
@@ -1165,8 +1168,16 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+/**
+ * params:
+ *  - zeroed:       buf is ignored (data is zero), use zeroed_grain GTE
+ *                  feature if possible, otherwise return -ENOTSUP.
+ *  - zero_dry_run: used for zeroed == true only, don't update L2 table, just
+ *                  try if it's supported
+ */
 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
+                      const uint8_t *buf, int nb_sectors,
+                      bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
@@ -1212,7 +1223,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                                         &cluster_offset);
             }
         }
-        if (ret) {
+        if (ret == VMDK_ERROR) {
             return -EINVAL;
         }
         extent_begin_sector = extent->end_sector - extent->sectors;
@@ -1222,17 +1233,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         if (n > nb_sectors) {
             n = nb_sectors;
         }
-
-        ret = vmdk_write_extent(extent,
-                        cluster_offset, index_in_cluster * 512,
-                        buf, n, sector_num);
-        if (ret) {
-            return ret;
-        }
-        if (m_data.valid) {
-            /* update L2 tables */
-            if (vmdk_L2update(extent, &m_data) == -1) {
-                return -EIO;
+        if (zeroed) {
+            /* Do zeroed write, buf is ignored */
+            if (extent->has_zero_grain &&
+                    index_in_cluster == 0 &&
+                    n >= extent->cluster_sectors) {
+                n = extent->cluster_sectors;
+                if (!zero_dry_run) {
+                    m_data.offset = cpu_to_le32(VMDK_GTE_ZEROED);
+                    /* update L2 tables */
+                    if (vmdk_L2update(extent, &m_data) == -1) {
+                        return -EIO;
+                    }
+                }
+            } else {
+                return -ENOTSUP;
+            }
+        } else {
+            ret = vmdk_write_extent(extent,
+                            cluster_offset, index_in_cluster * 512,
+                            buf, n, sector_num);
+            if (ret) {
+                return ret;
+            }
+            if (m_data.valid) {
+                /* update L2 tables */
+                if (vmdk_L2update(extent, &m_data) == -1) {
+                    return -EIO;
+                }
             }
         }
         nb_sectors -= n;
@@ -1258,7 +1286,22 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
     int ret;
     BDRVVmdkState *s = bs->opaque;
     qemu_co_mutex_lock(&s->lock);
-    ret = vmdk_write(bs, sector_num, buf, nb_sectors);
+    ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             int nb_sectors)
+{
+    int ret;
+    BDRVVmdkState *s = bs->opaque;
+    qemu_co_mutex_lock(&s->lock);
+    ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+    if (!ret) {
+        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+    }
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
@@ -1290,7 +1333,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     header.version = zeroed_grain ? 2 : 1;
     header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
                    | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
-                   | (zeroed_grain ? VMDK4_FLAG_ZG : 0);
+                   | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
     header.capacity = filesize / 512;
     header.granularity = 128;
@@ -1738,6 +1781,7 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_reopen_prepare = vmdk_reopen_prepare,
     .bdrv_read      = vmdk_co_read,
     .bdrv_write     = vmdk_co_write,
+    .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
     .bdrv_close     = vmdk_close,
     .bdrv_create    = vmdk_create,
     .bdrv_co_flush_to_disk  = vmdk_co_flush,
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image Fam Zheng
@ 2013-04-22 14:06   ` Stefan Hajnoczi
  2013-04-23  0:52     ` Fam Zheng
  0 siblings, 1 reply; 10+ messages in thread
From: Stefan Hajnoczi @ 2013-04-22 14:06 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, qemu-devel, stefanha

On Mon, Apr 22, 2013 at 10:07:57AM +0800, Fam Zheng wrote:
> Add image create option "zeroed-grain" to enable zeroed-grain GTE
> feature of vmdk sparse extents. When this option is on, header version
> of newly created extent will be 2 and VMDK4_FLAG_ZG flag bit will be
> set.
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block/vmdk.c | 22 +++++++++++++++++-----
>  1 file changed, 17 insertions(+), 5 deletions(-)

Is there a way to upgrade an image file to use zeroed-grain GTEs once
the file has been created?

Stefan

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v2 5/5] vmdk: add bdrv_co_write_zeroes
  2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 5/5] vmdk: add bdrv_co_write_zeroes Fam Zheng
@ 2013-04-22 14:49   ` Stefan Hajnoczi
  0 siblings, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2013-04-22 14:49 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, qemu-devel, stefanha

On Mon, Apr 22, 2013 at 10:07:59AM +0800, Fam Zheng wrote:
> Use special offset to write zeroes efficiently, when zeroed-grain GTE is
> available. If zero-write an allocated cluster, cluster is leaked because
> its offset pointer is overwritten by "0x1".
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block/vmdk.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 63 insertions(+), 19 deletions(-)
> 
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 632689b..7475090 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -814,6 +814,7 @@ static int get_whole_cluster(BlockDriverState *bs,
>  static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
>  {
>      /* update L2 table */
> +    m_data->l2_offset = extent->l1_table[m_data->l1_index];

This is weird.  vmdk_L2update() should not modify m_data.  Please avoid
side-effects like this.

Is the VmdkMetaData.l2_offset field even needed?  Perhaps you can drop
it from the struct and use a local in vmdk_L2update().

>      if (bdrv_pwrite_sync(
>                  extent->file,
>                  ((int64_t)m_data->l2_offset * 512)
> @@ -905,6 +906,12 @@ static int get_cluster_offset(BlockDriverState *bs,
>      l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
>      *cluster_offset = le32_to_cpu(l2_table[l2_index]);
>  
> +    if (m_data) {
> +        m_data->valid = 1;
> +        m_data->l1_index = l1_index;
> +        m_data->l2_index = l2_index;
> +        m_data->offset = cpu_to_le32(*cluster_offset);

It's risky to mix host endian and le32 fields in the same struct - the
chance is high that someone will make a mistake which field needs to be
byteswapped.

Instead, put the cpu_to_le32() inside vmdk_L2update() and use a local
variable there.  Please do this in a separate patch so it's easy to
review in isolation.

> +    }
>      if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
>          zeroed = true;
>      }
> @@ -939,10 +946,6 @@ static int get_cluster_offset(BlockDriverState *bs,
>  
>          if (m_data) {
>              m_data->offset = tmp;
> -            m_data->l1_index = l1_index;
> -            m_data->l2_index = l2_index;
> -            m_data->l2_offset = l2_offset;
> -            m_data->valid = 1;
>          }
>      }
>      *cluster_offset <<= 9;
> @@ -1165,8 +1168,16 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
>      return ret;
>  }
>  
> +/**
> + * params:
> + *  - zeroed:       buf is ignored (data is zero), use zeroed_grain GTE
> + *                  feature if possible, otherwise return -ENOTSUP.
> + *  - zero_dry_run: used for zeroed == true only, don't update L2 table, just
> + *                  try if it's supported
> + */

Thanks for documenting the function!  Please use gtkdoc style, for
example:

/**
 * object_get_class:
 * @obj: A derivative of #Object
 *
 * Returns: The #ObjectClass of the type associated with @obj.
 */
ObjectClass *object_get_class(Object *obj);

(from include/qom/object.h)

https://developer.gnome.org/gtk-doc-manual/unstable/documenting_syntax.html.en

Previously there was no standard but it helps if we all use the same doc
comment style.

> @@ -1290,7 +1333,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
>      header.version = zeroed_grain ? 2 : 1;
>      header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
>                     | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
> -                   | (zeroed_grain ? VMDK4_FLAG_ZG : 0);
> +                   | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);

Please fix the patch series so the old VMDK4_FLAG_ZG is never
introduced.  The best way of ensuring this is to buildtest the series at
each patch:

git rebase -i master -x make

This will run make on every commit from master..HEAD.  If the build
fails, it stops and lets you git commit --amend.

Stefan

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image
  2013-04-22 14:06   ` Stefan Hajnoczi
@ 2013-04-23  0:52     ` Fam Zheng
  2013-04-23  7:00       ` Stefan Hajnoczi
  0 siblings, 1 reply; 10+ messages in thread
From: Fam Zheng @ 2013-04-23  0:52 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: kwolf, qemu-devel, stefanha

On Mon, 04/22 16:06, Stefan Hajnoczi wrote:
> On Mon, Apr 22, 2013 at 10:07:57AM +0800, Fam Zheng wrote:
> > Add image create option "zeroed-grain" to enable zeroed-grain GTE
> > feature of vmdk sparse extents. When this option is on, header version
> > of newly created extent will be 2 and VMDK4_FLAG_ZG flag bit will be
> > set.
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> > ---
> >  block/vmdk.c | 22 +++++++++++++++++-----
> >  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> Is there a way to upgrade an image file to use zeroed-grain GTEs once
> the file has been created?
> 

Yes. We have discussed this on IRC.

Here's how we do it:
 - If file version is 2 and zeroed-grain GTE bit set in header, we can
   support bdrv_co_write_zeroes (this patch series implemented).
 - If the file version is 2, we can enable zeroed-grain flag in header
   on bdrv_co_write_zeroes, and use zeroed-grain GTE. (need another
   patch to update the header).
 - Otherwize, -ENOTSUP

For background, here's how vmware uses this feature:
 - On shrinking a *child* disk (e.g.: vmware-vdiskmanager -k
   child.vmdk), if the child cluster is filled with zero, then the image
   version is upgraded to 2 and zeroed-grian flag is set, and
   zeroed-grain GTE is used to clear out the cluster, no need to take
   care of parent cluster allocation and actual data at all. That's
   where the efficiency comes.

-- 
Fam

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image
  2013-04-23  0:52     ` Fam Zheng
@ 2013-04-23  7:00       ` Stefan Hajnoczi
  0 siblings, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2013-04-23  7:00 UTC (permalink / raw)
  To: Stefan Hajnoczi, qemu-devel, kwolf

On Tue, Apr 23, 2013 at 08:52:02AM +0800, Fam Zheng wrote:
> On Mon, 04/22 16:06, Stefan Hajnoczi wrote:
> > On Mon, Apr 22, 2013 at 10:07:57AM +0800, Fam Zheng wrote:
> > > Add image create option "zeroed-grain" to enable zeroed-grain GTE
> > > feature of vmdk sparse extents. When this option is on, header version
> > > of newly created extent will be 2 and VMDK4_FLAG_ZG flag bit will be
> > > set.
> > > Signed-off-by: Fam Zheng <famz@redhat.com>
> > > ---
> > >  block/vmdk.c | 22 +++++++++++++++++-----
> > >  1 file changed, 17 insertions(+), 5 deletions(-)
> > 
> > Is there a way to upgrade an image file to use zeroed-grain GTEs once
> > the file has been created?
> > 
> 
> Yes. We have discussed this on IRC.
> 
> Here's how we do it:
>  - If file version is 2 and zeroed-grain GTE bit set in header, we can
>    support bdrv_co_write_zeroes (this patch series implemented).
>  - If the file version is 2, we can enable zeroed-grain flag in header
>    on bdrv_co_write_zeroes, and use zeroed-grain GTE. (need another
>    patch to update the header).
>  - Otherwize, -ENOTSUP
> 
> For background, here's how vmware uses this feature:
>  - On shrinking a *child* disk (e.g.: vmware-vdiskmanager -k
>    child.vmdk), if the child cluster is filled with zero, then the image
>    version is upgraded to 2 and zeroed-grian flag is set, and
>    zeroed-grain GTE is used to clear out the cluster, no need to take
>    care of parent cluster allocation and actual data at all. That's
>    where the efficiency comes.

The question is when QEMU should upgrade images.  It's safest to avoid
silently increasing the image file's version requirements.  But it would
be nice to allow upgrading if the users wishes to do that.

It doesn't need to be solved in this patch series.

Stefan

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-04-23  7:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-22  2:07 [Qemu-devel] [PATCH v2 0/5] vmdk: zeroed-grain GTE support Fam Zheng
2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 1/5] vmdk: named return code Fam Zheng
2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 2/5] vmdk: add support for “zeroed‐grain” GTE Fam Zheng
2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 3/5] vmdk: Add option to create zeroed-grain image Fam Zheng
2013-04-22 14:06   ` Stefan Hajnoczi
2013-04-23  0:52     ` Fam Zheng
2013-04-23  7:00       ` Stefan Hajnoczi
2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 4/5] vmdk: change magic number to macro Fam Zheng
2013-04-22  2:07 ` [Qemu-devel] [PATCH v2 5/5] vmdk: add bdrv_co_write_zeroes Fam Zheng
2013-04-22 14:49   ` Stefan Hajnoczi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).