qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support
@ 2013-04-24 12:44 Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 1/6] vmdk: named return code Fam Zheng
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-24 12:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Added support for zeroed-grain GTE to VMDK according to VMDK Spec 5.0[1].

[1] Virtual Disk Format 5.0 - VMware,
    http://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf?src=vmdk

Changes since v2:
 - all: Added 5/6 (vmdk: store fields of VmdkMetaData in cpu endian)
 - 6/6: Avoid side-effect of vmdk_L2update.
        Change function comment to gtkdoc stype.
        Fix VMDK4_FLAG_ZG.

Changes since v1:
 - all: fix From: field
 - 1/5: squash one line of ret code macro change from 2/5
 - 2/5: change VMDK4_FLAG_ZG to VMDK4_FLAG_ZERO_GRAIN
 - 3/5: move BLOCK_OPT_ZEROED_GRAIN defination from block_int.h to vmdk.c
 - 5/5: fix metadata update issue, unit test with cases 033 034

Fam Zheng (6):
  vmdk: named return code.
  vmdk: add support for “zeroed‐grain” GTE
  vmdk: Add option to create zeroed-grain image
  vmdk: change magic number to macro
  vmdk: store fields of VmdkMetaData in cpu endian
  vmdk: add bdrv_co_write_zeroes

 block/vmdk.c | 194 ++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 138 insertions(+), 56 deletions(-)

-- 
1.8.1.4

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v3 1/6] vmdk: named return code.
  2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
@ 2013-04-24 12:44 ` Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 2/6] vmdk: add support for “zeroed‐grain” GTE Fam Zheng
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-24 12:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Internal routines in vmdk.c previously return -1 on error and 0 on
success. More return values are useful for future changes such as
zeroed-grain GTE. Change all the magic `return 0` and `return -1` to
macro names:

 * VMDK_OK      0
 * VMDK_ERROR   (-1)
 * VMDK_UNALLOC (-2)
 * VMDK_ZEROED  (-3)

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 60 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 7bad757..16aa29c 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -37,6 +37,14 @@
 #define VMDK4_FLAG_MARKER (1 << 17)
 #define VMDK4_GD_AT_END 0xffffffffffffffffULL
 
+
+/* VMDK internal error codes */
+#define VMDK_OK      0
+#define VMDK_ERROR   (-1)
+/* Cluster not allocated */
+#define VMDK_UNALLOC (-2)
+#define VMDK_ZEROED  (-3)
+
 typedef struct {
     uint32_t version;
     uint32_t flags;
@@ -578,22 +586,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
 
     opt_pos = strstr(desc, opt_name);
     if (!opt_pos) {
-        return -1;
+        return VMDK_ERROR;
     }
     /* Skip "=\"" following opt_name */
     opt_pos += strlen(opt_name) + 2;
     if (opt_pos >= end) {
-        return -1;
+        return VMDK_ERROR;
     }
     opt_end = opt_pos;
     while (opt_end < end && *opt_end != '"') {
         opt_end++;
     }
     if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
-        return -1;
+        return VMDK_ERROR;
     }
     pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
-    return 0;
+    return VMDK_OK;
 }
 
 /* Open an extent file and append to bs array */
@@ -772,7 +780,7 @@ static int get_whole_cluster(BlockDriverState *bs,
         int ret;
 
         if (!vmdk_is_cid_valid(bs)) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         /* floor offset to cluster */
@@ -780,17 +788,17 @@ static int get_whole_cluster(BlockDriverState *bs,
         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
                 extent->cluster_sectors);
         if (ret < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         /* Write grain only into the active image */
         ret = bdrv_write(extent->file, cluster_offset, whole_grain,
                 extent->cluster_sectors);
         if (ret < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
     }
-    return 0;
+    return VMDK_OK;
 }
 
 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
@@ -803,7 +811,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
                 &(m_data->offset),
                 sizeof(m_data->offset)
             ) < 0) {
-        return -1;
+        return VMDK_ERROR;
     }
     /* update backup L2 table */
     if (extent->l1_backup_table_offset != 0) {
@@ -814,11 +822,11 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
                         + (m_data->l2_index * sizeof(m_data->offset)),
                     &(m_data->offset), sizeof(m_data->offset)
                 ) < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
     }
 
-    return 0;
+    return VMDK_OK;
 }
 
 static int get_cluster_offset(BlockDriverState *bs,
@@ -837,17 +845,17 @@ static int get_cluster_offset(BlockDriverState *bs,
     }
     if (extent->flat) {
         *cluster_offset = extent->flat_start_offset;
-        return 0;
+        return VMDK_OK;
     }
 
     offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
     l1_index = (offset >> 9) / extent->l1_entry_sectors;
     if (l1_index >= extent->l1_size) {
-        return -1;
+        return VMDK_ERROR;
     }
     l2_offset = extent->l1_table[l1_index];
     if (!l2_offset) {
-        return -1;
+        return VMDK_UNALLOC;
     }
     for (i = 0; i < L2_CACHE_SIZE; i++) {
         if (l2_offset == extent->l2_cache_offsets[i]) {
@@ -877,7 +885,7 @@ static int get_cluster_offset(BlockDriverState *bs,
                 l2_table,
                 extent->l2_size * sizeof(uint32_t)
             ) != extent->l2_size * sizeof(uint32_t)) {
-        return -1;
+        return VMDK_ERROR;
     }
 
     extent->l2_cache_offsets[min_index] = l2_offset;
@@ -888,7 +896,7 @@ static int get_cluster_offset(BlockDriverState *bs,
 
     if (!*cluster_offset) {
         if (!allocate) {
-            return -1;
+            return VMDK_UNALLOC;
         }
 
         /* Avoid the L2 tables update for the images that have snapshots. */
@@ -911,7 +919,7 @@ static int get_cluster_offset(BlockDriverState *bs,
          */
         if (get_whole_cluster(
                 bs, extent, *cluster_offset, offset, allocate) == -1) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         if (m_data) {
@@ -923,7 +931,7 @@ static int get_cluster_offset(BlockDriverState *bs,
         }
     }
     *cluster_offset <<= 9;
-    return 0;
+    return VMDK_OK;
 }
 
 static VmdkExtent *find_extent(BDRVVmdkState *s,
@@ -1173,7 +1181,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                                 sector_num << 9, !extent->compressed,
                                 &cluster_offset);
         if (extent->compressed) {
-            if (ret == 0) {
+            if (ret == VMDK_OK) {
                 /* Refuse write to allocated cluster for streamOptimized */
                 fprintf(stderr,
                         "VMDK: can't write to allocated cluster"
@@ -1357,7 +1365,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
 
     if (filename == NULL || !strlen(filename)) {
         fprintf(stderr, "Vmdk: no filename provided.\n");
-        return -1;
+        return VMDK_ERROR;
     }
     p = strrchr(filename, '/');
     if (p == NULL) {
@@ -1369,7 +1377,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
     if (p != NULL) {
         p++;
         if (p - filename >= buf_len) {
-            return -1;
+            return VMDK_ERROR;
         }
         pstrcpy(path, p - filename + 1, filename);
     } else {
@@ -1382,12 +1390,12 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
         postfix[0] = '\0';
     } else {
         if (q - p >= buf_len) {
-            return -1;
+            return VMDK_ERROR;
         }
         pstrcpy(prefix, q - p + 1, p);
         pstrcpy(postfix, buf_len, q);
     }
-    return 0;
+    return VMDK_OK;
 }
 
 static int relative_path(char *dest, int dest_size,
@@ -1403,11 +1411,11 @@ static int relative_path(char *dest, int dest_size,
 #endif
 
     if (!(dest && base && target)) {
-        return -1;
+        return VMDK_ERROR;
     }
     if (path_is_absolute(target)) {
         pstrcpy(dest, dest_size, target);
-        return 0;
+        return VMDK_OK;
     }
     while (base[i] == target[i]) {
         i++;
@@ -1426,7 +1434,7 @@ static int relative_path(char *dest, int dest_size,
         pstrcat(dest, dest_size, sep);
     }
     pstrcat(dest, dest_size, q);
-    return 0;
+    return VMDK_OK;
 }
 
 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v3 2/6] vmdk: add support for “zeroed‐grain” GTE
  2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 1/6] vmdk: named return code Fam Zheng
@ 2013-04-24 12:44 ` Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 3/6] vmdk: Add option to create zeroed-grain image Fam Zheng
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-24 12:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Introduced support for zeroed-grain GTE, as specified in Virtual Disk
Format 5.0[1].

    Recent VMware hosted platform products support a new “zeroed‐grain”
    grain table entry (GTE). The zeroed‐grain GTE returns all zeros on
    read.  In other words, the zeroed‐grain GTE indicates that a grain
    in the child disk is zero‐filled but does not actually occupy space
    in storage.  A sparse extent with zeroed‐grain GTE has the following
    in its header:

     * SparseExtentHeader.version = 2
     * SparseExtentHeader.flags has bit 2 set

    Other than the new flag and the possibly zeroed‐grain GTE, version 2
    sparse extents are identical to version 1.  Also, a zeroed‐grain GTE
    has value 0x1 in the GT table.

[1] Virtual Disk Format 5.0, http://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf?src=vmdk
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 16aa29c..7e07c0f 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -33,10 +33,13 @@
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
 #define VMDK4_COMPRESSION_DEFLATE 1
 #define VMDK4_FLAG_RGD (1 << 1)
+/* Zeroed-grain enable bit */
+#define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
 #define VMDK4_FLAG_COMPRESS (1 << 16)
 #define VMDK4_FLAG_MARKER (1 << 17)
 #define VMDK4_GD_AT_END 0xffffffffffffffffULL
 
+#define VMDK_GTE_ZEROED 0x1
 
 /* VMDK internal error codes */
 #define VMDK_OK      0
@@ -81,6 +84,8 @@ typedef struct VmdkExtent {
     bool flat;
     bool compressed;
     bool has_marker;
+    bool has_zero_grain;
+    int version;
     int64_t sectors;
     int64_t end_sector;
     int64_t flat_start_offset;
@@ -569,6 +574,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     extent->compressed =
         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
     extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
+    extent->version = le32_to_cpu(header.version);
+    extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
     ret = vmdk_init_tables(bs, extent);
     if (ret) {
         /* free extent allocated by vmdk_add_extent */
@@ -839,6 +846,7 @@ static int get_cluster_offset(BlockDriverState *bs,
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
     uint32_t min_count, *l2_table, tmp = 0;
+    bool zeroed = false;
 
     if (m_data) {
         m_data->valid = 0;
@@ -894,9 +902,13 @@ static int get_cluster_offset(BlockDriverState *bs,
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     *cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
-    if (!*cluster_offset) {
+    if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
+        zeroed = true;
+    }
+
+    if (!*cluster_offset || zeroed) {
         if (!allocate) {
-            return VMDK_UNALLOC;
+            return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
         }
 
         /* Avoid the L2 tables update for the images that have snapshots. */
@@ -967,8 +979,8 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
     ret = get_cluster_offset(bs, extent, NULL,
                             sector_num * 512, 0, &offset);
     qemu_co_mutex_unlock(&s->lock);
-    /* get_cluster_offset returning 0 means success */
-    ret = !ret;
+
+    ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
 
     index_in_cluster = sector_num % extent->cluster_sectors;
     n = extent->cluster_sectors - index_in_cluster;
@@ -1111,9 +1123,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
         if (n > nb_sectors) {
             n = nb_sectors;
         }
-        if (ret) {
+        if (ret != VMDK_OK) {
             /* if not allocated, try to read from parent image, if exist */
-            if (bs->backing_hd) {
+            if (bs->backing_hd && ret != VMDK_ZEROED) {
                 if (!vmdk_is_cid_valid(bs)) {
                     return -EINVAL;
                 }
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v3 3/6] vmdk: Add option to create zeroed-grain image
  2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 1/6] vmdk: named return code Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 2/6] vmdk: add support for “zeroed‐grain” GTE Fam Zheng
@ 2013-04-24 12:44 ` Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 4/6] vmdk: change magic number to macro Fam Zheng
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-24 12:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Add image create option "zeroed-grain" to enable zeroed-grain GTE
feature of vmdk sparse extents. When this option is on, header version
of newly created extent will be 2 and VMDK4_FLAG_ZERO_GRAIN flag bit
will be set.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 7e07c0f..cc19e20 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -48,6 +48,8 @@
 #define VMDK_UNALLOC (-2)
 #define VMDK_ZEROED  (-3)
 
+#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
+
 typedef struct {
     uint32_t version;
     uint32_t flags;
@@ -1262,7 +1264,7 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
 
 
 static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress)
+                              bool flat, bool compress, bool zeroed_grain)
 {
     int ret, i;
     int fd = 0;
@@ -1284,9 +1286,10 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     }
     magic = cpu_to_be32(VMDK4_MAGIC);
     memset(&header, 0, sizeof(header));
-    header.version = 1;
-    header.flags =
-        3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
+    header.version = zeroed_grain ? 2 : 1;
+    header.flags = 3
+                   | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
+                   | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
     header.capacity = filesize / 512;
     header.granularity = 128;
@@ -1467,6 +1470,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     char parent_desc_line[BUF_SIZE] = "";
     uint32_t parent_cid = 0xffffffff;
     uint32_t number_heads = 16;
+    bool zeroed_grain = false;
     const char desc_template[] =
         "# Disk DescriptorFile\n"
         "version=1\n"
@@ -1502,6 +1506,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
             flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
         } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
             fmt = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
+            zeroed_grain |= options->value.n;
         }
         options++;
     }
@@ -1588,7 +1594,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
         snprintf(ext_filename, sizeof(ext_filename), "%s%s",
                 path, desc_filename);
 
-        if (vmdk_create_extent(ext_filename, size, flat, compress)) {
+        if (vmdk_create_extent(ext_filename, size,
+                               flat, compress, zeroed_grain)) {
             return -EINVAL;
         }
         filesize -= size;
@@ -1714,6 +1721,11 @@ static QEMUOptionParameter vmdk_create_options[] = {
             "VMDK flat extent format, can be one of "
             "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
     },
+    {
+        .name = BLOCK_OPT_ZEROED_GRAIN,
+        .type = OPT_FLAG,
+        .help = "Enable efficient zero writes using the zeroed-grain GTE feature"
+    },
     { NULL }
 };
 
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v3 4/6] vmdk: change magic number to macro
  2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
                   ` (2 preceding siblings ...)
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 3/6] vmdk: Add option to create zeroed-grain image Fam Zheng
@ 2013-04-24 12:44 ` Fam Zheng
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 5/6] vmdk: store fields of VmdkMetaData in cpu endian Fam Zheng
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Fam Zheng @ 2013-04-24 12:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Two hard coded flag bits are changed to macros.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index cc19e20..0463d3b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -32,6 +32,7 @@
 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
 #define VMDK4_COMPRESSION_DEFLATE 1
+#define VMDK4_FLAG_NL_DETECT (1 << 0)
 #define VMDK4_FLAG_RGD (1 << 1)
 /* Zeroed-grain enable bit */
 #define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
@@ -1287,7 +1288,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     magic = cpu_to_be32(VMDK4_MAGIC);
     memset(&header, 0, sizeof(header));
     header.version = zeroed_grain ? 2 : 1;
-    header.flags = 3
+    header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
                    | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
                    | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v3 5/6] vmdk: store fields of VmdkMetaData in cpu endian
  2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
                   ` (3 preceding siblings ...)
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 4/6] vmdk: change magic number to macro Fam Zheng
@ 2013-04-24 12:44 ` Fam Zheng
  2013-04-25 13:20   ` Stefan Hajnoczi
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 6/6] vmdk: add bdrv_co_write_zeroes Fam Zheng
  2013-04-25 13:25 ` [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Stefan Hajnoczi
  6 siblings, 1 reply; 10+ messages in thread
From: Fam Zheng @ 2013-04-24 12:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Previously VmdkMetaData.offset is stored little endian while other
fields are cpu endian. This changes offset to cpu endian and convert
before writing to image.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 0463d3b..16e1417 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -813,14 +813,15 @@ static int get_whole_cluster(BlockDriverState *bs,
 
 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
 {
+    uint32_t offset;
+    QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
+    offset = cpu_to_le32(m_data->offset);
     /* update L2 table */
     if (bdrv_pwrite_sync(
                 extent->file,
                 ((int64_t)m_data->l2_offset * 512)
                     + (m_data->l2_index * sizeof(m_data->offset)),
-                &(m_data->offset),
-                sizeof(m_data->offset)
-            ) < 0) {
+                &offset, sizeof(offset)) < 0) {
         return VMDK_ERROR;
     }
     /* update backup L2 table */
@@ -830,8 +831,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
                     extent->file,
                     ((int64_t)m_data->l2_offset * 512)
                         + (m_data->l2_index * sizeof(m_data->offset)),
-                    &(m_data->offset), sizeof(m_data->offset)
-                ) < 0) {
+                    &offset, sizeof(offset)) < 0) {
             return VMDK_ERROR;
         }
     }
@@ -938,7 +938,7 @@ static int get_cluster_offset(BlockDriverState *bs,
         }
 
         if (m_data) {
-            m_data->offset = tmp;
+            m_data->offset = *cluster_offset;
             m_data->l1_index = l1_index;
             m_data->l2_index = l2_index;
             m_data->l2_offset = l2_offset;
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v3 6/6] vmdk: add bdrv_co_write_zeroes
  2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
                   ` (4 preceding siblings ...)
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 5/6] vmdk: store fields of VmdkMetaData in cpu endian Fam Zheng
@ 2013-04-24 12:44 ` Fam Zheng
  2013-04-25 13:20   ` Stefan Hajnoczi
  2013-04-25 13:25 ` [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Stefan Hajnoczi
  6 siblings, 1 reply; 10+ messages in thread
From: Fam Zheng @ 2013-04-24 12:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, Fam Zheng, stefanha

Use special offset to write zeroes efficiently, when zeroed-grain GTE is
available. If zero-write an allocated cluster, cluster is leaked because
its offset pointer is overwritten by "0x1".

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/vmdk.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 14 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 16e1417..90cb071 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -905,6 +905,13 @@ static int get_cluster_offset(BlockDriverState *bs,
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     *cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
+    if (m_data) {
+        m_data->valid = 1;
+        m_data->l1_index = l1_index;
+        m_data->l2_index = l2_index;
+        m_data->offset = *cluster_offset;
+        m_data->l2_offset = extent->l1_table[m_data->l1_index];
+    }
     if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
         zeroed = true;
     }
@@ -1165,8 +1172,17 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+/**
+ * vmdk_write:
+ * @zeroed:       buf is ignored (data is zero), use zeroed_grain GTE feature
+ * if possible, otherwise return -ENOTSUP.
+ * @zero_dry_run: used for zeroed == true only, don't update L2 table, just
+ *
+ * Returns: error code with 0 for success.
+ */
 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
+                      const uint8_t *buf, int nb_sectors,
+                      bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
@@ -1212,7 +1228,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                                         &cluster_offset);
             }
         }
-        if (ret) {
+        if (ret == VMDK_ERROR) {
             return -EINVAL;
         }
         extent_begin_sector = extent->end_sector - extent->sectors;
@@ -1222,17 +1238,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         if (n > nb_sectors) {
             n = nb_sectors;
         }
-
-        ret = vmdk_write_extent(extent,
-                        cluster_offset, index_in_cluster * 512,
-                        buf, n, sector_num);
-        if (ret) {
-            return ret;
-        }
-        if (m_data.valid) {
-            /* update L2 tables */
-            if (vmdk_L2update(extent, &m_data) == -1) {
-                return -EIO;
+        if (zeroed) {
+            /* Do zeroed write, buf is ignored */
+            if (extent->has_zero_grain &&
+                    index_in_cluster == 0 &&
+                    n >= extent->cluster_sectors) {
+                n = extent->cluster_sectors;
+                if (!zero_dry_run) {
+                    m_data.offset = cpu_to_le32(VMDK_GTE_ZEROED);
+                    /* update L2 tables */
+                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                        return -EIO;
+                    }
+                }
+            } else {
+                return -ENOTSUP;
+            }
+        } else {
+            ret = vmdk_write_extent(extent,
+                            cluster_offset, index_in_cluster * 512,
+                            buf, n, sector_num);
+            if (ret) {
+                return ret;
+            }
+            if (m_data.valid) {
+                /* update L2 tables */
+                if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                    return -EIO;
+                }
             }
         }
         nb_sectors -= n;
@@ -1258,7 +1291,22 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
     int ret;
     BDRVVmdkState *s = bs->opaque;
     qemu_co_mutex_lock(&s->lock);
-    ret = vmdk_write(bs, sector_num, buf, nb_sectors);
+    ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             int nb_sectors)
+{
+    int ret;
+    BDRVVmdkState *s = bs->opaque;
+    qemu_co_mutex_lock(&s->lock);
+    ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+    if (!ret) {
+        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+    }
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
@@ -1738,6 +1786,7 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_reopen_prepare = vmdk_reopen_prepare,
     .bdrv_read      = vmdk_co_read,
     .bdrv_write     = vmdk_co_write,
+    .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
     .bdrv_close     = vmdk_close,
     .bdrv_create    = vmdk_create,
     .bdrv_co_flush_to_disk  = vmdk_co_flush,
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v3 6/6] vmdk: add bdrv_co_write_zeroes
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 6/6] vmdk: add bdrv_co_write_zeroes Fam Zheng
@ 2013-04-25 13:20   ` Stefan Hajnoczi
  0 siblings, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2013-04-25 13:20 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, qemu-devel, stefanha

On Wed, Apr 24, 2013 at 08:44:35PM +0800, Fam Zheng wrote:
> @@ -905,6 +905,13 @@ static int get_cluster_offset(BlockDriverState *bs,
>      l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
>      *cluster_offset = le32_to_cpu(l2_table[l2_index]);
>  
> +    if (m_data) {
> +        m_data->valid = 1;
> +        m_data->l1_index = l1_index;
> +        m_data->l2_index = l2_index;
> +        m_data->offset = *cluster_offset;
> +        m_data->l2_offset = extent->l1_table[m_data->l1_index];

This line can simply be:

m_data->l2_offset = l2_offset;

> +    }

Filling in m_data up here means that only the ->offset field needs to be
filled in when we allocate a cluster further down.  Right now the code
is duplicated, but it just overwrites the fields with the same value
again.

> @@ -1222,17 +1238,34 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
>          if (n > nb_sectors) {
>              n = nb_sectors;
>          }
> -
> -        ret = vmdk_write_extent(extent,
> -                        cluster_offset, index_in_cluster * 512,
> -                        buf, n, sector_num);
> -        if (ret) {
> -            return ret;
> -        }
> -        if (m_data.valid) {
> -            /* update L2 tables */
> -            if (vmdk_L2update(extent, &m_data) == -1) {
> -                return -EIO;
> +        if (zeroed) {
> +            /* Do zeroed write, buf is ignored */
> +            if (extent->has_zero_grain &&
> +                    index_in_cluster == 0 &&
> +                    n >= extent->cluster_sectors) {
> +                n = extent->cluster_sectors;
> +                if (!zero_dry_run) {
> +                    m_data.offset = cpu_to_le32(VMDK_GTE_ZEROED);

offset is host endian now!

> +                    /* update L2 tables */
> +                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {

Zeroing cluster-by-cluster is slow - vmdk_L2update() uses sync to flush
the L2 update.  The vmdk.c code isn't great for buffering up metadata
changes and flushing them in a single operation though, so this is fine
for now.

> +                        return -EIO;
> +                    }

l2_cache[] has not been updated with the new VMDK_GTE_ZEROED offset.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v3 5/6] vmdk: store fields of VmdkMetaData in cpu endian
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 5/6] vmdk: store fields of VmdkMetaData in cpu endian Fam Zheng
@ 2013-04-25 13:20   ` Stefan Hajnoczi
  0 siblings, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2013-04-25 13:20 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, qemu-devel, stefanha

On Wed, Apr 24, 2013 at 08:44:34PM +0800, Fam Zheng wrote:
> @@ -938,7 +938,7 @@ static int get_cluster_offset(BlockDriverState *bs,
>          }
>  
>          if (m_data) {
> -            m_data->offset = tmp;
> +            m_data->offset = *cluster_offset;

tmp can be dropped now:

-tmp = cpu_to_le32(*cluster_offset);
-l2_table[l2_index] = tmp;
+l2_table[l2_index] = cpu_to_le32(*cluster_offset);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support
  2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
                   ` (5 preceding siblings ...)
  2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 6/6] vmdk: add bdrv_co_write_zeroes Fam Zheng
@ 2013-04-25 13:25 ` Stefan Hajnoczi
  6 siblings, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2013-04-25 13:25 UTC (permalink / raw)
  To: Fam Zheng; +Cc: kwolf, qemu-devel, stefanha

On Wed, Apr 24, 2013 at 08:44:29PM +0800, Fam Zheng wrote:
> Added support for zeroed-grain GTE to VMDK according to VMDK Spec 5.0[1].
> 
> [1] Virtual Disk Format 5.0 - VMware,
>     http://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf?src=vmdk
> 
> Changes since v2:
>  - all: Added 5/6 (vmdk: store fields of VmdkMetaData in cpu endian)
>  - 6/6: Avoid side-effect of vmdk_L2update.
>         Change function comment to gtkdoc stype.
>         Fix VMDK4_FLAG_ZG.

I left some comments.  The trickiest one is forgetting to update the
l2_cache[].  If more vmdk.c features are added in the future, please
first switch to common caching code that qcow2 uses.  Right now just
fixing the missing cache update is fine.

Dong Xu Wang <wdongxu@linux.vnet.ibm.com> has moved the qcow2 cache code
to a common place.  His patches aren't merged yet but should be soon.

Stefan

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-04-25 13:26 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-24 12:44 [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Fam Zheng
2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 1/6] vmdk: named return code Fam Zheng
2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 2/6] vmdk: add support for “zeroed‐grain” GTE Fam Zheng
2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 3/6] vmdk: Add option to create zeroed-grain image Fam Zheng
2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 4/6] vmdk: change magic number to macro Fam Zheng
2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 5/6] vmdk: store fields of VmdkMetaData in cpu endian Fam Zheng
2013-04-25 13:20   ` Stefan Hajnoczi
2013-04-24 12:44 ` [Qemu-devel] [PATCH v3 6/6] vmdk: add bdrv_co_write_zeroes Fam Zheng
2013-04-25 13:20   ` Stefan Hajnoczi
2013-04-25 13:25 ` [Qemu-devel] [PATCH v3 0/6] vmdk: zeroed-grain GTE support Stefan Hajnoczi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).