qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC PATCH v2] Support vhd type VHD_DIFFERENCING
@ 2014-09-04 14:49 Gordon Gong
  2014-09-05 10:04 ` Stefan Hajnoczi
  0 siblings, 1 reply; 3+ messages in thread
From: Gordon Gong @ 2014-09-04 14:49 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: Xiaoding (B), Liuji (Jeremy), Luohao (brian), qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 23971 bytes --]

[Qemu-devel][RFC PATCH v2] Support vhd type VHD_DIFFERENCING



>From 5387a2a7b6ad052659a08a1fc7e89595708396d1 Mon Sep 17 00:00:00 2001

From: Xiaodong Gong <gordongong0350@gmail.com>

Date: Thu, 4 Sep 2014 01:14:59 +0800

Subject: [PATCH 2/2] Support vhd type VHD_DIFFERENCING



Currently qemu only supports the vhd types VHD_FIXED and VHD_DYNAMIC,

so qemu can't read a snapshot volume of a vhd, and can't support

other storage features of vhd files.



This patch adds reading of the parent information in function "vpc_open",

reading of the bitmap in "vpc_read", and updating of the bitmap in "vpc_write".



Signed-off-by: Xiaodong Gong <gordongong0350@gmail.com>

---

block/vpc.c | 329
+++++++++++++++++++++++++++++++++++++++++++++++-------------

1 file changed, 261 insertions(+), 68 deletions(-)



diff --git a/block/vpc.c b/block/vpc.c

index c024b4c..3ba0d57 100644

--- a/block/vpc.c

+++ b/block/vpc.c

@@ -33,13 +33,18 @@

/**************************************************************/

 #define HEADER_SIZE 512

+#define DYNAMIC_HEADER_SIZE 1024

+#define PARENT_LOCATOR_NUM 8

+#define PARENT_PREFIX_LEN 7 /* such as file:// */

+#define TBBATMAP_HEAD_SIZE 28

+#define MACX 0x5863614d /* big endian */

 //#define CACHE

 enum vhd_type {

     VHD_FIXED           = 2,

     VHD_DYNAMIC         = 3,

-    VHD_DIFFERENCING    = 4,

+    VHD_DIFF            = 4,

};

 // Seconds since Jan 1, 2000 0:00:00 (UTC)

@@ -138,6 +143,15 @@ typedef struct BDRVVPCState {

     Error *migration_blocker;

} BDRVVPCState;

+typedef struct vhd_tdbatmap_header {

+    char magic[8]; /* always "tdbatmap" */

+

+    uint64_t batmap_offset;

+    uint32_t batmap_size;

+    uint32_t batmap_version;

+    uint32_t checksum;

+} QEMU_PACKED VHDTdBatmapHeader;

+

static uint32_t vpc_checksum(uint8_t* buf, size_t size)

{

     uint32_t res = 0;

@@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)

static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)

{

     if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))

-        return 100;

+        return 100;

     return 0;

}

@@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict
*options, int flags,

     int i;

     VHDFooter *footer;

     VHDDynDiskHeader *dyndisk_header;

-    uint8_t buf[HEADER_SIZE];

+    uint8_t buf[DYNAMIC_HEADER_SIZE];

+    uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];

     uint32_t checksum;

     uint64_t computed_size;

-    int disk_type = VHD_DYNAMIC;

+    uint32_t disk_type;

     int ret;

+    VHDTdBatmapHeader *tdbatmap_header;

+    int parent_locator_offset = 0;

+    int64_t data_offset = 0;

+    int data_length = 0;

+    uint32_t platform;

     ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);

     if (ret < 0) {

@@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict
*options, int flags,

     }

     footer = (VHDFooter *) s->footer_buf;

+    disk_type = be32_to_cpu(footer->type);

+

     if (strncmp(footer->creator, "conectix", 8)) {

         int64_t offset = bdrv_getlength(bs->file);

         if (offset < 0) {

@@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict
*options, int flags,

         goto fail;

    }

-    if (disk_type == VHD_DYNAMIC) {

+    if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {

         ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,

-                         HEADER_SIZE);

+                         DYNAMIC_HEADER_SIZE);

         if (ret < 0) {

             goto fail;

         }

@@ -286,6 +308,56 @@ static int vpc_open(BlockDriverState *bs, QDict
*options, int flags,

         s->free_data_block_offset =

             (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;

+        /* Read tdbatmap header by offset */

+        ret = bdrv_pread(bs->file, s->free_data_block_offset,

+            tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);

+        if (ret < 0) {

+            goto fail;

+        }

+

+        tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;

+        if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {

+            s->free_data_block_offset =

+                be32_to_cpu(tdbatmap_header->batmap_size) * 512

+                + be64_to_cpu(tdbatmap_header->batmap_offset);

+        }

+

+        /* Read backing file location from dyn header table */

+        if (dyndisk_header->parent_name[0] ||
dyndisk_header->parent_name[1]) {

+            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {

+                data_offset =

+
be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);

+                data_length =

+
be32_to_cpu(dyndisk_header->parent_locator[i].data_length);

+                platform = dyndisk_header->parent_locator[i].platform;

+

+                if (MACX == platform) {

+                    if (data_offset + PARENT_PREFIX_LEN >

+                        s->max_table_entries * s->block_size) {

+                            goto fail;

+                    }

+                        if (data_length - PARENT_PREFIX_LEN > 1024) {

+                            goto fail;

+                    }

+                    ret = bdrv_pread(bs->file, data_offset +
PARENT_PREFIX_LEN,

+                        bs->backing_file, data_length - PARENT_PREFIX_LEN);

+                    if (ret < 0) {

+                        goto fail;

+                    }

+

+                    bs->backing_file[data_length - PARENT_PREFIX_LEN] =
'\0';

+                }

+

+                if (data_offset > parent_locator_offset) {

+                    parent_locator_offset = data_offset;

+                }

+            }

+        }

+

+        if (parent_locator_offset + 512 > s->free_data_block_offset) {

+            s->free_data_block_offset = parent_locator_offset + 512;

+        }

+

         for (i = 0; i < s->max_table_entries; i++) {

             be32_to_cpus(&s->pagetable[i]);

             if (s->pagetable[i] != 0xFFFFFFFF) {

@@ -363,19 +435,6 @@ static inline int64_t
get_sector_offset(BlockDriverState *bs,

     bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];

     block_offset = bitmap_offset + s->bitmap_size + (512 *
pageentry_index);

-    // We must ensure that we don't write to any sectors which are marked
as

-    // unused in the bitmap. We get away with setting all bits in the block

-    // bitmap each time we write to a new block. This might cause Virtual
PC to

-    // miss sparse read optimization, but it's not a problem in terms of

-    // correctness.

-    if (write && (s->last_bitmap_offset != bitmap_offset)) {

-        uint8_t bitmap[s->bitmap_size];

-

-        s->last_bitmap_offset = bitmap_offset;

-        memset(bitmap, 0xff, s->bitmap_size);

-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);

-    }

-

//    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64
", bloff: %" PRIx64 "\n",

//    sector_num, pagetable_index, pageentry_index,

//    bitmap_offset, block_offset);

@@ -412,6 +471,53 @@ static inline int64_t
get_sector_offset(BlockDriverState *bs,

}

 /*

+ * Returns the absolute byte offset of the given sector in the differencing

+ * image file.

+ *

+ * If the sector is not allocated, -1 is returned instead. If the sector is

+ * allocated in the backing file, -2 is returned. If the sector is
allocated

+ * in current file, the block offset is returned.

+ */

+static inline int64_t get_sector_offset_diff(BlockDriverState *bs,

+    int64_t sector_num)

+{

+    BDRVVPCState *s = bs->opaque;

+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;

+    uint64_t bitmap_offset;

+    uint64_t block_offset;

+    uint32_t pagetable_index, pageentry_index;

+    uint32_t bitmap_index, bitmapentry_index;

+    uint8_t bitmap[s->bitmap_size];

+    int ret;

+

+    pagetable_index = offset / s->block_size;

+    if (pagetable_index >= s->max_table_entries) {

+        return -1;

+    } else if (0xffffffff == s->pagetable[pagetable_index]) {

+        return -2;

+    }

+

+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];

+    if (bitmap_offset > s->max_table_entries * s->block_size) {

+        return -1;

+    }

+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);

+    if (ret < 0) {

+        return -1;

+    }

+

+    pageentry_index = (offset % s->block_size) / 512;

+    bitmap_index = pageentry_index / 8;

+    bitmapentry_index = 7 - pageentry_index % 8;

+    if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {

+        block_offset = bitmap_offset + s->bitmap_size + (512 *
pageentry_index);

+        return block_offset;

+    } else {

+         return -2;

+    }

+}

+

+/*

  * Writes the footer to the end of the image file. This is needed when the

  * file grows as it overwrites the old footer

  *

@@ -437,7 +543,8 @@ static int rewrite_footer(BlockDriverState* bs)

  *

  * Returns the sectors' offset in the image file on success and < 0 on
error

  */

-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)

+static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,

+    bool isdiff)

{

     BDRVVPCState *s = bs->opaque;

     int64_t bat_offset;

@@ -457,7 +564,11 @@ static int64_t alloc_block(BlockDriverState* bs,
int64_t sector_num)

     s->pagetable[index] = s->free_data_block_offset / 512;

     // Initialize the block's bitmap

-    memset(bitmap, 0xff, s->bitmap_size);

+    if (isdiff) {

+        memset(bitmap, 0x0, s->bitmap_size);

+    } else {

+        memset(bitmap, 0xff, s->bitmap_size);

+    }

     ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,

         s->bitmap_size);

     if (ret < 0) {

@@ -501,36 +612,62 @@ static int vpc_read(BlockDriverState *bs, int64_t
sector_num,

                     uint8_t *buf, int nb_sectors)

{

     BDRVVPCState *s = bs->opaque;

-    int ret;

-    int64_t offset;

-    int64_t sectors, sectors_per_block;

     VHDFooter *footer = (VHDFooter *) s->footer_buf;

+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;

+    int64_t offset, sectors;

+    int ret;

-    if (be32_to_cpu(footer->type) == VHD_FIXED) {

+    switch (be32_to_cpu(footer->type)) {

+    case VHD_FIXED:

         return bdrv_read(bs->file, sector_num, buf, nb_sectors);

-    }

-    while (nb_sectors > 0) {

-        offset = get_sector_offset(bs, sector_num, 0);

-

-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;

-        sectors = sectors_per_block - (sector_num % sectors_per_block);

-        if (sectors > nb_sectors) {

-            sectors = nb_sectors;

-        }

+    case VHD_DYNAMIC:

+        while (nb_sectors > 0) {

+            sectors = sectors_per_block - (sector_num % sectors_per_block);

+            if (sectors > nb_sectors) {

+                sectors = nb_sectors;

+            }

-        if (offset == -1) {

-            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);

-        } else {

-            ret = bdrv_pread(bs->file, offset, buf,

-                sectors * BDRV_SECTOR_SIZE);

-            if (ret != sectors * BDRV_SECTOR_SIZE) {

-                return -1;

+            offset = get_sector_offset(bs, sector_num, 0);

+            if (-1 == offset) {

+                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);

+            } else {

+                ret = bdrv_pread(bs->file, offset, buf,

+                    sectors * BDRV_SECTOR_SIZE);

+                if (ret != sectors * BDRV_SECTOR_SIZE) {

+                    return -1;

+                }

             }

+

+            nb_sectors -= sectors;

+            sector_num += sectors;

+            buf += sectors * BDRV_SECTOR_SIZE;

         }

+        break;

+    case VHD_DIFF:

+        while (nb_sectors > 0) {

+            offset = get_sector_offset_diff(bs, sector_num);

+            if (-1 == offset) {

+                memset(buf, 0, BDRV_SECTOR_SIZE);

+            } else if (-2 == offset) {

+                ret = bdrv_pread(bs->backing_hd, sector_num <<
BDRV_SECTOR_BITS

+                    , buf, BDRV_SECTOR_SIZE);

+                if (ret < 0) {

+                    return -1;

+                }

+            } else {

+                ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);

+                if (ret != BDRV_SECTOR_SIZE) {

+                    return -1;

+                }

+            }

-        nb_sectors -= sectors;

-        sector_num += sectors;

-        buf += sectors * BDRV_SECTOR_SIZE;

+            nb_sectors--;

+            sector_num++;

+            buf += BDRV_SECTOR_SIZE;

+        }

+        break;

+    default:

+        return -1;

     }

     return 0;

}

@@ -546,43 +683,98 @@ static coroutine_fn int vpc_co_read(BlockDriverState
*bs, int64_t sector_num,

     return ret;

}

+static inline int64_t write_bitmap(BlockDriverState *bs, int64_t
sector_num,

+    int64_t sectors)

+{

+    BDRVVPCState *s = bs->opaque;

+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;

+    uint64_t bitmap_offset;

+    uint32_t pagetable_index, pageentry_index;

+    uint8_t bitmap[s->bitmap_size];

+    uint32_t bitmap_index, bitmapbit_index;

+    int i;

+    int ret;

+

+    pagetable_index = offset / s->block_size;

+    pageentry_index = (offset % s->block_size) / 512;

+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];

+

+    if (bitmap_offset > s->max_table_entries * s->block_size) {

+        return -1;

+    }

+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);

+    if (ret < 0) {

+        return -1;

+    }

+

+    for (i = 0; i < sectors; i++) {

+        bitmap_index = pageentry_index / 8;

+        bitmapbit_index = 7 - pageentry_index % 8;

+        bitmap[bitmap_index] |= (0x1 << bitmapbit_index);

+        pageentry_index++;

+    }

+    ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);

+    if (ret < 0) {

+        return -1;

+    }

+

+    return 0;

+}

+

static int vpc_write(BlockDriverState *bs, int64_t sector_num,

     const uint8_t *buf, int nb_sectors)

{

     BDRVVPCState *s = bs->opaque;

-    int64_t offset;

-    int64_t sectors, sectors_per_block;

+    VHDFooter *footer = (VHDFooter *) s->footer_buf;

+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;

+    int64_t offset, sectors;

+    bool isdiff = true;

     int ret;

-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;

-    if (be32_to_cpu(footer->type) == VHD_FIXED) {

+    switch (be32_to_cpu(footer->type)) {

+    case VHD_FIXED:

         return bdrv_write(bs->file, sector_num, buf, nb_sectors);

-    }

-    while (nb_sectors > 0) {

-        offset = get_sector_offset(bs, sector_num, 1);

-

-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;

-        sectors = sectors_per_block - (sector_num % sectors_per_block);

-        if (sectors > nb_sectors) {

-            sectors = nb_sectors;

+    case VHD_DYNAMIC:

+    case VHD_DIFF:

+        if (VHD_DYNAMIC == be32_to_cpu(footer->type)) {

+            isdiff = false;

         }

-        if (offset == -1) {

-            offset = alloc_block(bs, sector_num);

-            if (offset < 0)

+        while (nb_sectors > 0) {

+            sectors = sectors_per_block - (sector_num % sectors_per_block);

+            if (sectors > nb_sectors) {

+                sectors = nb_sectors;

+            }

+

+            offset = get_sector_offset(bs, sector_num, 1);

+            if (offset == -1) {

+                offset = alloc_block(bs, sector_num, isdiff);

+                if (offset < 0) {

+                    return -1;

+                }

+            }

+

+            ret = bdrv_pwrite(bs->file, offset, buf,

+                sectors * BDRV_SECTOR_SIZE);

+            if (ret != sectors * BDRV_SECTOR_SIZE) {

                 return -1;

-        }

+            }

-        ret = bdrv_pwrite(bs->file, offset, buf, sectors *
BDRV_SECTOR_SIZE);

-        if (ret != sectors * BDRV_SECTOR_SIZE) {

-            return -1;

-        }

+            if (true == isdiff) {

+                ret = write_bitmap(bs, sector_num, sectors);

+                if (ret < 0) {

+                    return -1;

+                }

+            }

-        nb_sectors -= sectors;

-        sector_num += sectors;

-        buf += sectors * BDRV_SECTOR_SIZE;

+            nb_sectors -= sectors;

+            sector_num += sectors;

+            buf += sectors * BDRV_SECTOR_SIZE;

+        }

+        break;

+    default:

+        return -1;

     }

-

     return 0;

}

@@ -910,6 +1102,7 @@ static BlockDriver bdrv_vpc = {

     .bdrv_close             = vpc_close,

     .bdrv_reopen_prepare    = vpc_reopen_prepare,

     .bdrv_create            = vpc_create,

+    .supports_backing       = true,

     .bdrv_read              = vpc_co_read,

     .bdrv_write             = vpc_co_write,

--

1.8.3.1











1.       The bdrv_pread and bdrv_pwrite calls after get_sector_offset_* were
already checked; the others are checked in

version 2.

2.       MACX is defined as a big-endian number, so there is no need for
be32_to_cpu(), but I added a comment to MACX.

3.       Cleaned up the code that is no longer used in vpc_open.

4.       Replaced bdrv_co_readv() with bdrv_pread() in vpc_read.

5.       Added some code to make it easy to understand.

6.       The incorrect use of cpu_to_be32 is fixed in a separate patch.




-------------------------------------------------------------------------------------------------------------------


On Tue, Jul 01, 2014 at 05:45:19PM +0800, Ding xiao wrote:

Sorry for the delay, I forgot about this patch.

>* +typedef struct vhd_tdbatmap_header {*
>* +    char    magic[8]; /* "tdbatmap"*/*
>* +*
>* +    /* byte offset to batmap*/*
>* +    uint64_t    batmap_offset;*
>* +*
>* +    /* Offset of the Block Allocation Table (BAT)*/*

This comment describes the batmap_offset field?  Maybe this should be
dropped since that field already has a comment.

>* +        /* read backend file*/*
>* +        if (dyndisk_header->parent_name[0] || *
>* dyndisk_header->parent_name[1]) {*
>* +            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {*
>* +                data_offset = be64_to_cpu(*
>* +                                *
>* dyndisk_header->parent_locator[i].data_offset);*
>* +                data_length = be32_to_cpu(*
>* +                                *
>* dyndisk_header->parent_locator[i].data_length);*
>* +                if (dyndisk_header->parent_locator[i].platform == MACX) {*

Missing be32_to_cpu()?

>* +                    ret = bdrv_pread(bs->file, data_offset + 7,*
>* +                              bs->backing_file, data_length - 7);*

Buffer overflow: char bs->backing_file[1024].

All input must be validated!

>* +                    if (ret < 0) {*
>* +                        goto fail;*
>* +                    }*
>* +                    bs->backing_file[data_length - 7] = '\0';*

Memory corruption if data_length < 7.  Missing input validation.

>* +                }*
>* +                if (data_offset > parent_locator_offset) {*
>* +                    parent_locator_offset = data_offset;*
>* +                }*
>* +            }*
>* +        }*
>* +*
>* +        if (parent_locator_offset + 512 > s->free_data_block_offset) {*
>* +            s->free_data_block_offset = parent_locator_offset + 512;*
>* +        }*
>* +*
>*          for (i = 0; i < s->max_table_entries; i++) {*
>*              be32_to_cpus(&s->pagetable[i]);*
>*              if (s->pagetable[i] != 0xFFFFFFFF) {*
>* @@ -364,6 +425,9 @@ static inline int64_t get_sector_offset(BlockDriverState *
>* *bs,*
>*      // bitmap each time we write to a new block. This might cause Virtual PC *
>* to*
>*      // miss sparse read optimization, but it's not a problem in terms of*
>*      // correctness.*
>* +*
>* +    /*this will not use*/*
>* +#if 0*

Delete the code if it is no longer used.

>* @@ -433,7 +498,7 @@ static int rewrite_footer(BlockDriverState* bs)*
>*   **
>*   * Returns the sectors' offset in the image file on success and < 0 on error*
>*   */*
>* -static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)*
>* +static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num, int *
>* diff)*

Please use the C99 bool type since it communicates more clearly that
diff is either true or false (not a counter or bitmap).

It would be clearest to check footer->type for VHD_DIFFERENCING in this
function instead of adding a new argument to the function.

>* @@ -501,33 +611,64 @@ static int vpc_read(BlockDriverState *bs, int64_t *
>* sector_num,*
>*      int64_t offset;*
>*      int64_t sectors, sectors_per_block;*
>*      VHDFooter *footer = (VHDFooter *) s->footer_buf;*
>* +    QEMUIOVector hd_qiov;*
>* +    struct iovec qiov;*
>
>*      if (cpu_to_be32(footer->type) == VHD_FIXED) {*
>*          return bdrv_read(bs->file, sector_num, buf, nb_sectors);*
>* -    }*
>* -    while (nb_sectors > 0) {*
>* -        offset = get_sector_offset(bs, sector_num, 0);*
>* +    } else if (cpu_to_be32(footer->type) == VHD_DYNAMIC) {*

cpu_to_be32() is wrong since VHD_DYNAMIC is an enum constant (just a
regular CPU-endian integer).

Please add a separate patch before this one that cleans up incorrect
cpu_to_be*() usage.  For example, cpu_to_be32(footer->type) == VHD_FIXED
a few lines above is wrong too.

>* +        while (nb_sectors > 0) {*
>* +            offset = get_sector_offset(bs, sector_num, 0);*
>* +*
>* +            sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;*
>* +            sectors = sectors_per_block - (sector_num % sectors_per_block);*
>* +            if (sectors > nb_sectors) {*
>* +                sectors = nb_sectors;*
>* +            }*
>
>* -        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;*
>* -        sectors = sectors_per_block - (sector_num % sectors_per_block);*
>* -        if (sectors > nb_sectors) {*
>* -            sectors = nb_sectors;*
>* +            if (offset == -1) {*
>* +                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);*
>* +            } else {*
>* +                ret = bdrv_pread(bs->file, offset, buf,*
>* +                    sectors * BDRV_SECTOR_SIZE);*
>* +               if (ret != sectors * BDRV_SECTOR_SIZE) {*

Indentation is off

>* +                    return -1;*
>* +                }*
>* +            }*
>* +*
>* +            nb_sectors -= sectors;*
>* +            sector_num += sectors;*
>* +            buf += sectors * BDRV_SECTOR_SIZE;*
>*          }*
>* +    } else {*
>* +         while (nb_sectors > 0) {*
>* +            offset = get_sector_offset_diff(bs, sector_num);*
>* +            if (offset == -1) {*
>* +                memset(buf, 0, BDRV_SECTOR_SIZE);*
>* +            } else if (offset == -2) {*
>* +                qiov.iov_base = (void *)buf;*

This cast is unnecessary.  The compiler does not warn about pointer
casts to or from void*.

>* +                qiov.iov_len = 512;*
>* +                hd_qiov.iov = &qiov;*
>* +                hd_qiov.niov = 1;*
>* +                hd_qiov.nalloc = -1;*
>* +                hd_qiov.size = 512;*

This is not idiomatic.  Normally 'qiov' is a QEMUIOVector, not a struct
iovec.  The qemu_iovec_*() functions should be used instead of manually
setting QEMUIOVector fields:

iov.iov_base = buf;
iov.iov_len = 512;
qemu_iovec_init_external(&hd_qiov, &iov, 1);

>* +                ret = bdrv_co_readv(bs->backing_hd, sector_num, 1, &hd_qiov);*
>* +                if (ret < 0) {*
>* +                    return -1;*
>* +                }*

Why are you using bdrv_co_readv() instead of bdrv_pread() like the rest
of this file?

It would be simpler to use bdrv_pread().  Then you don't need the struct
iovec and QEMUIOVector.  This function also hasn't been marked
coroutine_fn yet, so it is cleaner to stick with bdrv_pread() until the
file is properly converted to coroutines.

[-- Attachment #1.2: Type: text/html, Size: 110279 bytes --]

[-- Attachment #2: 0002-Support-vhd-type-VHD_DIFFERENCING.patch --]
[-- Type: application/octet-stream, Size: 16577 bytes --]

From 5387a2a7b6ad052659a08a1fc7e89595708396d1 Mon Sep 17 00:00:00 2001
From: Xiaodong Gong <gordongong0350@gmail.com>
Date: Thu, 4 Sep 2014 01:14:59 +0800
Subject: [PATCH 2/2] Support vhd type VHD_DIFFERENCING

Currently qemu only supports the vhd types VHD_FIXED and VHD_DYNAMIC,
so qemu can't read a snapshot volume of a vhd, and can't support
other storage features of vhd files.

This patch adds reading of the parent information in function "vpc_open",
reading of the bitmap in "vpc_read", and updating of the bitmap in "vpc_write".

Signed-off-by: Xiaodong Gong <gordongong0350@gmail.com>
---
 block/vpc.c | 329 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 261 insertions(+), 68 deletions(-)

diff --git a/block/vpc.c b/block/vpc.c
index c024b4c..3ba0d57 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -33,13 +33,18 @@
 /**************************************************************/
 
 #define HEADER_SIZE 512
+#define DYNAMIC_HEADER_SIZE 1024
+#define PARENT_LOCATOR_NUM 8
+#define PARENT_PREFIX_LEN 7 /* such as file:// */
+#define TBBATMAP_HEAD_SIZE 28
+#define MACX 0x5863614d /* big endian */
 
 //#define CACHE
 
 enum vhd_type {
     VHD_FIXED           = 2,
     VHD_DYNAMIC         = 3,
-    VHD_DIFFERENCING    = 4,
+    VHD_DIFF            = 4,
 };
 
 // Seconds since Jan 1, 2000 0:00:00 (UTC)
@@ -138,6 +143,15 @@ typedef struct BDRVVPCState {
     Error *migration_blocker;
 } BDRVVPCState;
 
+typedef struct vhd_tdbatmap_header {
+    char magic[8]; /* always "tdbatmap" */
+
+    uint64_t batmap_offset;
+    uint32_t batmap_size;
+    uint32_t batmap_version;
+    uint32_t checksum;
+} QEMU_PACKED VHDTdBatmapHeader;
+
 static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 {
     uint32_t res = 0;
@@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
-	return 100;
+        return 100;
     return 0;
 }
 
@@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int i;
     VHDFooter *footer;
     VHDDynDiskHeader *dyndisk_header;
-    uint8_t buf[HEADER_SIZE];
+    uint8_t buf[DYNAMIC_HEADER_SIZE];
+    uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];
     uint32_t checksum;
     uint64_t computed_size;
-    int disk_type = VHD_DYNAMIC;
+    uint32_t disk_type;
     int ret;
+    VHDTdBatmapHeader *tdbatmap_header;
+    int parent_locator_offset = 0;
+    int64_t data_offset = 0;
+    int data_length = 0;
+    uint32_t platform;
 
     ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
     if (ret < 0) {
@@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     footer = (VHDFooter *) s->footer_buf;
+    disk_type = be32_to_cpu(footer->type);
+
     if (strncmp(footer->creator, "conectix", 8)) {
         int64_t offset = bdrv_getlength(bs->file);
         if (offset < 0) {
@@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    if (disk_type == VHD_DYNAMIC) {
+    if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {
         ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
-                         HEADER_SIZE);
+                         DYNAMIC_HEADER_SIZE);
         if (ret < 0) {
             goto fail;
         }
@@ -286,6 +308,56 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         s->free_data_block_offset =
             (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
 
+        /* Read tdbatmap header by offset */
+        ret = bdrv_pread(bs->file, s->free_data_block_offset,
+            tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;
+        if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {
+            s->free_data_block_offset =
+                be32_to_cpu(tdbatmap_header->batmap_size) * 512
+                + be64_to_cpu(tdbatmap_header->batmap_offset);
+        }
+
+        /* Read backing file location from dyn header table */
+        if (dyndisk_header->parent_name[0] || dyndisk_header->parent_name[1]) {
+            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {
+                data_offset =
+                    be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);
+                data_length =
+                    be32_to_cpu(dyndisk_header->parent_locator[i].data_length);
+                platform = dyndisk_header->parent_locator[i].platform;
+
+                if (MACX == platform) {
+                    if (data_offset + PARENT_PREFIX_LEN >
+                        s->max_table_entries * s->block_size) {
+                            goto fail;
+                    }
+                        if (data_length - PARENT_PREFIX_LEN > 1024) {
+                            goto fail;
+                    }
+                    ret = bdrv_pread(bs->file, data_offset + PARENT_PREFIX_LEN,
+                        bs->backing_file, data_length - PARENT_PREFIX_LEN);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+
+                    bs->backing_file[data_length - PARENT_PREFIX_LEN] = '\0';
+                }
+
+                if (data_offset > parent_locator_offset) {
+                    parent_locator_offset = data_offset;
+                }
+            }
+        }
+
+        if (parent_locator_offset + 512 > s->free_data_block_offset) {
+            s->free_data_block_offset = parent_locator_offset + 512;
+        }
+
         for (i = 0; i < s->max_table_entries; i++) {
             be32_to_cpus(&s->pagetable[i]);
             if (s->pagetable[i] != 0xFFFFFFFF) {
@@ -363,19 +435,6 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
     bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
     block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
 
-    // We must ensure that we don't write to any sectors which are marked as
-    // unused in the bitmap. We get away with setting all bits in the block
-    // bitmap each time we write to a new block. This might cause Virtual PC to
-    // miss sparse read optimization, but it's not a problem in terms of
-    // correctness.
-    if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
-
-        s->last_bitmap_offset = bitmap_offset;
-        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
-    }
-
 //    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
 //	sector_num, pagetable_index, pageentry_index,
 //	bitmap_offset, block_offset);
@@ -412,6 +471,53 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
 }
 
 /*
+ * Returns the absolute byte offset of the given sector in the differencing
+ * image file.
+ *
+ * If the sector is not allocated, -1 is returned instead. If the sector is
+ * allocated in the backing file, -2 is returned. If the sector is allocated
+ * in current file, the block offset is returned.
+ */
+static inline int64_t get_sector_offset_diff(BlockDriverState *bs,
+    int64_t sector_num)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint64_t block_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint32_t bitmap_index, bitmapentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    if (pagetable_index >= s->max_table_entries) {
+        return -1;
+    } else if (0xffffffff == s->pagetable[pagetable_index]) {
+        return -2;
+    }
+
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_index = pageentry_index / 8;
+    bitmapentry_index = 7 - pageentry_index % 8;
+    if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {
+        block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+        return block_offset;
+    } else {
+         return -2;
+    }
+}
+
+/*
  * Writes the footer to the end of the image file. This is needed when the
  * file grows as it overwrites the old footer
  *
@@ -437,7 +543,8 @@ static int rewrite_footer(BlockDriverState* bs)
  *
  * Returns the sectors' offset in the image file on success and < 0 on error
  */
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,
+    bool isdiff)
 {
     BDRVVPCState *s = bs->opaque;
     int64_t bat_offset;
@@ -457,7 +564,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
     s->pagetable[index] = s->free_data_block_offset / 512;
 
     // Initialize the block's bitmap
-    memset(bitmap, 0xff, s->bitmap_size);
+    if (isdiff) {
+        memset(bitmap, 0x0, s->bitmap_size);
+    } else {
+        memset(bitmap, 0xff, s->bitmap_size);
+    }
     ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
         s->bitmap_size);
     if (ret < 0) {
@@ -501,36 +612,62 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int ret;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    int ret;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_read(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 0);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
-        }
+    case VHD_DYNAMIC:
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
 
-        if (offset == -1) {
-            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
-        } else {
-            ret = bdrv_pread(bs->file, offset, buf,
-                sectors * BDRV_SECTOR_SIZE);
-            if (ret != sectors * BDRV_SECTOR_SIZE) {
-                return -1;
+            offset = get_sector_offset(bs, sector_num, 0);
+            if (-1 == offset) {
+                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf,
+                    sectors * BDRV_SECTOR_SIZE);
+                if (ret != sectors * BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
             }
+
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
         }
+        break;
+    case VHD_DIFF:
+        while (nb_sectors > 0) {
+            offset = get_sector_offset_diff(bs, sector_num);
+            if (-1 == offset) {
+                memset(buf, 0, BDRV_SECTOR_SIZE);
+            } else if (-2 == offset) {
+                ret = bdrv_pread(bs->backing_hd, sector_num << BDRV_SECTOR_BITS
+                    , buf, BDRV_SECTOR_SIZE);
+                if (ret < 0) {
+                    return -1;
+                }
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);
+                if (ret != BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors--;
+            sector_num++;
+            buf += BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
     return 0;
 }
@@ -546,43 +683,98 @@ static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+static inline int64_t write_bitmap(BlockDriverState *bs, int64_t sector_num,
+    int64_t sectors)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    uint32_t bitmap_index, bitmapbit_index;
+    int i;
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    for (i = 0; i < sectors; i++) {
+        bitmap_index = pageentry_index / 8;
+        bitmapbit_index = 7 - pageentry_index % 8;
+        bitmap[bitmap_index] |= (0x1 << bitmapbit_index);
+        pageentry_index++;
+    }
+    ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
 static int vpc_write(BlockDriverState *bs, int64_t sector_num,
     const uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
+    VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    bool isdiff = true;
     int ret;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_write(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 1);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
+    case VHD_DYNAMIC:
+    case VHD_DIFF:
+        if (VHD_DYNAMIC == be32_to_cpu(footer->type)) {
+            isdiff = false;
         }
 
-        if (offset == -1) {
-            offset = alloc_block(bs, sector_num);
-            if (offset < 0)
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
+
+            offset = get_sector_offset(bs, sector_num, 1);
+            if (offset == -1) {
+                offset = alloc_block(bs, sector_num, isdiff);
+                if (offset < 0) {
+                    return -1;
+                }
+            }
+
+            ret = bdrv_pwrite(bs->file, offset, buf,
+                sectors * BDRV_SECTOR_SIZE);
+            if (ret != sectors * BDRV_SECTOR_SIZE) {
                 return -1;
-        }
+            }
 
-        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
-        if (ret != sectors * BDRV_SECTOR_SIZE) {
-            return -1;
-        }
+            if (true == isdiff) {
+                ret = write_bitmap(bs, sector_num, sectors);
+                if (ret < 0) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
-
     return 0;
 }
 
@@ -910,6 +1102,7 @@ static BlockDriver bdrv_vpc = {
     .bdrv_close             = vpc_close,
     .bdrv_reopen_prepare    = vpc_reopen_prepare,
     .bdrv_create            = vpc_create,
+    .supports_backing       = true,
 
     .bdrv_read              = vpc_co_read,
     .bdrv_write             = vpc_co_write,
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [RFC PATCH v2] Support vhd type VHD_DIFFERENCING
  2014-09-04 14:49 Gordon Gong
@ 2014-09-05 10:04 ` Stefan Hajnoczi
  0 siblings, 0 replies; 3+ messages in thread
From: Stefan Hajnoczi @ 2014-09-05 10:04 UTC (permalink / raw)
  To: Gordon Gong
  Cc: Xiaoding (B), Liuji (Jeremy), Luohao (brian), qemu-devel,
	Stefan Hajnoczi

[-- Attachment #1: Type: text/plain, Size: 1050 bytes --]

On Thu, Sep 04, 2014 at 10:49:43PM +0800, Gordon Gong wrote:
> [Qemu-devel][RFC PATCH v2] Support vhd type VHD_DIFFERENCING
> 
> 
> 
> From 5387a2a7b6ad052659a08a1fc7e89595708396d1 Mon Sep 17 00:00:00 2001
> 
> From: Xiaodong Gong <gordongong0350@gmail.com>
> 
> Date: Thu, 4 Sep 2014 01:14:59 +0800
> 
> Subject: [PATCH 2/2] Support vhd type VHD_DIFFERENCING
> 
> 
> 
> Now qemu only supports vhd type VHD_FIXED and VHD_DYNAMIC,
> 
> so qemu can't read snapshot volume of vhd, and can't support
> 
> other storage features of vhd file.
> 
> 
> 
> This patch add read parent information in function "vpc_open",
> 
> read bitmap in "vpc_read", and change bitmap in "vpc_write".
> 
> 
> 
> Signed-off-by: Xiaodong Gong <gordongong0350@gmail.com>
> 
> ---
> 
> block/vpc.c | 329
> +++++++++++++++++++++++++++++++++++++++++++++++-------------
> 
> 1 file changed, 261 insertions(+), 68 deletions(-)

This patch is malformed.  Please use git-send-email(1):
http://qemu-project.org/Contribute/SubmitAPatch

Stefan

[-- Attachment #2: Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Qemu-devel] [RFC PATCH v2] Support vhd type VHD_DIFFERENCING
@ 2014-09-05 18:55 =?gb18030?B?MjFH?=
  0 siblings, 0 replies; 3+ messages in thread
From: =?gb18030?B?MjFH?= @ 2014-09-05 18:55 UTC (permalink / raw)
  To: =?gb18030?B?a3dvbGY=?=, =?gb18030?B?c3RlZmFuaGE=?=
  Cc: =?gb18030?B?cWVtdS1kZXZlbA==?=

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="gb18030", Size: 17667 bytes --]

Currently qemu only supports the vhd types VHD_FIXED and VHD_DYNAMIC,
so qemu can't read a vhd snapshot volume, and can't support
other storage features of vhd files.

This patch adds reading of the parent information in function "vpc_open",
reading of the bitmap in "vpc_read", and updating of the bitmap in "vpc_write".

Signed-off-by: Xiaodong Gong <gordongong0350@gmail.com>
---
 block/vpc.c | 329 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 261 insertions(+), 68 deletions(-)

diff --git a/block/vpc.c b/block/vpc.c
index c024b4c..3ba0d57 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -33,13 +33,18 @@
 /**************************************************************/
 
 #define HEADER_SIZE 512
+#define DYNAMIC_HEADER_SIZE 1024
+#define PARENT_LOCATOR_NUM 8
+#define PARENT_PREFIX_LEN 7 /* such as file:// */
+#define TBBATMAP_HEAD_SIZE 28
+#define MACX 0x5863614d /* big endian */
 
 //#define CACHE
 
 enum vhd_type {
     VHD_FIXED           = 2,
     VHD_DYNAMIC         = 3,
-    VHD_DIFFERENCING    = 4,
+    VHD_DIFF            = 4,
 };
 
 // Seconds since Jan 1, 2000 0:00:00 (UTC)
@@ -138,6 +143,15 @@ typedef struct BDRVVPCState {
     Error *migration_blocker;
 } BDRVVPCState;
 
+typedef struct vhd_tdbatmap_header {
+    char magic[8]; /* always "tdbatmap" */
+
+    uint64_t batmap_offset;
+    uint32_t batmap_size;
+    uint32_t batmap_version;
+    uint32_t checksum;
+} QEMU_PACKED VHDTdBatmapHeader;
+
 static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 {
     uint32_t res = 0;
@@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
-	return 100;
+        return 100;
     return 0;
 }
 
@@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int i;
     VHDFooter *footer;
     VHDDynDiskHeader *dyndisk_header;
-    uint8_t buf[HEADER_SIZE];
+    uint8_t buf[DYNAMIC_HEADER_SIZE];
+    uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];
     uint32_t checksum;
     uint64_t computed_size;
-    int disk_type = VHD_DYNAMIC;
+    uint32_t disk_type;
     int ret;
+    VHDTdBatmapHeader *tdbatmap_header;
+    int parent_locator_offset = 0;
+    int64_t data_offset = 0;
+    int data_length = 0;
+    uint32_t platform;
 
     ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
     if (ret < 0) {
@@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     footer = (VHDFooter *) s->footer_buf;
+    disk_type = be32_to_cpu(footer->type);
+
     if (strncmp(footer->creator, "conectix", 8)) {
         int64_t offset = bdrv_getlength(bs->file);
         if (offset < 0) {
@@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    if (disk_type == VHD_DYNAMIC) {
+    if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {
         ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
-                         HEADER_SIZE);
+                         DYNAMIC_HEADER_SIZE);
         if (ret < 0) {
             goto fail;
         }
@@ -286,6 +308,56 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         s->free_data_block_offset =
             (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
 
+        /* Read tdbatmap header by offset */
+        ret = bdrv_pread(bs->file, s->free_data_block_offset,
+            tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;
+        if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {
+            s->free_data_block_offset =
+                be32_to_cpu(tdbatmap_header->batmap_size) * 512
+                + be64_to_cpu(tdbatmap_header->batmap_offset);
+        }
+
+        /* Read backing file location from dyn header table */
+        if (dyndisk_header->parent_name[0] || dyndisk_header->parent_name[1]) {
+            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {
+                data_offset =
+                    be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);
+                data_length =
+                    be32_to_cpu(dyndisk_header->parent_locator[i].data_length);
+                platform = dyndisk_header->parent_locator[i].platform;
+
+                if (MACX == platform) {
+                    if (data_offset + PARENT_PREFIX_LEN >
+                        s->max_table_entries * s->block_size) {
+                            goto fail;
+                    }
+                        if (data_length - PARENT_PREFIX_LEN > 1024) {
+                            goto fail;
+                    }
+                    ret = bdrv_pread(bs->file, data_offset + PARENT_PREFIX_LEN,
+                        bs->backing_file, data_length - PARENT_PREFIX_LEN);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+
+                    bs->backing_file[data_length - PARENT_PREFIX_LEN] = '\0';
+                }
+
+                if (data_offset > parent_locator_offset) {
+                    parent_locator_offset = data_offset;
+                }
+            }
+        }
+
+        if (parent_locator_offset + 512 > s->free_data_block_offset) {
+            s->free_data_block_offset = parent_locator_offset + 512;
+        }
+
         for (i = 0; i < s->max_table_entries; i++) {
             be32_to_cpus(&s->pagetable[i]);
             if (s->pagetable[i] != 0xFFFFFFFF) {
@@ -363,19 +435,6 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
     bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
     block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
 
-    // We must ensure that we don't write to any sectors which are marked as
-    // unused in the bitmap. We get away with setting all bits in the block
-    // bitmap each time we write to a new block. This might cause Virtual PC to
-    // miss sparse read optimization, but it's not a problem in terms of
-    // correctness.
-    if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
-
-        s->last_bitmap_offset = bitmap_offset;
-        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
-    }
-
 //    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
 //	sector_num, pagetable_index, pageentry_index,
 //	bitmap_offset, block_offset);
@@ -412,6 +471,53 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
 }
 
 /*
+ * Returns the absolute byte offset of the given sector in the differencing
+ * image file.
+ *
+ * If the sector is not allocated, -1 is returned instead. If the sector is
+ * allocated in the backing file, -2 is returned. If the sector is allocated
+ * in current file, the block offset is returned.
+ */
+static inline int64_t get_sector_offset_diff(BlockDriverState *bs,
+    int64_t sector_num)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint64_t block_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint32_t bitmap_index, bitmapentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    if (pagetable_index >= s->max_table_entries) {
+        return -1;
+    } else if (0xffffffff == s->pagetable[pagetable_index]) {
+        return -2;
+    }
+
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_index = pageentry_index / 8;
+    bitmapentry_index = 7 - pageentry_index % 8;
+    if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {
+        block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+        return block_offset;
+    } else {
+         return -2;
+    }
+}
+
+/*
  * Writes the footer to the end of the image file. This is needed when the
  * file grows as it overwrites the old footer
  *
@@ -437,7 +543,8 @@ static int rewrite_footer(BlockDriverState* bs)
  *
  * Returns the sectors' offset in the image file on success and < 0 on error
  */
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,
+    bool isdiff)
 {
     BDRVVPCState *s = bs->opaque;
     int64_t bat_offset;
@@ -457,7 +564,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
     s->pagetable[index] = s->free_data_block_offset / 512;
 
     // Initialize the block's bitmap
-    memset(bitmap, 0xff, s->bitmap_size);
+    if (isdiff) {
+        memset(bitmap, 0x0, s->bitmap_size);
+    } else {
+        memset(bitmap, 0xff, s->bitmap_size);
+    }
     ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
         s->bitmap_size);
     if (ret < 0) {
@@ -501,36 +612,62 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int ret;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    int ret;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_read(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 0);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
-        }
+    case VHD_DYNAMIC:
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
 
-        if (offset == -1) {
-            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
-        } else {
-            ret = bdrv_pread(bs->file, offset, buf,
-                sectors * BDRV_SECTOR_SIZE);
-            if (ret != sectors * BDRV_SECTOR_SIZE) {
-                return -1;
+            offset = get_sector_offset(bs, sector_num, 0);
+            if (-1 == offset) {
+                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf,
+                    sectors * BDRV_SECTOR_SIZE);
+                if (ret != sectors * BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
             }
+
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
         }
+        break;
+    case VHD_DIFF:
+        while (nb_sectors > 0) {
+            offset = get_sector_offset_diff(bs, sector_num);
+            if (-1 == offset) {
+                memset(buf, 0, BDRV_SECTOR_SIZE);
+            } else if (-2 == offset) {
+                ret = bdrv_pread(bs->backing_hd, sector_num << BDRV_SECTOR_BITS
+                    , buf, BDRV_SECTOR_SIZE);
+                if (ret < 0) {
+                    return -1;
+                }
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);
+                if (ret != BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors--;
+            sector_num++;
+            buf += BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
     return 0;
 }
@@ -546,43 +683,98 @@ static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+static inline int64_t write_bitmap(BlockDriverState *bs, int64_t sector_num,
+    int64_t sectors)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    uint32_t bitmap_index, bitmapbit_index;
+    int i;
+    int ret;
+
+    pagetable_index = offset / s->block_size;
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    for (i = 0; i < sectors; i++) {
+        bitmap_index = pageentry_index / 8;
+        bitmapbit_index = 7 - pageentry_index % 8;
+        bitmap[bitmap_index] |= (0x1 << bitmapbit_index);
+        pageentry_index++;
+    }
+    ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
 static int vpc_write(BlockDriverState *bs, int64_t sector_num,
     const uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
+    VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    bool isdiff = true;
     int ret;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_write(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 1);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
+    case VHD_DYNAMIC:
+    case VHD_DIFF:
+        if (VHD_DYNAMIC == be32_to_cpu(footer->type)) {
+            isdiff = false;
         }
 
-        if (offset == -1) {
-            offset = alloc_block(bs, sector_num);
-            if (offset < 0)
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
+
+            offset = get_sector_offset(bs, sector_num, 1);
+            if (offset == -1) {
+                offset = alloc_block(bs, sector_num, isdiff);
+                if (offset < 0) {
+                    return -1;
+                }
+            }
+
+            ret = bdrv_pwrite(bs->file, offset, buf,
+                sectors * BDRV_SECTOR_SIZE);
+            if (ret != sectors * BDRV_SECTOR_SIZE) {
                 return -1;
-        }
+            }
 
-        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
-        if (ret != sectors * BDRV_SECTOR_SIZE) {
-            return -1;
-        }
+            if (true == isdiff) {
+                ret = write_bitmap(bs, sector_num, sectors);
+                if (ret < 0) {
+                    return -1;
+                }
+            }
 
-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
-
     return 0;
 }
 
@@ -910,6 +1102,7 @@ static BlockDriver bdrv_vpc = {
     .bdrv_close             = vpc_close,
     .bdrv_reopen_prepare    = vpc_reopen_prepare,
     .bdrv_create            = vpc_create,
+    .supports_backing       = true,
 
     .bdrv_read              = vpc_co_read,
     .bdrv_write             = vpc_co_write,
-- 
1.8.3.1
























1.       bdrv_preadv and bdrv_pwritev after get_sector_offset_* are already checked; the others are checked as well.

2.       MACX is a big-endian number, so there is no need for be32_to_cpu, but I added a comment to MACX.

3.       Cleaned up the code in vpc_open that was not used afterwards.

4.       Replaced bdrv_co_readv() with bdrv_preadv in vpc_read.

5.       Added some code to make it easy to understand.

6.       The wrong use of cpu_to_be32 is fixed in another patch.




Lastly, it is hard to use "git send-email" to reach your mailbox through Gmail because of the GFW in China, so I could only

send the patch to my mail.qq.com account and forward it from there. I hope that works!

I am very sorry about the last mail: I must add a fixed subject prefix to email sent outside the company, so I copied the

code, which got screwed up.

[-- Attachment #2: Type: text/html, Size: 35854 bytes --]

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-09-05 18:56 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-05 18:55 [Qemu-devel] [RFC PATCH v2] Support vhd type VHD_DIFFERENCING =?gb18030?B?MjFH?=
  -- strict thread matches above, loose matches on Subject: below --
2014-09-04 14:49 Gordon Gong
2014-09-05 10:04 ` Stefan Hajnoczi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).