All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thiemo Seufer <ths@networkno.de>
To: Igor Lvovsky <igor.lvovsky@qumranet.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Fix a race condition and non-leaf images growing in VMDK chains.
Date: Sat, 19 May 2007 22:39:28 +0100	[thread overview]
Message-ID: <20070519213928.GA2409@networkno.de> (raw)
In-Reply-To: <64F9B87B6B770947A9F8391472E032160BD63B0C@ehost011-8.exch011.intermedia.net>

[-- Attachment #1: Type: text/plain, Size: 448 bytes --]

Igor Lvovsky wrote:
> 
> 	Hi,
> The bug was in my last patch.
> This is a new one.
> I'll very appreciate any comments.

Appended is a variant which gets rid of the confusing "child" variable
which means "is_parent", and the duplicate of it (parent_open).

There was also an uninitialized variable "tmp" which was stored as
offset in m_data.

I quite possibly missed some bits which broke the patch, please test if
it still works for you.


Thiemo

[-- Attachment #2: block-vdmk.diff --]
[-- Type: text/x-diff, Size: 9259 bytes --]

Index: block-vmdk.c
===================================================================
--- block-vmdk.c.orig	2007-01-30 20:37:51.000000000 +0000
+++ block-vmdk.c	2007-05-19 22:32:01.000000000 +0100
@@ -75,8 +75,25 @@
 
     unsigned int cluster_sectors;
     uint32_t parent_cid;
+    int is_parent;
 } BDRVVmdkState;
 
+typedef struct VmdkMetaData {
+    uint32_t offset;
+    unsigned int l1_index;
+    unsigned int l2_index;
+    unsigned int l2_offset;
+    int valid;
+} VmdkMetaData;
+
+typedef struct ActiveBDRVState{
+    BlockDriverState *hd;            // active image handler
+    uint64_t cluster_offset;         // current write offset
+}ActiveBDRVState;
+
+static ActiveBDRVState activeBDRV;
+
+
 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     uint32_t magic;
@@ -305,7 +322,6 @@
         bdrv_close(bs->backing_hd);
 }
 
-
 static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
 {
     BDRVVmdkState *s = bs->opaque;
@@ -336,11 +352,14 @@
         s->hd->backing_hd = bdrv_new("");
         if (!s->hd->backing_hd) {
             failure:
+            s->is_parent = 0;
             bdrv_close(s->hd);
             return -1;
         }
-        if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0)
+        s->is_parent = 1;
+        if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
             goto failure;
+        s->is_parent = 0;
     }
 
     return 0;
@@ -352,6 +371,11 @@
     uint32_t magic;
     int l1_size, i, ret;
 
+    if (s->is_parent)
+        // Parent must be opened as RO.
+        flags = BDRV_O_RDONLY;
+    fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename);
+
     ret = bdrv_file_open(&s->hd, filename, flags);
     if (ret < 0)
         return ret;
@@ -430,7 +454,8 @@
     return -1;
 }
 
-static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
+                                   uint64_t offset, int allocate);
 
 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
                              uint64_t offset, int allocate)
@@ -446,27 +471,55 @@
 
         if (!vmdk_is_cid_valid(bs))
             return -1;
-        parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate);
-        if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != 
-                                                                            ps->cluster_sectors*512)
-            return -1;
 
-        if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) != 
-                                                                            sizeof(whole_grain))
+        parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
+
+        if (parent_cluster_offset) {
+            BDRVVmdkState *act_s = activeBDRV.hd->opaque;
+
+            if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
+                return -1;
+
+            //Write grain only into the active image
+            if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
+                return -1;
+        }
+    }
+    return 0;
+}
+
+static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+{
+    BDRVVmdkState *s = bs->opaque;
+
+    /* update L2 table */
+    if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
+                    &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
+        return -1;
+    /* update backup L2 table */
+    if (s->l1_backup_table_offset != 0) {
+        m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
+        if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
+                        &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
             return -1;
     }
+
     return 0;
 }
 
-static uint64_t get_cluster_offset(BlockDriverState *bs,
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
                                    uint64_t offset, int allocate)
 {
     BDRVVmdkState *s = bs->opaque;
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
-    uint32_t min_count, *l2_table, tmp;
+    uint32_t min_count, *l2_table, tmp = 0;
     uint64_t cluster_offset;
-    
+    int status;
+
+    if (m_data)
+        m_data->valid = 0;
+
     l1_index = (offset >> 9) / s->l1_entry_sectors;
     if (l1_index >= s->l1_size)
         return 0;
@@ -504,32 +557,45 @@
  found:
     l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
     cluster_offset = le32_to_cpu(l2_table[l2_index]);
+
     if (!cluster_offset) {
         struct stat file_buf;
 
         if (!allocate)
             return 0;
-        stat(s->hd->filename, &file_buf);
-        cluster_offset = file_buf.st_size;
-        bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
-
-        cluster_offset >>= 9;
-        /* update L2 table */
-        tmp = cpu_to_le32(cluster_offset);
-        l2_table[l2_index] = tmp;
-        if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), 
-                        &tmp, sizeof(tmp)) != sizeof(tmp))
-            return 0;
-        /* update backup L2 table */
-        if (s->l1_backup_table_offset != 0) {
-            l2_offset = s->l1_backup_table[l1_index];
-            if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)), 
-                            &tmp, sizeof(tmp)) != sizeof(tmp))
+        // Avoid the L2 tables update for the images that have snapshots.
+        if (!s->is_parent) {
+            status = stat(s->hd->filename, &file_buf);
+            if (status == -1) {
+                fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n",
+                                s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno));
                 return 0;
-        }
+            }
+            cluster_offset = file_buf.st_size;
+            bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
 
+            cluster_offset >>= 9;
+            tmp = cpu_to_le32(cluster_offset);
+            l2_table[l2_index] = tmp;
+            // Save the active image state
+            activeBDRV.cluster_offset = cluster_offset;
+            activeBDRV.hd = bs;
+        }
+        /* First of all we write grain itself, to avoid race condition
+         * that may to corrupt the image.
+         * This problem may occur because of insufficient space on host disk
+         * or inappropriate VM shutdown.
+         */
         if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
             return 0;
+
+        if (m_data) {
+            m_data->offset = tmp;
+            m_data->l1_index = l1_index;
+            m_data->l2_index = l2_index;
+            m_data->l2_offset = l2_offset;
+            m_data->valid = 1;
+        }
     }
     cluster_offset <<= 9;
     return cluster_offset;
@@ -542,7 +608,7 @@
     int index_in_cluster, n;
     uint64_t cluster_offset;
 
-    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
+    cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
     index_in_cluster = sector_num % s->cluster_sectors;
     n = s->cluster_sectors - index_in_cluster;
     if (n > nb_sectors)
@@ -559,7 +625,7 @@
     uint64_t cluster_offset;
 
     while (nb_sectors > 0) {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
+        cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
         index_in_cluster = sector_num % s->cluster_sectors;
         n = s->cluster_sectors - index_in_cluster;
         if (n > nb_sectors)
@@ -590,20 +656,34 @@
                      const uint8_t *buf, int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
+    VmdkMetaData m_data;
     int index_in_cluster, n;
     uint64_t cluster_offset;
     static int cid_update = 0;
 
+    if (sector_num > bs->total_sectors) {
+        fprintf(stderr,
+                "(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n",
+                sector_num, bs->total_sectors);
+        return -1;
+    }
+
     while (nb_sectors > 0) {
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
         n = s->cluster_sectors - index_in_cluster;
         if (n > nb_sectors)
             n = nb_sectors;
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1);
+        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
         if (!cluster_offset)
             return -1;
+
         if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
             return -1;
+        if (m_data.valid) {
+            /* update L2 tables */
+            if (vmdk_L2update(bs, &m_data) == -1)
+                return -1;
+        }
         nb_sectors -= n;
         sector_num += n;
         buf += n * 512;

  reply	other threads:[~2007-05-19 21:43 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-01-16 13:59 [Qemu-devel] Race condition in VMDK (QCOW*) formats Igor Lvovsky
2007-01-16 19:35 ` Fabrice Bellard
2007-01-16 19:35 ` Fabrice Bellard
2007-05-13 11:13   ` [Qemu-devel] [PATCH] Fix a race condition and non-leaf images growing in VMDK chains Igor Lvovsky
2007-05-17 13:54     ` Igor Lvovsky
2007-05-19 21:39       ` Thiemo Seufer [this message]
2007-05-19 21:42         ` Thiemo Seufer
2007-05-20 11:40           ` Igor Lvovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070519213928.GA2409@networkno.de \
    --to=ths@networkno.de \
    --cc=igor.lvovsky@qumranet.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.