From: Thiemo Seufer <ths@networkno.de>
To: Igor Lvovsky <igor.lvovsky@qumranet.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Fix a race condition and non-leaf images growing in VMDK chains.
Date: Sat, 19 May 2007 22:39:28 +0100 [thread overview]
Message-ID: <20070519213928.GA2409@networkno.de> (raw)
In-Reply-To: <64F9B87B6B770947A9F8391472E032160BD63B0C@ehost011-8.exch011.intermedia.net>
[-- Attachment #1: Type: text/plain, Size: 448 bytes --]
Igor Lvovsky wrote:
>
> Hi,
> The bug was in my last patch.
> This is a new one.
> I'll very appreciate any comments.
Appended is a variant which gets rid of the confusing "child" variable
which means "is_parent", and the duplicate of it (parent_open).
There was also an uninitialized variable "tmp" which was stored as
offset in m_data.
I quite possibly missed some bits which broke the patch, please test if
it still works for you.
Thiemo
[-- Attachment #2: block-vdmk.diff --]
[-- Type: text/x-diff, Size: 9259 bytes --]
Index: block-vmdk.c
===================================================================
--- block-vmdk.c.orig 2007-01-30 20:37:51.000000000 +0000
+++ block-vmdk.c 2007-05-19 22:32:01.000000000 +0100
@@ -75,8 +75,25 @@
unsigned int cluster_sectors;
uint32_t parent_cid;
+ int is_parent;
} BDRVVmdkState;
+typedef struct VmdkMetaData {
+ uint32_t offset;
+ unsigned int l1_index;
+ unsigned int l2_index;
+ unsigned int l2_offset;
+ int valid;
+} VmdkMetaData;
+
+typedef struct ActiveBDRVState{
+ BlockDriverState *hd; // active image handler
+ uint64_t cluster_offset; // current write offset
+}ActiveBDRVState;
+
+static ActiveBDRVState activeBDRV;
+
+
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
{
uint32_t magic;
@@ -305,7 +322,6 @@
bdrv_close(bs->backing_hd);
}
-
static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
{
BDRVVmdkState *s = bs->opaque;
@@ -336,11 +352,14 @@
s->hd->backing_hd = bdrv_new("");
if (!s->hd->backing_hd) {
failure:
+ s->is_parent = 0;
bdrv_close(s->hd);
return -1;
}
- if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0)
+ s->is_parent = 1;
+ if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
goto failure;
+ s->is_parent = 0;
}
return 0;
@@ -352,6 +371,11 @@
uint32_t magic;
int l1_size, i, ret;
+ if (s->is_parent)
+ // Parent must be opened as RO.
+ flags = BDRV_O_RDONLY;
+ fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename);
+
ret = bdrv_file_open(&s->hd, filename, flags);
if (ret < 0)
return ret;
@@ -430,7 +454,8 @@
return -1;
}
-static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
+ uint64_t offset, int allocate);
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
uint64_t offset, int allocate)
@@ -446,27 +471,55 @@
if (!vmdk_is_cid_valid(bs))
return -1;
- parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate);
- if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) !=
- ps->cluster_sectors*512)
- return -1;
- if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) !=
- sizeof(whole_grain))
+ parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
+
+ if (parent_cluster_offset) {
+ BDRVVmdkState *act_s = activeBDRV.hd->opaque;
+
+ if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
+ return -1;
+
+ //Write grain only into the active image
+ if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+{
+ BDRVVmdkState *s = bs->opaque;
+
+ /* update L2 table */
+ if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
+ &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
+ return -1;
+ /* update backup L2 table */
+ if (s->l1_backup_table_offset != 0) {
+ m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
+ if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
+ &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
return -1;
}
+
return 0;
}
-static uint64_t get_cluster_offset(BlockDriverState *bs,
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
uint64_t offset, int allocate)
{
BDRVVmdkState *s = bs->opaque;
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
- uint32_t min_count, *l2_table, tmp;
+ uint32_t min_count, *l2_table, tmp = 0;
uint64_t cluster_offset;
-
+ int status;
+
+ if (m_data)
+ m_data->valid = 0;
+
l1_index = (offset >> 9) / s->l1_entry_sectors;
if (l1_index >= s->l1_size)
return 0;
@@ -504,32 +557,45 @@
found:
l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
cluster_offset = le32_to_cpu(l2_table[l2_index]);
+
if (!cluster_offset) {
struct stat file_buf;
if (!allocate)
return 0;
- stat(s->hd->filename, &file_buf);
- cluster_offset = file_buf.st_size;
- bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
-
- cluster_offset >>= 9;
- /* update L2 table */
- tmp = cpu_to_le32(cluster_offset);
- l2_table[l2_index] = tmp;
- if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
- &tmp, sizeof(tmp)) != sizeof(tmp))
- return 0;
- /* update backup L2 table */
- if (s->l1_backup_table_offset != 0) {
- l2_offset = s->l1_backup_table[l1_index];
- if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
- &tmp, sizeof(tmp)) != sizeof(tmp))
+ // Avoid the L2 tables update for the images that have snapshots.
+ if (!s->is_parent) {
+ status = stat(s->hd->filename, &file_buf);
+ if (status == -1) {
+ fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n",
+ s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno));
return 0;
- }
+ }
+ cluster_offset = file_buf.st_size;
+ bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
+ cluster_offset >>= 9;
+ tmp = cpu_to_le32(cluster_offset);
+ l2_table[l2_index] = tmp;
+ // Save the active image state
+ activeBDRV.cluster_offset = cluster_offset;
+ activeBDRV.hd = bs;
+ }
+ /* First of all we write grain itself, to avoid race condition
+ * that may to corrupt the image.
+ * This problem may occur because of insufficient space on host disk
+ * or inappropriate VM shutdown.
+ */
if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
return 0;
+
+ if (m_data) {
+ m_data->offset = tmp;
+ m_data->l1_index = l1_index;
+ m_data->l2_index = l2_index;
+ m_data->l2_offset = l2_offset;
+ m_data->valid = 1;
+ }
}
cluster_offset <<= 9;
return cluster_offset;
@@ -542,7 +608,7 @@
int index_in_cluster, n;
uint64_t cluster_offset;
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
+ cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
index_in_cluster = sector_num % s->cluster_sectors;
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
@@ -559,7 +625,7 @@
uint64_t cluster_offset;
while (nb_sectors > 0) {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
+ cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
index_in_cluster = sector_num % s->cluster_sectors;
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
@@ -590,20 +656,34 @@
const uint8_t *buf, int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
+ VmdkMetaData m_data;
int index_in_cluster, n;
uint64_t cluster_offset;
static int cid_update = 0;
+ if (sector_num > bs->total_sectors) {
+ fprintf(stderr,
+ "(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n",
+ sector_num, bs->total_sectors);
+ return -1;
+ }
+
while (nb_sectors > 0) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
n = nb_sectors;
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 1);
+ cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
if (!cluster_offset)
return -1;
+
if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
return -1;
+ if (m_data.valid) {
+ /* update L2 tables */
+ if (vmdk_L2update(bs, &m_data) == -1)
+ return -1;
+ }
nb_sectors -= n;
sector_num += n;
buf += n * 512;
next prev parent reply other threads:[~2007-05-19 21:43 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-16 13:59 [Qemu-devel] Race condition in VMDK (QCOW*) formats Igor Lvovsky
2007-01-16 19:35 ` Fabrice Bellard
2007-01-16 19:35 ` Fabrice Bellard
2007-05-13 11:13 ` [Qemu-devel] [PATCH] Fix a race condition and non-leaf images growing in VMDK chains Igor Lvovsky
2007-05-17 13:54 ` Igor Lvovsky
2007-05-19 21:39 ` Thiemo Seufer [this message]
2007-05-19 21:42 ` Thiemo Seufer
2007-05-20 11:40 ` Igor Lvovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070519213928.GA2409@networkno.de \
--to=ths@networkno.de \
--cc=igor.lvovsky@qumranet.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).