* [Qemu-devel] [PATCH 1/5] Remove tabs from qcow_aio_read_cb(). Fix indentation.
2008-11-23 14:52 [Qemu-devel] [PATCH 0/5] QCOW2 small cleanups and changing metadata update order Gleb Natapov
@ 2008-11-23 14:52 ` Gleb Natapov
2008-11-24 16:31 ` Anthony Liguori
2008-11-23 14:53 ` [Qemu-devel] [PATCH 2/5] Introduce new helper function qcow_shedule_bh() Gleb Natapov
` (3 subsequent siblings)
4 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2008-11-23 14:52 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
block-qcow2.c | 78 +++++++++++++++++++++++++++++----------------------------
1 files changed, 39 insertions(+), 39 deletions(-)
diff --git a/block-qcow2.c b/block-qcow2.c
index dc73769..ab19a4e 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -1186,7 +1186,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
acb->hd_aiocb = NULL;
if (ret < 0) {
- fail:
+fail:
acb->common.cb(acb->common.opaque, ret);
qemu_aio_release(acb);
return;
@@ -1200,8 +1200,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
} else {
if (s->crypt_method) {
encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
- acb->n, 0,
- &s->aes_decrypt_key);
+ acb->n, 0,
+ &s->aes_decrypt_key);
}
}
@@ -1225,62 +1225,62 @@ static void qcow_aio_read_cb(void *opaque, int ret)
if (bs->backing_hd) {
/* read from the base image */
n1 = backing_read1(bs->backing_hd, acb->sector_num,
- acb->buf, acb->n);
+ acb->buf, acb->n);
if (n1 > 0) {
acb->hd_aiocb = bdrv_aio_read(bs->backing_hd, acb->sector_num,
- acb->buf, acb->n, qcow_aio_read_cb, acb);
+ acb->buf, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
} else {
- if (acb->bh) {
- ret = -EIO;
- goto fail;
- }
- acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
- if (!acb->bh) {
- ret = -EIO;
- goto fail;
- }
- qemu_bh_schedule(acb->bh);
+ if (acb->bh) {
+ ret = -EIO;
+ goto fail;
+ }
+ acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+ if (!acb->bh) {
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_bh_schedule(acb->bh);
}
} else {
/* Note: in this case, no need to wait */
memset(acb->buf, 0, 512 * acb->n);
- if (acb->bh) {
- ret = -EIO;
- goto fail;
- }
- acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
- if (!acb->bh) {
- ret = -EIO;
- goto fail;
- }
- qemu_bh_schedule(acb->bh);
+ if (acb->bh) {
+ ret = -EIO;
+ goto fail;
+ }
+ acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+ if (!acb->bh) {
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_bh_schedule(acb->bh);
}
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
if (decompress_cluster(s, acb->cluster_offset) < 0)
goto fail;
memcpy(acb->buf,
- s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
- if (acb->bh) {
- ret = -EIO;
- goto fail;
- }
- acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
- if (!acb->bh) {
- ret = -EIO;
- goto fail;
- }
- qemu_bh_schedule(acb->bh);
+ s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
+ if (acb->bh) {
+ ret = -EIO;
+ goto fail;
+ }
+ acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
+ if (!acb->bh) {
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_bh_schedule(acb->bh);
} else {
if ((acb->cluster_offset & 511) != 0) {
ret = -EIO;
goto fail;
}
acb->hd_aiocb = bdrv_aio_read(s->hd,
- (acb->cluster_offset >> 9) + index_in_cluster,
- acb->buf, acb->n, qcow_aio_read_cb, acb);
+ (acb->cluster_offset >> 9) + index_in_cluster,
+ acb->buf, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
}
@@ -1551,7 +1551,7 @@ static int qcow_make_empty(BlockDriverState *bs)
memset(s->l1_table, 0, l1_length);
if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
- return -1;
+ return -1;
ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 1/5] Remove tabs from qcow_aio_read_cb(). Fix indentation.
2008-11-23 14:52 ` [Qemu-devel] [PATCH 1/5] Remove tabs from qcow_aio_read_cb(). Fix indentation Gleb Natapov
@ 2008-11-24 16:31 ` Anthony Liguori
2008-11-24 20:03 ` Gleb Natapov
0 siblings, 1 reply; 15+ messages in thread
From: Anthony Liguori @ 2008-11-24 16:31 UTC (permalink / raw)
To: qemu-devel
Gleb Natapov wrote:
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>
> block-qcow2.c | 78 +++++++++++++++++++++++++++++----------------------------
> 1 files changed, 39 insertions(+), 39 deletions(-)
>
> diff --git a/block-qcow2.c b/block-qcow2.c
> index dc73769..ab19a4e 100644
> --- a/block-qcow2.c
> +++ b/block-qcow2.c
> @@ -1186,7 +1186,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
>
> acb->hd_aiocb = NULL;
> if (ret < 0) {
> - fail:
> +fail:
> acb->common.cb(acb->common.opaque, ret);
> qemu_aio_release(acb);
> return;
> @@ -1200,8 +1200,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
> } else {
> if (s->crypt_method) {
> encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
> - acb->n, 0,
> - &s->aes_decrypt_key);
> + acb->n, 0,
> + &s->aes_decrypt_key);
>
Why did you change this indenting? There are no tabs here, and I think it's
better the way it was than how you made it.
> @@ -1551,7 +1551,7 @@ static int qcow_make_empty(BlockDriverState *bs)
>
> memset(s->l1_table, 0, l1_length);
> if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
> - return -1;
> + return -1;
> ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
> if (ret < 0)
> return ret;
>
That's really not right.
Regards,
Anthony Liguori
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 1/5] Remove tabs from qcow_aio_read_cb(). Fix indentation.
2008-11-24 16:31 ` Anthony Liguori
@ 2008-11-24 20:03 ` Gleb Natapov
0 siblings, 0 replies; 15+ messages in thread
From: Gleb Natapov @ 2008-11-24 20:03 UTC (permalink / raw)
To: qemu-devel
On Mon, Nov 24, 2008 at 10:31:00AM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>> ---
>>
>> block-qcow2.c | 78 +++++++++++++++++++++++++++++----------------------------
>> 1 files changed, 39 insertions(+), 39 deletions(-)
>>
>> diff --git a/block-qcow2.c b/block-qcow2.c
>> index dc73769..ab19a4e 100644
>> --- a/block-qcow2.c
>> +++ b/block-qcow2.c
>> @@ -1186,7 +1186,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
>> acb->hd_aiocb = NULL;
>> if (ret < 0) {
>> - fail:
>> +fail:
>> acb->common.cb(acb->common.opaque, ret);
>> qemu_aio_release(acb);
>> return;
>> @@ -1200,8 +1200,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
>> } else {
>> if (s->crypt_method) {
>> encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
>> - acb->n, 0,
>> - &s->aes_decrypt_key);
>> + acb->n, 0,
>> + &s->aes_decrypt_key);
>>
>
> Why did you change this indenting? There is no tabs and I think it's
> better the way it is than how you made it.
>
I reformatted the whole function with vi autoindent. I'll drop those two
lines.
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 2/5] Introduce new helper function qcow_shedule_bh().
2008-11-23 14:52 [Qemu-devel] [PATCH 0/5] QCOW2 small cleanups and changing metadata update order Gleb Natapov
2008-11-23 14:52 ` [Qemu-devel] [PATCH 1/5] Remove tabs from qcow_aio_read_cb(). Fix indentation Gleb Natapov
@ 2008-11-23 14:53 ` Gleb Natapov
2008-11-24 16:32 ` Anthony Liguori
2008-11-23 14:53 ` [Qemu-devel] [PATCH 3/5] Write table offset and size in one syscall Gleb Natapov
` (2 subsequent siblings)
4 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2008-11-23 14:53 UTC (permalink / raw)
To: qemu-devel
Use it to remove code duplication from qcow_aio_read_cb().
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
block-qcow2.c | 44 +++++++++++++++++---------------------------
1 files changed, 17 insertions(+), 27 deletions(-)
diff --git a/block-qcow2.c b/block-qcow2.c
index ab19a4e..69f6414 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -1177,6 +1177,20 @@ static void qcow_aio_read_bh(void *opaque)
qcow_aio_read_cb(opaque, 0);
}
+static int qcow_shedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
+{
+ if (acb->bh)
+ return -EIO;
+
+ acb->bh = qemu_bh_new(cb, acb);
+ if (!acb->bh)
+ return -EIO;
+
+ qemu_bh_schedule(acb->bh);
+
+ return 0;
+}
+
static void qcow_aio_read_cb(void *opaque, int ret)
{
QCowAIOCB *acb = opaque;
@@ -1232,30 +1246,14 @@ fail:
if (acb->hd_aiocb == NULL)
goto fail;
} else {
- if (acb->bh) {
- ret = -EIO;
- goto fail;
- }
- acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
- if (!acb->bh) {
- ret = -EIO;
+ if((ret = qcow_shedule_bh(qcow_aio_read_bh, acb)) < 0)
goto fail;
- }
- qemu_bh_schedule(acb->bh);
}
} else {
/* Note: in this case, no need to wait */
memset(acb->buf, 0, 512 * acb->n);
- if (acb->bh) {
- ret = -EIO;
+ if((ret = qcow_shedule_bh(qcow_aio_read_bh, acb)) < 0)
goto fail;
- }
- acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
- if (!acb->bh) {
- ret = -EIO;
- goto fail;
- }
- qemu_bh_schedule(acb->bh);
}
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
@@ -1263,16 +1261,8 @@ fail:
goto fail;
memcpy(acb->buf,
s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
- if (acb->bh) {
- ret = -EIO;
+ if((ret = qcow_shedule_bh(qcow_aio_read_bh, acb)) < 0)
goto fail;
- }
- acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
- if (!acb->bh) {
- ret = -EIO;
- goto fail;
- }
- qemu_bh_schedule(acb->bh);
} else {
if ((acb->cluster_offset & 511) != 0) {
ret = -EIO;
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 2/5] Introduce new helper function qcow_shedule_bh().
2008-11-23 14:53 ` [Qemu-devel] [PATCH 2/5] Introduce new helper function qcow_shedule_bh() Gleb Natapov
@ 2008-11-24 16:32 ` Anthony Liguori
2008-11-24 20:05 ` Gleb Natapov
0 siblings, 1 reply; 15+ messages in thread
From: Anthony Liguori @ 2008-11-24 16:32 UTC (permalink / raw)
To: qemu-devel
Gleb Natapov wrote:
> Use it to remove code duplications from qcow_aio_read_cb().
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>
> block-qcow2.c | 44 +++++++++++++++++---------------------------
> 1 files changed, 17 insertions(+), 27 deletions(-)
>
> diff --git a/block-qcow2.c b/block-qcow2.c
> index ab19a4e..69f6414 100644
> --- a/block-qcow2.c
> +++ b/block-qcow2.c
> @@ -1177,6 +1177,20 @@ static void qcow_aio_read_bh(void *opaque)
> qcow_aio_read_cb(opaque, 0);
> }
>
> +static int qcow_shedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
> +{
> + if (acb->bh)
> + return -EIO;
> +
> + acb->bh = qemu_bh_new(cb, acb);
> + if (!acb->bh)
> + return -EIO;
> +
> + qemu_bh_schedule(acb->bh);
> +
> + return 0;
> +}
> +
> static void qcow_aio_read_cb(void *opaque, int ret)
> {
> QCowAIOCB *acb = opaque;
> @@ -1232,30 +1246,14 @@ fail:
> if (acb->hd_aiocb == NULL)
> goto fail;
> } else {
> - if (acb->bh) {
> - ret = -EIO;
> - goto fail;
> - }
> - acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
> - if (!acb->bh) {
> - ret = -EIO;
> + if((ret = qcow_shedule_bh(qcow_aio_read_bh, acb)) < 0)
>
Please do this on two lines and watch the whitespace damage. Also, you
have a consistent typo in "schedule".
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 2/5] Introduce new helper function qcow_shedule_bh().
2008-11-24 16:32 ` Anthony Liguori
@ 2008-11-24 20:05 ` Gleb Natapov
0 siblings, 0 replies; 15+ messages in thread
From: Gleb Natapov @ 2008-11-24 20:05 UTC (permalink / raw)
To: qemu-devel
On Mon, Nov 24, 2008 at 10:32:53AM -0600, Anthony Liguori wrote:
> Gleb Natapov wrote:
>> Use it to remove code duplications from qcow_aio_read_cb().
>>
>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>> ---
>>
>> block-qcow2.c | 44 +++++++++++++++++---------------------------
>> 1 files changed, 17 insertions(+), 27 deletions(-)
>>
>> diff --git a/block-qcow2.c b/block-qcow2.c
>> index ab19a4e..69f6414 100644
>> --- a/block-qcow2.c
>> +++ b/block-qcow2.c
>> @@ -1177,6 +1177,20 @@ static void qcow_aio_read_bh(void *opaque)
>> qcow_aio_read_cb(opaque, 0);
>> }
>> +static int qcow_shedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
>> +{
>> + if (acb->bh)
>> + return -EIO;
>> +
>> + acb->bh = qemu_bh_new(cb, acb);
>> + if (!acb->bh)
>> + return -EIO;
>> +
>> + qemu_bh_schedule(acb->bh);
>> +
>> + return 0;
>> +}
>> +
>> static void qcow_aio_read_cb(void *opaque, int ret)
>> {
>> QCowAIOCB *acb = opaque;
>> @@ -1232,30 +1246,14 @@ fail:
>> if (acb->hd_aiocb == NULL)
>> goto fail;
>> } else {
>> - if (acb->bh) {
>> - ret = -EIO;
>> - goto fail;
>> - }
>> - acb->bh = qemu_bh_new(qcow_aio_read_bh, acb);
>> - if (!acb->bh) {
>> - ret = -EIO;
>> + if((ret = qcow_shedule_bh(qcow_aio_read_bh, acb)) < 0)
>>
>
> Please do this on two lines and watch the whitespace damage. Also, you
OK.
> have a consistent typo in "schedule".
>
That is only one typo and many cut and pastes :)
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 3/5] Write table offset and size in one syscall.
2008-11-23 14:52 [Qemu-devel] [PATCH 0/5] QCOW2 small cleanups and changing metadata update order Gleb Natapov
2008-11-23 14:52 ` [Qemu-devel] [PATCH 1/5] Remove tabs from qcow_aio_read_cb(). Fix indentation Gleb Natapov
2008-11-23 14:53 ` [Qemu-devel] [PATCH 2/5] Introduce new helper function qcow_shedule_bh() Gleb Natapov
@ 2008-11-23 14:53 ` Gleb Natapov
2008-11-24 16:42 ` Anthony Liguori
2008-11-23 14:53 ` [Qemu-devel] [PATCH 4/5] Cleanup {alloc|get}_cluster_offset() Gleb Natapov
2008-11-23 14:53 ` [Qemu-devel] [PATCH 5/5] Change order of metadata update to prevent loosing guest data because of unexpected exit Gleb Natapov
4 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2008-11-23 14:53 UTC (permalink / raw)
To: qemu-devel
Otherwise, if the VM is killed between the two writes, data may be lost.
But if the offset and size fields are in the same disk block, one
write should update them both simultaneously.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
block-qcow2.c | 27 ++++++++++-----------------
1 files changed, 10 insertions(+), 17 deletions(-)
diff --git a/block-qcow2.c b/block-qcow2.c
index 69f6414..7f99921 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -429,8 +429,7 @@ static int grow_l1_table(BlockDriverState *bs, int min_size)
int new_l1_size, new_l1_size2, ret, i;
uint64_t *new_l1_table;
uint64_t new_l1_table_offset;
- uint64_t data64;
- uint32_t data32;
+ uint8_t data[12];
new_l1_size = s->l1_size;
if (min_size <= new_l1_size)
@@ -460,14 +459,12 @@ static int grow_l1_table(BlockDriverState *bs, int min_size)
new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
/* set new table */
- data64 = cpu_to_be64(new_l1_table_offset);
- if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_table_offset),
- &data64, sizeof(data64)) != sizeof(data64))
- goto fail;
- data32 = cpu_to_be32(new_l1_size);
- if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size),
- &data32, sizeof(data32)) != sizeof(data32))
+ *(uint32_t*)data = cpu_to_be32(new_l1_size);
+ *(uint64_t*)&data[4] = cpu_to_be64(new_l1_table_offset);
+ if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data,
+ sizeof(data)) != sizeof(data))
goto fail;
+
qemu_free(s->l1_table);
free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
s->l1_table_offset = new_l1_table_offset;
@@ -2278,8 +2275,7 @@ static int grow_refcount_table(BlockDriverState *bs, int min_size)
int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
uint64_t *new_table;
int64_t table_offset;
- uint64_t data64;
- uint32_t data32;
+ uint8_t data[12];
int old_table_size;
int64_t old_table_offset;
@@ -2318,13 +2314,10 @@ static int grow_refcount_table(BlockDriverState *bs, int min_size)
for(i = 0; i < s->refcount_table_size; i++)
be64_to_cpus(&new_table[i]);
- data64 = cpu_to_be64(table_offset);
+ *(uint64_t*)data = cpu_to_be64(table_offset);
+ *(uint32_t*)&data[8] = cpu_to_be32(refcount_table_clusters);
if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
- &data64, sizeof(data64)) != sizeof(data64))
- goto fail;
- data32 = cpu_to_be32(refcount_table_clusters);
- if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_clusters),
- &data32, sizeof(data32)) != sizeof(data32))
+ data, sizeof(data)) != sizeof(data))
goto fail;
qemu_free(s->refcount_table);
old_table_offset = s->refcount_table_offset;
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 3/5] Write table offset and size in one syscall.
2008-11-23 14:53 ` [Qemu-devel] [PATCH 3/5] Write table offset and size in one syscall Gleb Natapov
@ 2008-11-24 16:42 ` Anthony Liguori
2008-11-24 20:11 ` Gleb Natapov
0 siblings, 1 reply; 15+ messages in thread
From: Anthony Liguori @ 2008-11-24 16:42 UTC (permalink / raw)
To: qemu-devel
Gleb Natapov wrote:
> Otherwise if VM is killed between two writes data may be lost.
> But if offset and size fields are at the same disk block one
> write should update them both simultaneously.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>
> block-qcow2.c | 27 ++++++++++-----------------
> 1 files changed, 10 insertions(+), 17 deletions(-)
>
> diff --git a/block-qcow2.c b/block-qcow2.c
> index 69f6414..7f99921 100644
> --- a/block-qcow2.c
> +++ b/block-qcow2.c
> @@ -429,8 +429,7 @@ static int grow_l1_table(BlockDriverState *bs, int min_size)
> int new_l1_size, new_l1_size2, ret, i;
> uint64_t *new_l1_table;
> uint64_t new_l1_table_offset;
> - uint64_t data64;
> - uint32_t data32;
> + uint8_t data[12];
>
This assumes packing will happen correctly.
>
> new_l1_size = s->l1_size;
> if (min_size <= new_l1_size)
> @@ -460,14 +459,12 @@ static int grow_l1_table(BlockDriverState *bs, int min_size)
> new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
>
> /* set new table */
> - data64 = cpu_to_be64(new_l1_table_offset);
> - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_table_offset),
> - &data64, sizeof(data64)) != sizeof(data64))
> - goto fail;
> - data32 = cpu_to_be32(new_l1_size);
> - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size),
> - &data32, sizeof(data32)) != sizeof(data32))
> + *(uint32_t*)data = cpu_to_be32(new_l1_size);
> + *(uint64_t*)&data[4] = cpu_to_be64(new_l1_table_offset);
> + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data,
> + sizeof(data)) != sizeof(data))
> goto fail;
>
>
Why not just introduce a uint8_t data[12] in this function, memcpy to
the right offsets, and do one bdrv_pwrite? Then you don't need to do
weird things with packing.
> qemu_free(s->l1_table);
> free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
> s->l1_table_offset = new_l1_table_offset;
> @@ -2278,8 +2275,7 @@ static int grow_refcount_table(BlockDriverState *bs, int min_size)
> int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
> uint64_t *new_table;
> int64_t table_offset;
> - uint64_t data64;
> - uint32_t data32;
> + uint8_t data[12];
> int old_table_size;
> int64_t old_table_offset;
>
> @@ -2318,13 +2314,10 @@ static int grow_refcount_table(BlockDriverState *bs, int min_size)
> for(i = 0; i < s->refcount_table_size; i++)
> be64_to_cpus(&new_table[i]);
>
> - data64 = cpu_to_be64(table_offset);
> + *(uint64_t*)data = cpu_to_be64(table_offset);
> + *(uint32_t*)&data[8] = cpu_to_be32(refcount_table_clusters);
> if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
> - &data64, sizeof(data64)) != sizeof(data64))
> - goto fail;
> - data32 = cpu_to_be32(refcount_table_clusters);
> - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_clusters),
> - &data32, sizeof(data32)) != sizeof(data32))
> + data, sizeof(data)) != sizeof(data))
> goto fail;
>
Same here. Alternatively, you could use the cpu_to_beXXs() variants and
just pass in pointer offsets.
Regards,
Anthony Liguori
> qemu_free(s->refcount_table);
> old_table_offset = s->refcount_table_offset;
>
>
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [PATCH 3/5] Write table offset and size in one syscall.
2008-11-24 16:42 ` Anthony Liguori
@ 2008-11-24 20:11 ` Gleb Natapov
0 siblings, 0 replies; 15+ messages in thread
From: Gleb Natapov @ 2008-11-24 20:11 UTC (permalink / raw)
To: qemu-devel
On Mon, Nov 24, 2008 at 10:42:24AM -0600, Anthony Liguori wrote:
>> new_l1_size = s->l1_size;
>> if (min_size <= new_l1_size)
>> @@ -460,14 +459,12 @@ static int grow_l1_table(BlockDriverState *bs, int min_size)
>> new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
>> /* set new table */
>> - data64 = cpu_to_be64(new_l1_table_offset);
>> - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_table_offset),
>> - &data64, sizeof(data64)) != sizeof(data64))
>> - goto fail;
>> - data32 = cpu_to_be32(new_l1_size);
>> - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size),
>> - &data32, sizeof(data32)) != sizeof(data32))
>> + *(uint32_t*)data = cpu_to_be32(new_l1_size);
>> + *(uint64_t*)&data[4] = cpu_to_be64(new_l1_table_offset);
>> + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data,
>> + sizeof(data)) != sizeof(data))
>> goto fail;
>>
>>
>
> Why not just introduces a uint8_t data[12] in this function, memcpy to
> the right offsets, and do one brdv_pwrite? Then you don't need to do
> weird things with packing.
>
That is exactly what I did, except for the memcpy part. What can go wrong with
packing the way I did it?
>> be64_to_cpus(&new_table[i]);
>> - data64 = cpu_to_be64(table_offset);
>> + *(uint64_t*)data = cpu_to_be64(table_offset);
>> + *(uint32_t*)&data[8] = cpu_to_be32(refcount_table_clusters);
>> if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
>> - &data64, sizeof(data64)) != sizeof(data64))
>> - goto fail;
>> - data32 = cpu_to_be32(refcount_table_clusters);
>> - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_clusters),
>> - &data32, sizeof(data32)) != sizeof(data32))
>> + data, sizeof(data)) != sizeof(data))
>> goto fail;
>>
>
> Same here. Alternatively, you could use the cpu_to_beXXs() variants and
> just pass in pointer offsets.
>
That is a good idea. I'll use cpu_to_beXXs().
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 4/5] Cleanup {alloc|get}_cluster_offset().
2008-11-23 14:52 [Qemu-devel] [PATCH 0/5] QCOW2 small cleanups and changing metadata update order Gleb Natapov
` (2 preceding siblings ...)
2008-11-23 14:53 ` [Qemu-devel] [PATCH 3/5] Write table offset and size in one syscall Gleb Natapov
@ 2008-11-23 14:53 ` Gleb Natapov
2008-11-24 16:47 ` Anthony Liguori
2008-11-23 14:53 ` [Qemu-devel] [PATCH 5/5] Change order of metadata update to prevent loosing guest data because of unexpected exit Gleb Natapov
4 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2008-11-23 14:53 UTC (permalink / raw)
To: qemu-devel
Move duplicated code into helper functions.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
block-qcow2.c | 160 +++++++++++++++++++++++++--------------------------------
1 files changed, 70 insertions(+), 90 deletions(-)
diff --git a/block-qcow2.c b/block-qcow2.c
index 7f99921..0771281 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -61,6 +61,8 @@
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
+#define SIZE2CLUSTERS(Q, S) (((S) + ((Q)->cluster_size - 1)) >> (Q)->cluster_bits)
+
typedef struct QCowHeader {
uint32_t magic;
uint32_t version;
@@ -602,6 +604,29 @@ static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index)
return l2_table;
}
+static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+ uint64_t *l2_table, uint64_t mask)
+{
+ int i;
+ uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask;
+
+ for (i = 0; i < nb_clusters; i++)
+ if (offset + i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask))
+ break;
+
+ return i;
+}
+
+static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+{
+ int i = 0;
+
+ while(nb_clusters-- && l2_table[i] == 0)
+ i++;
+
+ return i;
+}
+
/*
* get_cluster_offset
*
@@ -623,9 +648,9 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
int l1_index, l2_index;
- uint64_t l2_offset, *l2_table, cluster_offset, next;
- int l1_bits;
- int index_in_cluster, nb_available, nb_needed;
+ uint64_t l2_offset, *l2_table, cluster_offset;
+ int l1_bits, c;
+ int index_in_cluster, nb_available, nb_needed, nb_clusters;
index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
nb_needed = *num + index_in_cluster;
@@ -633,7 +658,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
l1_bits = s->l2_bits + s->cluster_bits;
/* compute how many bytes there are between the offset and
- * and the end of the l1 entry
+ * the end of the l1 entry
*/
nb_available = (1 << l1_bits) - (offset & ((1 << l1_bits) - 1));
@@ -668,38 +693,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
cluster_offset = be64_to_cpu(l2_table[l2_index]);
- nb_available = s->cluster_sectors;
- l2_index++;
-
- if (!cluster_offset) {
-
- /* how many empty clusters ? */
-
- while (nb_available < nb_needed && !l2_table[l2_index]) {
- l2_index++;
- nb_available += s->cluster_sectors;
- }
- } else {
+ nb_clusters = SIZE2CLUSTERS(s, nb_needed << 9);
+ c = (!cluster_offset) ?
+ /* how many empty clusters ? */
+ count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]) :
/* how many allocated clusters ? */
+ count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], QCOW_OFLAG_COPIED);
- cluster_offset &= ~QCOW_OFLAG_COPIED;
- while (nb_available < nb_needed) {
- next = be64_to_cpu(l2_table[l2_index]) & ~QCOW_OFLAG_COPIED;
- if (next != cluster_offset + (nb_available << 9))
- break;
- l2_index++;
- nb_available += s->cluster_sectors;
- }
- }
-
+ nb_available = (c * s->cluster_sectors);
out:
if (nb_available > nb_needed)
nb_available = nb_needed;
*num = nb_available - index_in_cluster;
- return cluster_offset;
+ return cluster_offset & ~QCOW_OFLAG_COPIED;
}
/*
@@ -863,15 +873,15 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
BDRVQcowState *s = bs->opaque;
int l2_index, ret;
uint64_t l2_offset, *l2_table, cluster_offset;
- int nb_available, nb_clusters, i, j;
- uint64_t start_sect, current;
+ int nb_available, nb_clusters, i = 0;
+ uint64_t start_sect;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
if (ret == 0)
return 0;
- nb_clusters = ((n_end << 9) + s->cluster_size - 1) >>
- s->cluster_bits;
+ nb_clusters = SIZE2CLUSTERS(s, n_end << 9);
+
if (nb_clusters > s->l2_size - l2_index)
nb_clusters = s->l2_size - l2_index;
@@ -880,13 +890,8 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
/* We keep all QCOW_OFLAG_COPIED clusters */
if (cluster_offset & QCOW_OFLAG_COPIED) {
-
- for (i = 1; i < nb_clusters; i++) {
- current = be64_to_cpu(l2_table[l2_index + i]);
- if (cluster_offset + (i << s->cluster_bits) != current)
- break;
- }
- nb_clusters = i;
+ nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0);
nb_available = nb_clusters << (s->cluster_bits - 9);
if (nb_available > n_end)
@@ -904,46 +909,27 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
/* how many available clusters ? */
- i = 0;
while (i < nb_clusters) {
+ int j;
+ i += count_contiguous_free_clusters(nb_clusters - i,
+ &l2_table[l2_index + i]);
- i++;
-
- if (!cluster_offset) {
-
- /* how many free clusters ? */
-
- while (i < nb_clusters) {
- cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
- if (cluster_offset != 0)
- break;
- i++;
- }
+ cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
- if ((cluster_offset & QCOW_OFLAG_COPIED) ||
+ if ((cluster_offset & QCOW_OFLAG_COPIED) ||
(cluster_offset & QCOW_OFLAG_COMPRESSED))
- break;
-
- } else {
+ break;
- /* how many contiguous clusters ? */
+ j = count_contiguous_clusters(nb_clusters - i, s->cluster_size,
+ &l2_table[l2_index + i], 0);
- j = 1;
- current = 0;
- while (i < nb_clusters) {
- current = be64_to_cpu(l2_table[l2_index + i]);
- if (cluster_offset + (j << s->cluster_bits) != current)
- break;
+ if (j)
+ free_any_clusters(bs, cluster_offset, j);
- i++;
- j++;
- }
+ i += j;
- free_any_clusters(bs, cluster_offset, j);
- if (current)
- break;
- cluster_offset = current;
- }
+ if(be64_to_cpu(l2_table[l2_index + i]))
+ break;
}
nb_clusters = i;
@@ -951,6 +937,7 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
cluster_offset = alloc_clusters(bs, nb_clusters * s->cluster_size);
+
/* we must initialize the cluster content which won't be
written */
@@ -2192,26 +2179,19 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
BDRVQcowState *s = bs->opaque;
int i, nb_clusters;
- nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits;
- for(;;) {
- if (get_refcount(bs, s->free_cluster_index) == 0) {
- s->free_cluster_index++;
- for(i = 1; i < nb_clusters; i++) {
- if (get_refcount(bs, s->free_cluster_index) != 0)
- goto not_found;
- s->free_cluster_index++;
- }
+ nb_clusters = SIZE2CLUSTERS(s, size);
+retry:
+ for(i = 0; i < nb_clusters; i++) {
+ int64_t i = s->free_cluster_index++;
+ if (get_refcount(bs, i) != 0)
+ goto retry;
+ }
#ifdef DEBUG_ALLOC2
- printf("alloc_clusters: size=%lld -> %lld\n",
- size,
- (s->free_cluster_index - nb_clusters) << s->cluster_bits);
+ printf("alloc_clusters: size=%lld -> %lld\n",
+ size,
+ (s->free_cluster_index - nb_clusters) << s->cluster_bits);
#endif
- return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
- } else {
- not_found:
- s->free_cluster_index++;
- }
- }
+ return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
}
static int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
@@ -2546,7 +2526,7 @@ static void check_refcounts(BlockDriverState *bs)
uint16_t *refcount_table;
size = bdrv_getlength(s->hd);
- nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits;
+ nb_clusters = SIZE2CLUSTERS(s, size);
refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
/* header */
@@ -2598,7 +2578,7 @@ static void dump_refcounts(BlockDriverState *bs)
int refcount;
size = bdrv_getlength(s->hd);
- nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits;
+ nb_clusters = SIZE2CLUSTERS(s, size);
for(k = 0; k < nb_clusters;) {
k1 = k;
refcount = get_refcount(bs, k);
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 4/5] Cleanup {alloc|get}_cluster_offset().
2008-11-23 14:53 ` [Qemu-devel] [PATCH 4/5] Cleanup {alloc|get}_cluster_offset() Gleb Natapov
@ 2008-11-24 16:47 ` Anthony Liguori
0 siblings, 0 replies; 15+ messages in thread
From: Anthony Liguori @ 2008-11-24 16:47 UTC (permalink / raw)
To: qemu-devel
Gleb Natapov wrote:
> Move duplicated code into helper functions.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>
> block-qcow2.c | 160 +++++++++++++++++++++++++--------------------------------
> 1 files changed, 70 insertions(+), 90 deletions(-)
>
> diff --git a/block-qcow2.c b/block-qcow2.c
> index 7f99921..0771281 100644
> --- a/block-qcow2.c
> +++ b/block-qcow2.c
> @@ -61,6 +61,8 @@
>
> #define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
>
> +#define SIZE2CLUSTERS(Q, S) (((S) + ((Q)->cluster_size - 1)) >> (Q)->cluster_bits)
>
Please make this a static function.
> @@ -668,38 +693,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
>
> l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
> cluster_offset = be64_to_cpu(l2_table[l2_index]);
> - nb_available = s->cluster_sectors;
> - l2_index++;
> -
> - if (!cluster_offset) {
> -
> - /* how many empty clusters ? */
> -
> - while (nb_available < nb_needed && !l2_table[l2_index]) {
> - l2_index++;
> - nb_available += s->cluster_sectors;
> - }
> - } else {
> + nb_clusters = SIZE2CLUSTERS(s, nb_needed << 9);
>
> + c = (!cluster_offset) ?
> + /* how many empty clusters ? */
> + count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]) :
> /* how many allocated clusters ? */
> + count_contiguous_clusters(nb_clusters, s->cluster_size,
> + &l2_table[l2_index], QCOW_OFLAG_COPIED);
>
Just use an if() here.
> nb_available = nb_clusters << (s->cluster_bits - 9);
> if (nb_available > n_end)
> @@ -951,6 +937,7 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
>
> cluster_offset = alloc_clusters(bs, nb_clusters * s->cluster_size);
>
> +
> /* we must initialize the cluster content which won't be
> written */
Unnecessary whitespace.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH 5/5] Change order of metadata update to prevent loosing guest data because of unexpected exit.
2008-11-23 14:52 [Qemu-devel] [PATCH 0/5] QCOW2 small cleanups and changing metadata update order Gleb Natapov
` (3 preceding siblings ...)
2008-11-23 14:53 ` [Qemu-devel] [PATCH 4/5] Cleanup {alloc|get}_cluster_offset() Gleb Natapov
@ 2008-11-23 14:53 ` Gleb Natapov
2008-11-24 16:49 ` Anthony Liguori
4 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2008-11-23 14:53 UTC (permalink / raw)
To: qemu-devel
Currently the order is this (during cow since it's the interesting case):
1. Decrement refcount of old clusters
2. Increment refcount for newly allocated clusters
3. Copy content of old sectors that will not be rewritten
4. Update L2 table with pointers to new clusters
5. Write guest data into new clusters (asynchronously)
There are several problems with this order. The first one is that if qemu
crashes (or is killed, or the host reboots) after new clusters are linked into
the L2 table but before user data is written there, then on the next reboot the
guest will find neither the old data nor the new data in those sectors, and this
is not what the guest expects even when a journaling file system is in use. The
other problem is that if qemu is killed between steps 1 and 4 then the refcount
of the old cluster will be incorrect and may cause snapshot corruption.
The patch changes the order to be like this:
1. Increment refcount for newly allocated clusters
2. Write guest data into new clusters (asynchronously)
3. Copy content of old sectors that were not rewritten
4. Update L2 table with pointers to new clusters
5. Decrement refcount of old clusters
An unexpected crash may cause cluster leakage, but guest data should be safe.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
block-qcow2.c | 155 +++++++++++++++++++++++++++++++++------------------------
1 files changed, 91 insertions(+), 64 deletions(-)
diff --git a/block-qcow2.c b/block-qcow2.c
index 0771281..c600517 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -852,6 +852,69 @@ static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
+typedef struct QCowL2Meta
+{
+ uint64_t offset;
+ int n_start;
+ int nb_available;
+ int nb_clusters;
+} QCowL2Meta;
+
+static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
+ QCowL2Meta *m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, j = 0, l2_index, ret;
+ uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
+
+ if (m->nb_clusters == 0)
+ return 0;
+
+ if (!(old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t))))
+ return -ENOMEM;
+
+ /* copy content of unmodified sectors */
+ start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
+ if (m->n_start) {
+ ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
+ if (ret < 0)
+ goto err;
+ }
+
+ if (m->nb_available & (s->cluster_sectors - 1)) {
+ uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
+ ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
+ m->nb_available - end, s->cluster_sectors);
+ if (ret < 0)
+ goto err;
+ }
+
+ ret = -EIO;
+ /* update L2 table */
+ if (!get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index))
+ goto err;
+
+ for (i = 0; i < m->nb_clusters; i++) {
+ if(l2_table[l2_index + i] != 0)
+ old_cluster[j++] = l2_table[l2_index + i];
+
+ l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+ (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
+ }
+
+ if (bdrv_pwrite(s->hd, l2_offset + l2_index * sizeof(uint64_t),
+ l2_table + l2_index, m->nb_clusters * sizeof(uint64_t)) !=
+ m->nb_clusters * sizeof(uint64_t))
+ goto err;
+
+ for (i = 0; i < j; i++)
+ free_any_clusters(bs, old_cluster[i], 1);
+
+ return 0;
+err:
+ return ret;
+ }
+
/*
* alloc_cluster_offset
*
@@ -868,13 +931,12 @@ static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs,
static uint64_t alloc_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int n_start, int n_end,
- int *num)
+ int *num, QCowL2Meta *m)
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret;
uint64_t l2_offset, *l2_table, cluster_offset;
- int nb_available, nb_clusters, i = 0;
- uint64_t start_sect;
+ int nb_clusters, i = 0;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
if (ret == 0)
@@ -882,8 +944,7 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
nb_clusters = SIZE2CLUSTERS(s, n_end << 9);
- if (nb_clusters > s->l2_size - l2_index)
- nb_clusters = s->l2_size - l2_index;
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
cluster_offset = be64_to_cpu(l2_table[l2_index]);
@@ -893,11 +954,8 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
&l2_table[l2_index], 0);
- nb_available = nb_clusters << (s->cluster_bits - 9);
- if (nb_available > n_end)
- nb_available = n_end;
-
cluster_offset &= ~QCOW_OFLAG_COPIED;
+ m->nb_clusters = 0;
goto out;
}
@@ -910,7 +968,6 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
/* how many available clusters ? */
while (i < nb_clusters) {
- int j;
i += count_contiguous_free_clusters(nb_clusters - i,
&l2_table[l2_index + i]);
@@ -920,14 +977,9 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
(cluster_offset & QCOW_OFLAG_COMPRESSED))
break;
- j = count_contiguous_clusters(nb_clusters - i, s->cluster_size,
+ i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
&l2_table[l2_index + i], 0);
- if (j)
- free_any_clusters(bs, cluster_offset, j);
-
- i += j;
-
if(be64_to_cpu(l2_table[l2_index + i]))
break;
}
@@ -937,49 +989,15 @@ static uint64_t alloc_cluster_offset(BlockDriverState *bs,
cluster_offset = alloc_clusters(bs, nb_clusters * s->cluster_size);
-
- /* we must initialize the cluster content which won't be
- written */
-
- nb_available = nb_clusters << (s->cluster_bits - 9);
- if (nb_available > n_end)
- nb_available = n_end;
-
- /* copy content of unmodified sectors */
-
- start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
- if (n_start) {
- ret = copy_sectors(bs, start_sect, cluster_offset, 0, n_start);
- if (ret < 0)
- return 0;
- }
-
- if (nb_available & (s->cluster_sectors - 1)) {
- uint64_t end = nb_available & ~(uint64_t)(s->cluster_sectors - 1);
- ret = copy_sectors(bs, start_sect + end,
- cluster_offset + (end << 9),
- nb_available - end,
- s->cluster_sectors);
- if (ret < 0)
- return 0;
- }
-
- /* update L2 table */
-
- for (i = 0; i < nb_clusters; i++)
- l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
- (i << s->cluster_bits)) |
- QCOW_OFLAG_COPIED);
-
- if (bdrv_pwrite(s->hd,
- l2_offset + l2_index * sizeof(uint64_t),
- l2_table + l2_index,
- nb_clusters * sizeof(uint64_t)) !=
- nb_clusters * sizeof(uint64_t))
- return 0;
+ /* save info needed for meta data update */
+ m->offset = offset;
+ m->n_start = n_start;
+ m->nb_clusters = nb_clusters;
out:
- *num = nb_available - n_start;
+ m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
+
+ *num = m->nb_available - n_start;
return cluster_offset;
}
@@ -1110,6 +1128,7 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
int ret, index_in_cluster, n;
uint64_t cluster_offset;
int n_end;
+ QCowL2Meta l2meta;
while (nb_sectors > 0) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -1119,7 +1138,7 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
cluster_offset = alloc_cluster_offset(bs, sector_num << 9,
index_in_cluster,
- n_end, &n);
+ n_end, &n, &l2meta);
if (!cluster_offset)
return -1;
if (s->crypt_method) {
@@ -1130,8 +1149,10 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
} else {
ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
}
- if (ret != n * 512)
+ if (ret != n * 512 || alloc_cluster_link_l2(bs, cluster_offset, &l2meta) < 0) {
+ free_any_clusters(bs, cluster_offset, l2meta.nb_clusters);
return -1;
+ }
nb_sectors -= n;
sector_num += n;
buf += n * 512;
@@ -1150,6 +1171,7 @@ typedef struct QCowAIOCB {
uint8_t *cluster_data;
BlockDriverAIOCB *hd_aiocb;
QEMUBH *bh;
+ QCowL2Meta l2meta;
} QCowAIOCB;
static void qcow_aio_read_cb(void *opaque, int ret);
@@ -1275,6 +1297,7 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
acb->nb_sectors = nb_sectors;
acb->n = 0;
acb->cluster_offset = 0;
+ acb->l2meta.nb_clusters = 0;
return acb;
}
@@ -1298,7 +1321,6 @@ static void qcow_aio_write_cb(void *opaque, int ret)
BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
- uint64_t cluster_offset;
const uint8_t *src_buf;
int n_end;
@@ -1311,6 +1333,11 @@ static void qcow_aio_write_cb(void *opaque, int ret)
return;
}
+ if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
+ free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
+ goto fail;
+ }
+
acb->nb_sectors -= acb->n;
acb->sector_num += acb->n;
acb->buf += acb->n * 512;
@@ -1328,10 +1355,10 @@ static void qcow_aio_write_cb(void *opaque, int ret)
n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
- cluster_offset = alloc_cluster_offset(bs, acb->sector_num << 9,
+ acb->cluster_offset = alloc_cluster_offset(bs, acb->sector_num << 9,
index_in_cluster,
- n_end, &acb->n);
- if (!cluster_offset || (cluster_offset & 511) != 0) {
+ n_end, &acb->n, &acb->l2meta);
+ if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
ret = -EIO;
goto fail;
}
@@ -1351,7 +1378,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
src_buf = acb->buf;
}
acb->hd_aiocb = bdrv_aio_write(s->hd,
- (cluster_offset >> 9) + index_in_cluster,
+ (acb->cluster_offset >> 9) + index_in_cluster,
src_buf, acb->n,
qcow_aio_write_cb, acb);
if (acb->hd_aiocb == NULL)
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 5/5] Change order of metadata update to prevent loosing guest data because of unexpected exit.
2008-11-23 14:53 ` [Qemu-devel] [PATCH 5/5] Change order of metadata update to prevent loosing guest data because of unexpected exit Gleb Natapov
@ 2008-11-24 16:49 ` Anthony Liguori
2008-11-24 20:19 ` Gleb Natapov
0 siblings, 1 reply; 15+ messages in thread
From: Anthony Liguori @ 2008-11-24 16:49 UTC (permalink / raw)
To: qemu-devel
Gleb Natapov wrote:
> Currently the order is this (during cow since it's the interesting case):
> 1. Decrement refcount of old clusters
> 2. Increment refcount for newly allocated clusters
> 3. Copy content of old sectors that will not be rewritten
> 4. Update L2 table with pointers to new clusters
> 5. Write guest data into new clusters (asynchronously)
>
> There are several problems with this order. The first one is that if qemu
> crashes (or is killed, or the host reboots) after new clusters are linked into
> the L2 table but before user data is written there, then on the next reboot the
> guest will find neither the old data nor the new data in those sectors, and this
> is not what the guest expects even when a journaling file system is in use. The
> other problem is that if qemu is killed between steps 1 and 4 then the refcount
> of the old cluster will be incorrect and may cause snapshot corruption.
>
> The patch changes the order to be like this:
> 1. Increment refcount for newly allocated clusters
> 2. Write guest data into new clusters (asynchronously)
> 3. Copy content of old sectors that were not rewritten
> 4. Update L2 table with pointers to new clusters
> 5. Decrement refcount of old clusters
>
> Unexpected crash may cause cluster leakage, but guest data should be safe.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>
> block-qcow2.c | 155 +++++++++++++++++++++++++++++++++------------------------
> 1 files changed, 91 insertions(+), 64 deletions(-)
>
> diff --git a/block-qcow2.c b/block-qcow2.c
> index 0771281..c600517 100644
> --- a/block-qcow2.c
> +++ b/block-qcow2.c
> @@ -852,6 +852,69 @@ static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs,
> return cluster_offset;
> }
>
> +typedef struct QCowL2Meta
> +{
> + uint64_t offset;
> + int n_start;
> + int nb_available;
> + int nb_clusters;
> +} QCowL2Meta;
> +
> +static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
> + QCowL2Meta *m)
> +{
> + BDRVQcowState *s = bs->opaque;
> + int i, j = 0, l2_index, ret;
> + uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
> +
> + if (m->nb_clusters == 0)
> + return 0;
> +
> + if (!(old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t))))
> + return -ENOMEM;
>
This memory is never freed.
Regards,
Anthony Liguori
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [Qemu-devel] [PATCH 5/5] Change order of metadata update to prevent loosing guest data because of unexpected exit.
2008-11-24 16:49 ` Anthony Liguori
@ 2008-11-24 20:19 ` Gleb Natapov
0 siblings, 0 replies; 15+ messages in thread
From: Gleb Natapov @ 2008-11-24 20:19 UTC (permalink / raw)
To: qemu-devel
On Mon, Nov 24, 2008 at 10:49:46AM -0600, Anthony Liguori wrote:
>> diff --git a/block-qcow2.c b/block-qcow2.c
>> index 0771281..c600517 100644
>> --- a/block-qcow2.c
>> +++ b/block-qcow2.c
>> @@ -852,6 +852,69 @@ static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs,
>> return cluster_offset;
>> }
>> +typedef struct QCowL2Meta
>> +{
>> + uint64_t offset;
>> + int n_start;
>> + int nb_available;
>> + int nb_clusters;
>> +} QCowL2Meta;
>> +
>> +static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
>> + QCowL2Meta *m)
>> +{
>> + BDRVQcowState *s = bs->opaque;
>> + int i, j = 0, l2_index, ret;
>> + uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
>> +
>> + if (m->nb_clusters == 0)
>> + return 0;
>> +
>> + if (!(old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t))))
>> + return -ENOMEM;
>>
>
> This memory is never freed.
>
I swear there was a free() there! Well, it probably fell victim to one of
the rewrites :)
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread