* [Qemu-devel] [PATCH v2] qcow2: Unlock during COW
@ 2011-11-14 17:55 Kevin Wolf
2011-11-15 12:07 ` Stefan Hajnoczi
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Kevin Wolf @ 2011-11-14 17:55 UTC (permalink / raw)
To: qemu-devel; +Cc: kwolf
Unlocking during COW allows for more parallelism. One change it requires is
that buffers are dynamically allocated instead of just using a per-image
buffer.
While touching the code, drop the synchronous qcow2_read() function and replace
it by a bdrv_read() call.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2-cluster.c | 104 ++++++++++++++++--------------------------------
1 files changed, 35 insertions(+), 69 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f4e049f..0e33707 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -289,89 +289,51 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
}
}
-
-static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- BDRVQcowState *s = bs->opaque;
- int ret, index_in_cluster, n, n1;
- uint64_t cluster_offset;
- struct iovec iov;
- QEMUIOVector qiov;
-
- while (nb_sectors > 0) {
- n = nb_sectors;
-
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
- &cluster_offset);
- if (ret < 0) {
- return ret;
- }
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- if (!cluster_offset) {
- if (bs->backing_hd) {
- /* read from the base image */
- iov.iov_base = buf;
- iov.iov_len = n * 512;
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
- if (n1 > 0) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
- ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
- if (ret < 0)
- return -1;
- }
- } else {
- memset(buf, 0, 512 * n);
- }
- } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
- return -1;
- memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
- } else {
- BLKDBG_EVENT(bs->file, BLKDBG_READ);
- ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
- if (ret != n * 512)
- return -1;
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
- &s->aes_decrypt_key);
- }
- }
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- }
- return 0;
-}
-
static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
uint64_t cluster_offset, int n_start, int n_end)
{
BDRVQcowState *s = bs->opaque;
int n, ret;
+ void *buf;
+
+ /*
+ * If this is the last cluster and it is only partially used, we must only
+ * copy until the end of the image, or bdrv_check_request will fail for the
+ * bdrv_read/write calls below.
+ */
+ if (start_sect + n_end > bs->total_sectors) {
+ n_end = bs->total_sectors - start_sect;
+ }
n = n_end - n_start;
- if (n <= 0)
+ if (n <= 0) {
return 0;
+ }
+
+ buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
+
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
- ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
- if (ret < 0)
- return ret;
+ ret = bdrv_read(bs, start_sect + n_start, buf, n);
+ if (ret < 0) {
+ goto out;
+ }
+
if (s->crypt_method) {
qcow2_encrypt_sectors(s, start_sect + n_start,
- s->cluster_data,
- s->cluster_data, n, 1,
+ buf, buf, n, 1,
&s->aes_encrypt_key);
}
+
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
- s->cluster_data, n);
- if (ret < 0)
- return ret;
- return 0;
+ ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, buf, n);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0;
+out:
+ qemu_vfree(buf);
+ return ret;
}
@@ -620,7 +582,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
if (m->n_start) {
cow = true;
+ qemu_co_mutex_unlock(&s->lock);
ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
+ qemu_co_mutex_lock(&s->lock);
if (ret < 0)
goto err;
}
@@ -628,8 +592,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
if (m->nb_available & (s->cluster_sectors - 1)) {
uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
cow = true;
+ qemu_co_mutex_unlock(&s->lock);
ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
m->nb_available - end, s->cluster_sectors);
+ qemu_co_mutex_lock(&s->lock);
if (ret < 0)
goto err;
}
--
1.7.6.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] qcow2: Unlock during COW
2011-11-14 17:55 [Qemu-devel] [PATCH v2] qcow2: Unlock during COW Kevin Wolf
@ 2011-11-15 12:07 ` Stefan Hajnoczi
2011-11-23 17:01 ` Kevin Wolf
2012-05-05 0:21 ` Marc-André Lureau
2 siblings, 0 replies; 5+ messages in thread
From: Stefan Hajnoczi @ 2011-11-15 12:07 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-devel
On Mon, Nov 14, 2011 at 06:55:18PM +0100, Kevin Wolf wrote:
> Unlocking during COW allows for more parallelism. One change it requires is
> that buffers are dynamically allocated instead of just using a per-image
> buffer.
>
> While touching the code, drop the synchronous qcow2_read() function and replace
> it by a bdrv_read() call.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> block/qcow2-cluster.c | 104 ++++++++++++++++--------------------------------
> 1 files changed, 35 insertions(+), 69 deletions(-)
This should be safe because dependent requests are queued so we can
perform copy_sectors() in parallel with the non-dependent requests.
> -static int qcow2_read(BlockDriverState *bs, int64_t sector_num,
> - uint8_t *buf, int nb_sectors)
> -{
> - BDRVQcowState *s = bs->opaque;
> - int ret, index_in_cluster, n, n1;
> - uint64_t cluster_offset;
> - struct iovec iov;
> - QEMUIOVector qiov;
> -
> - while (nb_sectors > 0) {
> - n = nb_sectors;
> -
> - ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
> - &cluster_offset);
> - if (ret < 0) {
> - return ret;
> - }
> -
> - index_in_cluster = sector_num & (s->cluster_sectors - 1);
> - if (!cluster_offset) {
> - if (bs->backing_hd) {
> - /* read from the base image */
> - iov.iov_base = buf;
> - iov.iov_len = n * 512;
> - qemu_iovec_init_external(&qiov, &iov, 1);
> -
> - n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
> - if (n1 > 0) {
> - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
> - ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
> - if (ret < 0)
> - return -1;
> - }
> - } else {
> - memset(buf, 0, 512 * n);
> - }
> - } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
> - if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
> - return -1;
> - memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
> - } else {
> - BLKDBG_EVENT(bs->file, BLKDBG_READ);
BLKDBG_READ is now unused, there is no other occurrence in QEMU. Not
sure if you want to remove it from blkdebug now?
> - ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
> - if (ret != n * 512)
> - return -1;
> - if (s->crypt_method) {
> - qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
> - &s->aes_decrypt_key);
> - }
> - }
> - nb_sectors -= n;
> - sector_num += n;
> - buf += n * 512;
> - }
> - return 0;
> -}
> -
> static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
> uint64_t cluster_offset, int n_start, int n_end)
> {
> BDRVQcowState *s = bs->opaque;
> int n, ret;
> + void *buf;
> +
> + /*
> + * If this is the last cluster and it is only partially used, we must only
> + * copy until the end of the image, or bdrv_check_request will fail for the
> + * bdrv_read/write calls below.
> + */
> + if (start_sect + n_end > bs->total_sectors) {
> + n_end = bs->total_sectors - start_sect;
> + }
>
> n = n_end - n_start;
> - if (n <= 0)
> + if (n <= 0) {
> return 0;
> + }
> +
> + buf = qemu_blockalign(bs, n * BDRV_SECTOR_SIZE);
> +
> BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
> - ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
> - if (ret < 0)
> - return ret;
> + ret = bdrv_read(bs, start_sect + n_start, buf, n);
> + if (ret < 0) {
> + goto out;
> + }
> +
> if (s->crypt_method) {
> qcow2_encrypt_sectors(s, start_sect + n_start,
> - s->cluster_data,
> - s->cluster_data, n, 1,
> + buf, buf, n, 1,
> &s->aes_encrypt_key);
> }
> +
> BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
> - ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
> - s->cluster_data, n);
> - if (ret < 0)
> - return ret;
> - return 0;
> + ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start, buf, n);
> + if (ret < 0) {
> + goto out;
> + }
> +
> + ret = 0;
bdrv_write() returns 0 on success or -errno on failure. We don't need
to check for ret < 0 or set ret = 0 here. Just continuing execution
should be fine unless I missed something.
Stefan
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] qcow2: Unlock during COW
2011-11-14 17:55 [Qemu-devel] [PATCH v2] qcow2: Unlock during COW Kevin Wolf
2011-11-15 12:07 ` Stefan Hajnoczi
@ 2011-11-23 17:01 ` Kevin Wolf
2012-05-05 0:21 ` Marc-André Lureau
2 siblings, 0 replies; 5+ messages in thread
From: Kevin Wolf @ 2011-11-23 17:01 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-devel
Am 14.11.2011 18:55, schrieb Kevin Wolf:
> Unlocking during COW allows for more parallelism. One change it requires is
> that buffers are dynamically allocated instead of just using a per-image
> buffer.
>
> While touching the code, drop the synchronous qcow2_read() function and replace
> it by a bdrv_read() call.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This seems to break an IDE guest for me. I don't quite understand why
yet, but maybe someone has an idea.
hw/ide/pci.c:313: bmdma_cmd_writeb: Assertion `bm->bus->dma->aiocb ==
((void *)0)' failed.
Kevin
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] qcow2: Unlock during COW
2011-11-14 17:55 [Qemu-devel] [PATCH v2] qcow2: Unlock during COW Kevin Wolf
2011-11-15 12:07 ` Stefan Hajnoczi
2011-11-23 17:01 ` Kevin Wolf
@ 2012-05-05 0:21 ` Marc-André Lureau
2012-05-05 3:17 ` Zhi Yong Wu
2 siblings, 1 reply; 5+ messages in thread
From: Marc-André Lureau @ 2012-05-05 0:21 UTC (permalink / raw)
To: Kevin Wolf; +Cc: qemu-devel
Hi
On Mon, Nov 14, 2011 at 6:55 PM, Kevin Wolf <kwolf@redhat.com> wrote:
> Unlocking during COW allows for more parallelism. One change it requires is
> that buffers are dynamically allocated instead of just using a per-image
> buffer.
This commit introduces failure with preallocation and some sizes:
qemu-img create -f qcow2 new.img 976563K -o preallocation=metadata
qemu-img: qemu-coroutine-lock.c:111: qemu_co_mutex_unlock: Assertion
`mutex->locked == 1' failed.
regards
--
Marc-André Lureau
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] qcow2: Unlock during COW
2012-05-05 0:21 ` Marc-André Lureau
@ 2012-05-05 3:17 ` Zhi Yong Wu
0 siblings, 0 replies; 5+ messages in thread
From: Zhi Yong Wu @ 2012-05-05 3:17 UTC (permalink / raw)
To: Marc-André Lureau; +Cc: Kevin Wolf, Stefan Hajnoczi, qemu-devel
On Sat, May 5, 2012 at 8:21 AM, Marc-André Lureau
<marcandre.lureau@gmail.com> wrote:
> Hi
>
> On Mon, Nov 14, 2011 at 6:55 PM, Kevin Wolf <kwolf@redhat.com> wrote:
>> Unlocking during COW allows for more parallelism. One change it requires is
>> that buffers are dynamically allocated instead of just using a per-image
>> buffer.
>
> This commit introduces failure with preallocation and some sizes:
>
> qemu-img create -f qcow2 new.img 976563K -o preallocation=metadata
> qemu-img: qemu-coroutine-lock.c:111: qemu_co_mutex_unlock: Assertion
> `mutex->locked == 1' failed.
For this issue, i have submitted two patches, can you help test agaist
my patchs?
1. [1/2] block: make bdrv_create adopt coroutine
http://patchwork.ozlabs.org/patch/157022/
2. [2/2] qcow2: lock on prealloc
http://patchwork.ozlabs.org/patch/157023/
>
> regards
>
> --
> Marc-André Lureau
>
--
Regards,
Zhi Yong Wu
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2012-05-05 3:17 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-11-14 17:55 [Qemu-devel] [PATCH v2] qcow2: Unlock during COW Kevin Wolf
2011-11-15 12:07 ` Stefan Hajnoczi
2011-11-23 17:01 ` Kevin Wolf
2012-05-05 0:21 ` Marc-André Lureau
2012-05-05 3:17 ` Zhi Yong Wu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).