From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, jcody@redhat.com
Subject: [Qemu-devel] [PATCH v2 38/45] mirror: perform COW if the cluster size is bigger than the granularity
Date: Wed, 26 Sep 2012 17:56:44 +0200 [thread overview]
Message-ID: <1348675011-8794-39-git-send-email-pbonzini@redhat.com> (raw)
In-Reply-To: <1348675011-8794-1-git-send-email-pbonzini@redhat.com>
When mirroring runs, the backing files for the target may not yet be
ready. However, this means that a copy-on-write operation on the target
would fill the missing sectors with zeros. Copy-on-write only happens
if the granularity of the dirty bitmap is smaller than the cluster size
(and only for clusters that are allocated in the source after the job
has started copying). So far, the granularity was fixed to 1MB; to avoid
the problem we detected the situation and required the backing files to
be available in that case only.
However, we want to lower the granularity for efficiency, so we need
a better solution. The solution is to always copy a whole cluster the
first time it is touched. The code keeps a bitmap of clusters that
have already been allocated by the mirroring job, and only does "manual"
copy-on-write if the chunk being copied is zero in the bitmap.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
v1->v2: new testcase
block/mirror.c | 60 +++++++++++++++++++++++++++++++++++++++-------
blockdev.c | 15 +++---------
tests/qemu-iotests/040 | 21 ++++++++++++++++
tests/qemu-iotests/040.out | 4 ++--
trace-events | 1 +
5 file modificati, 78 inserzioni(+), 23 rimozioni(-)
diff --git a/block/mirror.c b/block/mirror.c
index 72e0986..49f9bde 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -15,6 +15,7 @@
#include "blockjob.h"
#include "block_int.h"
#include "qemu/ratelimit.h"
+#include "bitmap.h"
enum {
/*
@@ -36,6 +37,8 @@ typedef struct MirrorBlockJob {
bool synced;
bool complete;
int64_t sector_num;
+ size_t buf_size;
+ unsigned long *cow_bitmap;
HBitmapIter hbi;
uint8_t *buf;
} MirrorBlockJob;
@@ -60,7 +63,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
BlockDriverState *target = s->target;
QEMUIOVector qiov;
int ret, nb_sectors;
- int64_t end;
+ int64_t end, sector_num, cluster_num;
struct iovec iov;
s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -71,22 +74,41 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
assert(s->sector_num >= 0);
}
+ /* If we have no backing file yet in the destination, and the cluster size
+ * is very large, we need to do COW ourselves. The first time a cluster is
+ * copied, copy it entirely.
+ *
+ * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
+ * powers of two, the number of sectors to copy cannot exceed one cluster.
+ */
+ sector_num = s->sector_num;
+ nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
+ cluster_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+ if (s->cow_bitmap && !test_bit(cluster_num, s->cow_bitmap)) {
+ trace_mirror_cow(s, sector_num);
+ bdrv_round_to_clusters(s->target,
+ sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
+ &sector_num, &nb_sectors);
+ bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
+ nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
+ }
+
end = s->common.len >> BDRV_SECTOR_BITS;
- nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
- bdrv_reset_dirty(source, s->sector_num, nb_sectors);
+ nb_sectors = MIN(nb_sectors, end - sector_num);
+ bdrv_reset_dirty(source, sector_num, nb_sectors);
/* Copy the dirty cluster. */
iov.iov_base = s->buf;
iov.iov_len = nb_sectors * 512;
qemu_iovec_init_external(&qiov, &iov, 1);
- trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
- ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+ trace_mirror_one_iteration(s, sector_num, nb_sectors);
+ ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov);
if (ret < 0) {
*p_action = mirror_error_action(s, true, -ret);
goto fail;
}
- ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+ ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov);
if (ret < 0) {
*p_action = mirror_error_action(s, false, -ret);
s->synced = false;
@@ -96,7 +118,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
fail:
/* Try again later. */
- bdrv_set_dirty(source, s->sector_num, nb_sectors);
+ bdrv_set_dirty(source, sector_num, nb_sectors);
return ret;
}
@@ -104,7 +126,9 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
BlockDriverState *bs = s->common.bs;
- int64_t sector_num, end;
+ int64_t sector_num, end, length;
+ BlockDriverInfo bdi;
+ char backing_filename[1024];
int ret = 0;
int n;
@@ -118,8 +142,23 @@ static void coroutine_fn mirror_run(void *opaque)
return;
}
+ /* If we have no backing file yet in the destination, we cannot let
+ * the destination do COW. Instead, we copy sectors around the
+ * dirty data if needed. We need a bitmap to do that.
+ */
+ bdrv_get_backing_filename(s->target, backing_filename,
+ sizeof(backing_filename));
+ if (backing_filename[0] && !s->target->backing_hd) {
+ bdrv_get_info(s->target, &bdi);
+ if (s->buf_size < bdi.cluster_size) {
+ s->buf_size = bdi.cluster_size;
+ length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
+ s->cow_bitmap = bitmap_new(length);
+ }
+ }
+
end = s->common.len >> BDRV_SECTOR_BITS;
- s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+ s->buf = qemu_blockalign(bs, s->buf_size);
if (s->mode != MIRROR_SYNC_MODE_NONE) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -233,6 +272,7 @@ static void coroutine_fn mirror_run(void *opaque)
immediate_exit:
g_free(s->buf);
+ g_free(s->cow_bitmap);
bdrv_set_dirty_tracking(bs, false);
bdrv_iostatus_disable(s->target);
if (s->complete && ret == 0) {
@@ -316,6 +356,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
s->on_target_error = on_target_error;
s->target = target;
s->mode = mode;
+ s->buf_size = BLOCK_SIZE;
+
bdrv_set_dirty_tracking(bs, true);
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
diff --git a/blockdev.c b/blockdev.c
index 84fee2f..c989ce6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1126,7 +1126,6 @@ void qmp_drive_mirror(const char *device, const char *target,
bool has_on_target_error, BlockdevOnError on_target_error,
Error **errp)
{
- BlockDriverInfo bdi;
BlockDriverState *bs;
BlockDriverState *source, *target_bs;
BlockDriver *proto_drv;
@@ -1217,6 +1216,9 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
+ /* Mirroring takes care of copy-on-write using the source's backing
+ * file.
+ */
target_bs = bdrv_new("");
ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
@@ -1226,17 +1228,6 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
- /* We need a backing file if we will copy parts of a cluster. */
- if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
- bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
- ret = bdrv_open_backing_file(target_bs);
- if (ret < 0) {
- bdrv_delete(target_bs);
- error_set(errp, QERR_OPEN_FILE_FAILED, target);
- return;
- }
- }
-
mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index ec86c70..39d07a6 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -283,6 +283,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase):
self.assertTrue(self.compare_images(test_img, target_img),
'target image does not match source after mirroring')
+ def test_large_cluster(self):
+ self.assert_no_active_mirrors()
+
+ # qemu-img create fails if the backing image is not there
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d'
+ %(TestMirrorNoBacking.image_len), target_backing_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
+ % (TestMirrorNoBacking.image_len, target_backing_img), target_img)
+ os.remove(target_backing_img)
+
+ result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ mode='existing', target=target_img)
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait()
+ result = self.vm.qmp('query-block')
+ self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+ self.vm.shutdown()
+ self.assertTrue(self.compare_images(test_img, target_img),
+ 'target image does not match source after mirroring')
+
class TestReadErrors(ImageMirroringTestCase):
image_len = 2 * 1024 * 1024 # MB
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
index b6f2576..52d796e 100644
--- a/tests/qemu-iotests/040.out
+++ b/tests/qemu-iotests/040.out
@@ -1,5 +1,5 @@
-................
+.................
----------------------------------------------------------------------
-Ran 16 tests
+Ran 17 tests
OK
diff --git a/trace-events b/trace-events
index 99818d5..8bca020 100644
--- a/trace-events
+++ b/trace-events
@@ -82,6 +82,7 @@ mirror_before_flush(void *s) "s %p"
mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
--
1.7.12
next prev parent reply other threads:[~2012-09-26 16:00 UTC|newest]
Thread overview: 102+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-26 15:56 [Qemu-devel] [PATCH v2 00/45] Block job improvements for 1.3 Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 01/45] qerror/block: introduce QERR_BLOCK_JOB_NOT_ACTIVE Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 02/45] blockdev: rename block_stream_cb to a generic block_job_cb Paolo Bonzini
2012-09-27 11:56 ` Kevin Wolf
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 03/45] block: fix documentation of block_job_cancel_sync Paolo Bonzini
2012-09-27 12:03 ` Kevin Wolf
2012-09-27 12:08 ` Paolo Bonzini
2012-09-27 12:13 ` Kevin Wolf
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 04/45] block: move job APIs to separate files Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 05/45] block: add block_job_query Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 06/45] block: add support for job pause/resume Paolo Bonzini
2012-09-26 17:31 ` Eric Blake
2012-09-27 12:18 ` Kevin Wolf
2012-09-27 12:27 ` Paolo Bonzini
2012-09-27 12:45 ` Kevin Wolf
2012-09-27 12:57 ` Paolo Bonzini
2012-09-27 13:51 ` Kevin Wolf
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 07/45] qmp: add block-job-pause and block-job-resume Paolo Bonzini
2012-09-26 17:45 ` Eric Blake
2012-09-27 9:23 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 08/45] qemu-iotests: add test for pausing a streaming operation Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 09/45] block: rename block_job_complete to block_job_completed Paolo Bonzini
2012-09-27 12:30 ` Kevin Wolf
2012-09-27 20:31 ` Jeff Cody
2012-09-28 11:00 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 10/45] iostatus: rename BlockErrorAction, BlockQMPEventAction Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 11/45] iostatus: move BlockdevOnError declaration to QAPI Paolo Bonzini
2012-09-26 17:54 ` Eric Blake
2012-09-27 9:23 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 12/45] iostatus: change is_read to a bool Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 13/45] iostatus: reorganize io error code Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 14/45] block: introduce block job error Paolo Bonzini
2012-09-26 19:10 ` Eric Blake
2012-09-26 19:27 ` Eric Blake
2012-09-27 9:24 ` Paolo Bonzini
2012-09-27 13:41 ` Kevin Wolf
2012-09-27 14:50 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 15/45] stream: add on-error argument Paolo Bonzini
2012-09-26 20:53 ` Eric Blake
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 16/45] blkdebug: process all set_state rules in the old state Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 17/45] qemu-iotests: map underscore to dash in QMP argument names Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 18/45] qemu-iotests: add tests for streaming error handling Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 19/45] block: add bdrv_query_info Paolo Bonzini
2012-10-15 15:42 ` Kevin Wolf
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 20/45] block: add bdrv_query_stats Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 21/45] block: add bdrv_open_backing_file Paolo Bonzini
2012-09-27 18:14 ` Jeff Cody
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 22/45] block: introduce new dirty bitmap functionality Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 23/45] block: export dirty bitmap information in query-block Paolo Bonzini
2012-10-15 16:08 ` Kevin Wolf
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 24/45] block: add block-job-complete Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 25/45] block: introduce BLOCK_JOB_READY event Paolo Bonzini
2012-09-27 0:01 ` Eric Blake
2012-09-27 9:25 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 26/45] mirror: introduce mirror job Paolo Bonzini
2012-10-15 16:57 ` Kevin Wolf
2012-10-16 6:36 ` Paolo Bonzini
2012-10-16 8:24 ` Kevin Wolf
2012-10-16 8:35 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 27/45] qmp: add drive-mirror command Paolo Bonzini
2012-09-27 0:14 ` Eric Blake
2012-09-27 19:49 ` Jeff Cody
2012-10-15 17:33 ` Kevin Wolf
2012-10-16 6:39 ` Paolo Bonzini
2012-10-18 13:13 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 28/45] mirror: implement completion Paolo Bonzini
2012-10-15 17:49 ` Kevin Wolf
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 29/45] qemu-iotests: add mirroring test case Paolo Bonzini
2012-09-27 0:26 ` Eric Blake
2012-10-18 12:43 ` Kevin Wolf
2012-10-18 12:50 ` Paolo Bonzini
2012-10-18 13:08 ` Kevin Wolf
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 30/45] iostatus: forward block_job_iostatus_reset to block job Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 31/45] mirror: add support for on-source-error/on-target-error Paolo Bonzini
2012-10-18 13:07 ` Kevin Wolf
2012-10-18 13:10 ` Paolo Bonzini
2012-10-18 13:56 ` Kevin Wolf
2012-10-18 14:52 ` Paolo Bonzini
2012-10-19 8:04 ` Kevin Wolf
2012-10-19 9:30 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 32/45] qmp: add pull_event function Paolo Bonzini
2012-09-26 17:17 ` Luiz Capitulino
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 33/45] qemu-iotests: add testcases for mirroring on-source-error/on-target-error Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 34/45] host-utils: add ffsl Paolo Bonzini
2012-09-27 1:14 ` Eric Blake
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 35/45] add hierarchical bitmap data type and test cases Paolo Bonzini
2012-09-27 2:53 ` Eric Blake
2012-09-27 9:27 ` Paolo Bonzini
2012-10-24 14:41 ` Kevin Wolf
2012-10-24 14:50 ` Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 36/45] block: implement dirty bitmap using HBitmap Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 37/45] block: make round_to_clusters public Paolo Bonzini
2012-09-26 15:56 ` Paolo Bonzini [this message]
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 39/45] block: return count of dirty sectors, not chunks Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 40/45] block: allow customizing the granularity of the dirty bitmap Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 41/45] mirror: allow customizing the granularity Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 42/45] mirror: switch mirror_iteration to AIO Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 43/45] mirror: add buf-size argument to drive-mirror Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 44/45] mirror: support more than one in-flight AIO operation Paolo Bonzini
2012-09-26 15:56 ` [Qemu-devel] [PATCH v2 45/45] mirror: support arbitrarily-sized iterations Paolo Bonzini
2012-09-27 14:05 ` [Qemu-devel] [PATCH v2 00/45] Block job improvements for 1.3 Kevin Wolf
2012-09-27 14:57 ` Paolo Bonzini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1348675011-8794-39-git-send-email-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=jcody@redhat.com \
--cc=kwolf@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).