qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, jcody@redhat.com, stefanha@redhat.com
Subject: [Qemu-devel] [PATCH 05/20] mirror: perform COW if the cluster size is bigger than the granularity
Date: Wed, 12 Dec 2012 14:46:24 +0100	[thread overview]
Message-ID: <1355319999-30627-6-git-send-email-pbonzini@redhat.com> (raw)
In-Reply-To: <1355319999-30627-1-git-send-email-pbonzini@redhat.com>

When mirroring runs, the backing files for the target may not yet be
ready.  However, this means that a copy-on-write operation on the target
would fill the missing sectors with zeros.  Copy-on-write only happens
if the granularity of the dirty bitmap is smaller than the cluster size
(and only for clusters that are allocated in the source after the job
has started copying).  So far, the granularity was fixed to 1MB; to avoid
the problem we detected the situation and required the backing files to
be available in that case only.

However, we want to lower the granularity for efficiency, so we need
a better solution.  The solution is to always copy a whole cluster the
first time it is touched.  The code keeps a bitmap of clusters that
have already been allocated by the mirroring job, and only does "manual"
copy-on-write if the chunk being copied is zero in the bitmap.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/mirror.c             | 60 +++++++++++++++++++++++++++++++++++++++-------
 blockdev.c                 | 15 +++---------
 tests/qemu-iotests/041     | 21 ++++++++++++++++
 tests/qemu-iotests/041.out |  4 ++--
 trace-events               |  1 +
 5 files changed, 78 insertions(+), 23 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 30bb267..14ce1a7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -15,6 +15,7 @@
 #include "blockjob.h"
 #include "block_int.h"
 #include "qemu/ratelimit.h"
+#include "bitmap.h"
 
 enum {
     /*
@@ -36,6 +37,8 @@ typedef struct MirrorBlockJob {
     bool synced;
     bool should_complete;
     int64_t sector_num;
+    size_t buf_size;
+    unsigned long *cow_bitmap;
     HBitmapIter hbi;
     uint8_t *buf;
 } MirrorBlockJob;
@@ -60,7 +63,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
     BlockDriverState *target = s->target;
     QEMUIOVector qiov;
     int ret, nb_sectors;
-    int64_t end;
+    int64_t end, sector_num, cluster_num;
     struct iovec iov;
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -71,22 +74,41 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
         assert(s->sector_num >= 0);
     }
 
+    /* If we have no backing file yet in the destination, and the cluster size
+     * is very large, we need to do COW ourselves.  The first time a cluster is
+     * copied, copy it entirely.
+     *
+     * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
+     * powers of two, the number of sectors to copy cannot exceed one cluster.
+     */
+    sector_num = s->sector_num;
+    nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
+    cluster_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+    if (s->cow_bitmap && !test_bit(cluster_num, s->cow_bitmap)) {
+        trace_mirror_cow(s, sector_num);
+        bdrv_round_to_clusters(s->target,
+                               sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
+                               &sector_num, &nb_sectors);
+        bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
+                   nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
+    }
+
     end = s->common.len >> BDRV_SECTOR_BITS;
-    nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
-    bdrv_reset_dirty(source, s->sector_num, nb_sectors);
+    nb_sectors = MIN(nb_sectors, end - sector_num);
+    bdrv_reset_dirty(source, sector_num, nb_sectors);
 
     /* Copy the dirty cluster.  */
     iov.iov_base = s->buf;
     iov.iov_len  = nb_sectors * 512;
     qemu_iovec_init_external(&qiov, &iov, 1);
 
-    trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
-    ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+    trace_mirror_one_iteration(s, sector_num, nb_sectors);
+    ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov);
     if (ret < 0) {
         *p_action = mirror_error_action(s, true, -ret);
         goto fail;
     }
-    ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+    ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov);
     if (ret < 0) {
         *p_action = mirror_error_action(s, false, -ret);
         s->synced = false;
@@ -96,7 +118,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
 
 fail:
     /* Try again later.  */
-    bdrv_set_dirty(source, s->sector_num, nb_sectors);
+    bdrv_set_dirty(source, sector_num, nb_sectors);
     return ret;
 }
 
@@ -104,7 +126,9 @@ static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     BlockDriverState *bs = s->common.bs;
-    int64_t sector_num, end;
+    int64_t sector_num, end, length;
+    BlockDriverInfo bdi;
+    char backing_filename[1024];
     int ret = 0;
     int n;
 
@@ -118,8 +142,23 @@ static void coroutine_fn mirror_run(void *opaque)
         return;
     }
 
+    /* If we have no backing file yet in the destination, we cannot let
+     * the destination do COW.  Instead, we copy sectors around the
+     * dirty data if needed.  We need a bitmap to do that.
+     */
+    bdrv_get_backing_filename(s->target, backing_filename,
+                              sizeof(backing_filename));
+    if (backing_filename[0] && !s->target->backing_hd) {
+        bdrv_get_info(s->target, &bdi);
+        if (s->buf_size < bdi.cluster_size) {
+            s->buf_size = bdi.cluster_size;
+            length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
+            s->cow_bitmap = bitmap_new(length);
+        }
+    }
+
     end = s->common.len >> BDRV_SECTOR_BITS;
-    s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+    s->buf = qemu_blockalign(bs, s->buf_size);
 
     if (s->mode != MIRROR_SYNC_MODE_NONE) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
@@ -234,6 +273,7 @@ static void coroutine_fn mirror_run(void *opaque)
 
 immediate_exit:
     g_free(s->buf);
+    g_free(s->cow_bitmap);
     bdrv_set_dirty_tracking(bs, false);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
@@ -320,6 +360,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->on_target_error = on_target_error;
     s->target = target;
     s->mode = mode;
+    s->buf_size = BLOCK_SIZE;
+
     bdrv_set_dirty_tracking(bs, true);
     bdrv_set_enable_write_cache(s->target, true);
     bdrv_set_on_error(s->target, on_target_error, on_target_error);
diff --git a/blockdev.c b/blockdev.c
index e73fd6e..f309dba 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1196,7 +1196,6 @@ void qmp_drive_mirror(const char *device, const char *target,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
 {
-    BlockDriverInfo bdi;
     BlockDriverState *bs;
     BlockDriverState *source, *target_bs;
     BlockDriver *proto_drv;
@@ -1287,6 +1286,9 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    /* Mirroring takes care of copy-on-write using the source's backing
+     * file.
+     */
     target_bs = bdrv_new("");
     ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
 
@@ -1296,17 +1298,6 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
-    /* We need a backing file if we will copy parts of a cluster.  */
-    if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
-        bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
-        ret = bdrv_open_backing_file(target_bs);
-        if (ret < 0) {
-            bdrv_delete(target_bs);
-            error_set(errp, QERR_OPEN_FILE_FAILED, target);
-            return;
-        }
-    }
-
     mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index c6eb851..d0e9a7f 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -292,6 +292,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase):
         self.assertTrue(self.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
+    def test_large_cluster(self):
+        self.assert_no_active_mirrors()
+
+        # qemu-img create fails if the image is not there
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d'
+                        %(TestMirrorNoBacking.image_len), target_backing_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
+                        % (TestMirrorNoBacking.image_len, target_backing_img), target_img)
+        os.remove(target_backing_img)
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
 class TestReadErrors(ImageMirroringTestCase):
     image_len = 2 * 1024 * 1024 # MB
 
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 71009c2..4176bb9 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-..................
+...................
 ----------------------------------------------------------------------
-Ran 18 tests
+Ran 19 tests
 
 OK
diff --git a/trace-events b/trace-events
index b868ac6..b318b4e 100644
--- a/trace-events
+++ b/trace-events
@@ -84,6 +84,7 @@ mirror_before_flush(void *s) "s %p"
 mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
 mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-- 
1.8.0.1

  parent reply	other threads:[~2012-12-12 13:47 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-12-12 13:46 [Qemu-devel] [PATCH 00/20] Block device mirroring enhancements, 12-12-12 edition Paolo Bonzini
2012-12-12 13:46 ` [Qemu-devel] [PATCH 01/20] host-utils: add ffsl Paolo Bonzini
2012-12-12 23:41   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 02/20] add hierarchical bitmap data type and test cases Paolo Bonzini
2012-12-14  0:04   ` Eric Blake
2013-01-11 18:27   ` Stefan Hajnoczi
2012-12-12 13:46 ` [Qemu-devel] [PATCH 03/20] block: implement dirty bitmap using HBitmap Paolo Bonzini
2012-12-14  0:27   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 04/20] block: make round_to_clusters public Paolo Bonzini
2012-12-14 20:13   ` Eric Blake
2012-12-12 13:46 ` Paolo Bonzini [this message]
2012-12-14 20:21   ` [Qemu-devel] [PATCH 05/20] mirror: perform COW if the cluster size is bigger than the granularity Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 06/20] block: return count of dirty sectors, not chunks Paolo Bonzini
2012-12-14 20:49   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 07/20] block: allow customizing the granularity of the dirty bitmap Paolo Bonzini
2012-12-14 21:27   ` Eric Blake
2012-12-15  9:11     ` Paolo Bonzini
2012-12-12 13:46 ` [Qemu-devel] [PATCH 08/20] mirror: allow customizing the granularity Paolo Bonzini
2012-12-14 22:01   ` Eric Blake
2013-01-14 11:28   ` Stefan Hajnoczi
2012-12-12 13:46 ` [Qemu-devel] [PATCH 09/20] mirror: switch mirror_iteration to AIO Paolo Bonzini
2012-12-14 22:11   ` Eric Blake
2012-12-15  9:09     ` Paolo Bonzini
2012-12-15 13:05       ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 10/20] mirror: add buf-size argument to drive-mirror Paolo Bonzini
2012-12-14 22:22   ` Eric Blake
2012-12-15  9:09     ` Paolo Bonzini
2013-01-14 11:41   ` Stefan Hajnoczi
2012-12-12 13:46 ` [Qemu-devel] [PATCH 11/20] mirror: support more than one in-flight AIO operation Paolo Bonzini
2012-12-14 22:32   ` Eric Blake
2013-01-14 12:56   ` Stefan Hajnoczi
2013-01-14 13:28     ` Paolo Bonzini
2012-12-12 13:46 ` [Qemu-devel] [PATCH 12/20] mirror: support arbitrarily-sized iterations Paolo Bonzini
2012-12-14 22:39   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 13/20] oslib: add a wrapper for mmap/munmap Paolo Bonzini
2012-12-14 22:54   ` Eric Blake
2012-12-15  9:06     ` Paolo Bonzini
2012-12-12 13:46 ` [Qemu-devel] [PATCH 14/20] hbitmap: add hbitmap_alloc_with_data and hbitmap_required_size Paolo Bonzini
2012-12-17 17:14   ` Eric Blake
2012-12-17 17:18     ` Paolo Bonzini
2012-12-12 13:46 ` [Qemu-devel] [PATCH 15/20] hbitmap: add hbitmap_copy Paolo Bonzini
2012-12-17 18:25   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 16/20] block: split bdrv_enable_dirty_tracking and bdrv_disable_dirty_tracking Paolo Bonzini
2012-12-20 18:26   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 17/20] block: support a persistent dirty bitmap Paolo Bonzini
2012-12-20 23:03   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 18/20] mirror: add support for " Paolo Bonzini
2012-12-20 23:49   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 19/20] block: choose the default dirty bitmap granularity in bdrv_enable_dirty_tracking Paolo Bonzini
2012-12-20 23:53   ` Eric Blake
2012-12-12 13:46 ` [Qemu-devel] [PATCH 20/20] monitor: add commands to start/stop dirty bitmap Paolo Bonzini
2012-12-21 18:30   ` Eric Blake
2013-01-14 13:02 ` [Qemu-devel] [PATCH 00/20] Block device mirroring enhancements, 12-12-12 edition Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1355319999-30627-6-git-send-email-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=jcody@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).