From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: kwolf@redhat.com, stefanha@linux.vnet.ibm.com, lcapitulino@redhat.com
Subject: [Qemu-devel] [RFC PATCH 33/36] mirror: perform COW if the cluster size is bigger than the granularity
Date: Fri, 15 Jun 2012 17:05:56 +0200 [thread overview]
Message-ID: <1339772759-31004-34-git-send-email-pbonzini@redhat.com> (raw)
In-Reply-To: <1339772759-31004-1-git-send-email-pbonzini@redhat.com>
When mirroring runs, the backing files for the target are not yet ready.
This means that a copy-on-write operation on the target would
fill the missing sectors with zeros. Avoid this by always copying a
whole cluster the first time it is touched.
The code keeps a bitmap of clusters that have already been allocated
by the mirroring job, and only does "manual" copy-on-write if the
chunk being copied is zero in the bitmap.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
block/mirror.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++--------
blockdev.c | 2 --
2 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/block/mirror.c b/block/mirror.c
index 787b763..fcedd66 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -15,6 +15,7 @@
#include "blockjob.h"
#include "block_int.h"
#include "qemu/ratelimit.h"
+#include "bitmap.h"
enum {
/*
@@ -36,6 +37,8 @@ typedef struct MirrorBlockJob {
bool synced;
bool complete;
int64_t sector_num;
+ int64_t buf_size;
+ unsigned long *cow_bitmap;
HBitmapIter hbi;
void *buf;
} MirrorBlockJob;
@@ -47,7 +50,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
BlockDriverState *target = s->target;
QEMUIOVector qiov;
int ret, nb_sectors;
- int64_t end;
+ int64_t end, sector_num, cluster_num;
struct iovec iov;
s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -57,23 +60,41 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
assert(s->sector_num >= 0);
}
+ /* If we have no backing file yet in the destination, and the cluster size
+ * is very large, we need to do COW ourselves. The first time a cluster is
+ * copied, copy it entirely.
+ *
+ * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
+ * powers of two, the number of sectors to copy cannot exceed one cluster.
+ */
+ sector_num = s->sector_num;
+ nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
+ cluster_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+ if (s->cow_bitmap && !test_bit(cluster_num, s->cow_bitmap)) {
+ bdrv_round_to_clusters(s->target,
+ sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
+ &sector_num, &nb_sectors);
+ bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
+ nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
+ }
+
end = s->common.len >> BDRV_SECTOR_BITS;
- nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
- trace_mirror_one_iteration(s, s->sector_num);
- bdrv_reset_dirty(source, s->sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK);
+ nb_sectors = MIN(nb_sectors, end - sector_num);
+ trace_mirror_one_iteration(s, sector_num);
+ bdrv_reset_dirty(source, sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK);
/* Copy the dirty cluster. */
iov.iov_base = s->buf;
iov.iov_len = nb_sectors * 512;
qemu_iovec_init_external(&qiov, &iov, 1);
- ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+ ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov);
if (ret < 0) {
*p_action = block_job_error_action(&s->common, source,
s->on_source_error, true, -ret);
goto fail;
}
- ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+ ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov);
if (ret < 0) {
*p_action = block_job_error_action(&s->common, target,
s->on_target_error, false, -ret);
@@ -84,7 +105,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
fail:
/* Try again later. */
- bdrv_set_dirty(source, s->sector_num, nb_sectors);
+ bdrv_set_dirty(source, sector_num, nb_sectors);
return ret;
}
@@ -107,7 +128,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
end = s->common.len >> BDRV_SECTOR_BITS;
- s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+ s->buf = qemu_blockalign(bs, s->buf_size);
if (s->mode == MIRROR_SYNC_MODE_FULL || s->mode == MIRROR_SYNC_MODE_TOP) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
@@ -211,6 +232,7 @@ static void coroutine_fn mirror_run(void *opaque)
immediate_exit:
g_free(s->buf);
+ g_free(s->cow_bitmap);
bdrv_set_dirty_tracking(bs, false);
bdrv_iostatus_disable(s->target);
if (s->synced && ret == 0) {
@@ -292,6 +314,9 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
void *opaque, Error **errp)
{
MirrorBlockJob *s;
+ BlockDriverInfo bdi;
+ char backing_filename[1024];
+ int64_t length;
s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
if (!s) {
@@ -302,6 +327,23 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
s->on_target_error = on_target_error;
s->target = target;
s->mode = mode;
+
+ /* If we have no backing file yet in the destination, we cannot let
+ * the destination do COW. Instead, we copy sectors around the
+ * dirty data if needed.
+ */
+ s->buf_size = BLOCK_SIZE;
+ bdrv_get_backing_filename(s->target, backing_filename,
+ sizeof(backing_filename));
+ if (backing_filename[0] && !s->target->backing_hd) {
+ bdrv_get_info(s->target, &bdi);
+ if (s->buf_size < bdi.cluster_size) {
+ s->buf_size = bdi.cluster_size;
+ length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
+ s->cow_bitmap = bitmap_new(length);
+ }
+ }
+
bdrv_set_dirty_tracking(bs, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
diff --git a/blockdev.c b/blockdev.c
index b46a86c..f940e8f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -927,8 +927,6 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
- /* ### TODO check for cluster size vs. dirty bitmap granularity */
-
target_bs = bdrv_new("");
ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
--
1.7.10.2
next prev parent reply other threads:[~2012-06-15 15:08 UTC|newest]
Thread overview: 65+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-15 15:05 [Qemu-devel] [RFC PATCH 00/36] A peek at the current block job patches Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 01/36] qapi: generalize documentation of streaming commands Paolo Bonzini
2012-06-15 16:45 ` Eric Blake
2012-07-11 16:00 ` Paolo Bonzini
2012-07-12 8:07 ` Kevin Wolf
2012-07-12 20:41 ` Blue Swirl
2012-07-13 9:13 ` Kevin Wolf
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 02/36] qerror/block: introduce QERR_BLOCK_JOB_NOT_ACTIVE Paolo Bonzini
2012-06-15 16:51 ` Eric Blake
2012-06-15 16:56 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 03/36] block: move job APIs to separate files Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 04/36] block: add block_job_query Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 05/36] block: add support for job pause/resume Paolo Bonzini
2012-06-15 17:22 ` Eric Blake
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 06/36] qmp: add block-job-pause and block-job-resume Paolo Bonzini
2012-06-15 17:32 ` Eric Blake
2012-07-11 16:02 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 07/36] qemu-iotests: add test for pausing a streaming operation Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 08/36] block: rename block_job_complete to block_job_completed Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 09/36] block: rename BlockErrorAction, BlockQMPEventAction Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 10/36] block: move BlockdevOnError declaration to QAPI Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 11/36] block: reorganize io error code Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 12/36] block: sort BlockDeviceIoStatus errors by severity Paolo Bonzini
2012-06-15 17:45 ` Eric Blake
2012-07-11 16:03 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 13/36] block: introduce block job error Paolo Bonzini
2012-06-15 17:50 ` Eric Blake
2012-07-11 16:10 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 14/36] stream: add on_error argument Paolo Bonzini
2012-06-15 17:58 ` Eric Blake
2012-07-11 16:12 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 15/36] qemu-iotests: add tests for streaming error handling Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 16/36] block: add bdrv_query_info Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 17/36] block: add bdrv_query_stats Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 18/36] block: make device optional in BlockInfo Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 19/36] block: add target info to QMP query-blockjobs command Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 20/36] block: forward bdrv_iostatus_reset to block job Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 21/36] block: introduce new dirty bitmap functionality Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 22/36] block: add mirror job Paolo Bonzini
2012-06-15 18:20 ` Eric Blake
2012-07-12 13:45 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 23/36] qmp: add drive-mirror command Paolo Bonzini
2012-06-15 20:12 ` Eric Blake
2012-07-11 16:23 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 24/36] mirror: support querying target file Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 25/36] mirror: add support for on_source_error/on_target_error Paolo Bonzini
2012-06-15 21:12 ` Eric Blake
2012-07-11 16:28 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 26/36] block: live snapshot documentation tweaks Paolo Bonzini
2012-06-15 21:14 ` Eric Blake
2012-07-11 16:16 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 27/36] block: add bdrv_ensure_backing_file Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 28/36] block: add block-job-complete Paolo Bonzini
2012-06-15 21:42 ` Eric Blake
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 29/36] mirror: implement completion Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 30/36] add hierarchical bitmap data type and test cases Paolo Bonzini
2012-06-15 23:02 ` Eric Blake
2012-07-11 16:35 ` Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 31/36] block: implement dirty bitmap using HBitmap Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 32/36] block: make round_to_clusters public Paolo Bonzini
2012-06-15 15:05 ` Paolo Bonzini [this message]
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 34/36] block: return count of dirty sectors, not chunks Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 35/36] block: allow customizing the granularity of the dirty bitmap Paolo Bonzini
2012-06-15 15:05 ` [Qemu-devel] [RFC PATCH 36/36] mirror: allow customizing the granularity Paolo Bonzini
2012-06-15 23:24 ` Eric Blake
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1339772759-31004-34-git-send-email-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=kwolf@redhat.com \
--cc=lcapitulino@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).