From: Kevin Wolf <kwolf@redhat.com>
To: anthony@codemonkey.ws
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 09/30] block: add live block commit functionality
Date: Fri, 28 Sep 2012 19:56:52 +0200 [thread overview]
Message-ID: <1348855033-17174-10-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1348855033-17174-1-git-send-email-kwolf@redhat.com>
From: Jeff Cody <jcody@redhat.com>
This adds the live commit coroutine. This iteration supports committing
only images below the active layer, not the active layer itself.
The behaviour is similar to block streaming; the sectors are walked
through, and anything that exists above 'base' is committed back down
into base. At the end, intermediate images are deleted, and the
chain stitched together. Images are restored to their original open
flags upon completion.
Signed-off-by: Jeff Cody <jcody@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/Makefile.objs | 1 +
block/commit.c | 267 +++++++++++++++++++++++++++++++++++++++++++++++++++
block_int.h | 16 +++
trace-events | 2 +
4 files changed, 286 insertions(+), 0 deletions(-)
create mode 100644 block/commit.c
diff --git a/block/Makefile.objs b/block/Makefile.objs
index a1ae67f..81fd43c 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,6 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-obj-y += stream.o
+block-obj-y += commit.o
block-obj-$(CONFIG_WIN32) += raw-win32.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
diff --git a/block/commit.c b/block/commit.c
new file mode 100644
index 0000000..624ec5f
--- /dev/null
+++ b/block/commit.c
@@ -0,0 +1,267 @@
+/*
+ * Live block commit
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Jeff Cody <jcody@redhat.com>
+ * Based on stream.c by Stefan Hajnoczi
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "block_int.h"
+#include "qemu/ratelimit.h"
+
+enum {
+ /*
+ * Size of data buffer for populating the image file. This should be large
+ * enough to process multiple clusters in a single call, so that populating
+ * contiguous regions of the image is efficient.
+ *
+ * commit_run() allocates one bounce buffer of this size and also uses it
+ * (divided by BDRV_SECTOR_SIZE) as the maximum number of sectors examined
+ * per loop iteration.
+ */
+ COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns; slice length handed to ratelimit_set_speed() */
+
+typedef struct CommitBlockJob { /* state of one live block-commit job */
+ BlockJob common; /* embedded generic block job */
+ RateLimit limit; /* rate limiter, set up by commit_set_speed() */
+ BlockDriverState *active; /* active layer (the bs given to commit_start()) */
+ BlockDriverState *top; /* image whose data is read and committed */
+ BlockDriverState *base; /* image the data is written into */
+ BlockErrorAction on_error; /* error policy consulted by commit_run() */
+ int base_flags; /* open flags of base before commit_start() reopened it */
+ int orig_overlay_flags; /* same, for the overlay of top */
+} CommitBlockJob;
+
+/*
+ * Copy @nb_sectors sectors starting at @sector_num from @bs into @base,
+ * staging the data through @buf.
+ *
+ * Returns 0 on success, otherwise the non-zero value returned by the
+ * bdrv_read() or bdrv_write() call that failed.
+ */
+static int coroutine_fn commit_populate(BlockDriverState *bs,
+                                        BlockDriverState *base,
+                                        int64_t sector_num, int nb_sectors,
+                                        void *buf)
+{
+    int err = bdrv_read(bs, sector_num, buf, nb_sectors);
+
+    if (err != 0) {
+        return err;
+    }
+
+    /* The result of the write is the overall result: 0 means success. */
+    return bdrv_write(base, sector_num, buf, nb_sectors);
+}
+
+/*
+ * Coroutine body of the live commit job; @opaque is the CommitBlockJob.
+ *
+ * Grows base to the length of top if it is shorter, then walks the sectors
+ * of top and copies every range that bdrv_co_is_allocated_above() reports as
+ * allocated above base down into base.  If the walk reaches the end without
+ * the job being cancelled, the intermediate images are removed with
+ * bdrv_drop_intermediate().
+ *
+ * On every exit path base and the overlay of top are reopened with the
+ * flags they had before commit_start() changed them, and the job is
+ * completed with the resulting status.
+ */
+static void coroutine_fn commit_run(void *opaque)
+{
+    CommitBlockJob *s = opaque;
+    BlockDriverState *active = s->active;
+    BlockDriverState *top = s->top;
+    BlockDriverState *base = s->base;
+    BlockDriverState *overlay_bs;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n = 0;
+    void *buf;
+    int64_t base_len;
+
+    /* Resolve the overlay before the first possible failure: the early
+     * error paths below jump straight to exit_restore_reopen, which needs it
+     * to put the overlay's open flags back.
+     * NOTE(review): assumes bdrv_find_overlay() is a side-effect-free lookup,
+     * as its use in commit_start() suggests. */
+    overlay_bs = bdrv_find_overlay(active, top);
+
+    ret = s->common.len = bdrv_getlength(top);
+    if (s->common.len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    ret = base_len = bdrv_getlength(base);
+    if (base_len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    /* base has to be at least as long as the image committed into it */
+    if (base_len < s->common.len) {
+        ret = bdrv_truncate(base, s->common.len);
+        if (ret) {
+            goto exit_restore_reopen;
+        }
+    }
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+
+    for (sector_num = 0; sector_num < end; sector_num += n) {
+        uint64_t delay_ns = 0;
+        bool copy;
+
+wait:
+        /* Note that even when no rate limit is applied we need to yield
+         * with no pending I/O here so that qemu_aio_flush() returns.
+         */
+        block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        if (block_job_is_cancelled(&s->common)) {
+            break;
+        }
+        /* Copy if allocated above the base */
+        ret = bdrv_co_is_allocated_above(top, base, sector_num,
+                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                         &n);
+        copy = (ret == 1);
+        trace_commit_one_iteration(s, sector_num, n, ret);
+        if (copy) {
+            if (s->common.speed) {
+                /* Over budget for this slice: sleep first, then re-check
+                 * cancellation and allocation for the same range. */
+                delay_ns = ratelimit_calculate_delay(&s->limit, n);
+                if (delay_ns > 0) {
+                    goto wait;
+                }
+            }
+            ret = commit_populate(top, base, sector_num, n, buf);
+        }
+        if (ret < 0) {
+            if (s->on_error == BLOCK_ERR_STOP_ANY ||
+                s->on_error == BLOCK_ERR_REPORT ||
+                (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) {
+                goto exit_free_buf;
+            } else {
+                /* Any other policy: do not advance, retry the same range */
+                n = 0;
+                continue;
+            }
+        }
+        /* Publish progress */
+        s->common.offset += n * BDRV_SECTOR_SIZE;
+    }
+
+    ret = 0;
+
+    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base);
+    }
+
+exit_free_buf:
+    qemu_vfree(buf);
+
+exit_restore_reopen:
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    /* overlay_bs may be NULL if the lookup found nothing; never pass NULL
+     * to bdrv_get_flags() */
+    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+
+    block_job_complete(&s->common, ret);
+}
+
+/*
+ * set_speed callback of the commit job type.
+ *
+ * A negative @speed is rejected through @errp.  Otherwise @speed is divided
+ * by BDRV_SECTOR_SIZE and installed in the job's rate limiter together with
+ * SLICE_TIME.
+ */
+static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    CommitBlockJob *commit_job;
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+
+    /* The generic job is embedded in the commit job as 'common'. */
+    commit_job = container_of(job, CommitBlockJob, common);
+    ratelimit_set_speed(&commit_job->limit, speed / BDRV_SECTOR_SIZE,
+                        SLICE_TIME);
+}
+
+static BlockJobType commit_job_type = { /* passed to block_job_create() */
+ .instance_size = sizeof(CommitBlockJob),
+ .job_type = "commit",
+ .set_speed = commit_set_speed, /* validates and applies the rate limit */
+};
+
+/*
+ * Validate the arguments and start a live commit job on the chain whose
+ * active layer is @bs.
+ *
+ * @bs:       active layer; the block job is created on it
+ * @base:     image that receives the data; must be reachable from @top
+ * @top:      image to commit; must differ from both @bs and @base
+ * @speed:    passed on to block_job_create()
+ * @on_error: error policy; the two "stop" policies require iostatus to be
+ *            enabled on @bs
+ * @cb:       passed on to block_job_create()
+ * @opaque:   passed on to block_job_create()
+ * @errp:     set on failure, in which case no job has been started
+ *
+ * base and the overlay of top are reopened read-write if they are not
+ * already; their original flags are recorded in the job so that
+ * commit_run() can restore them.  The job coroutine is entered before this
+ * function returns.
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                  BlockDriverState *top, int64_t speed,
+                  BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    CommitBlockJob *s;
+    BlockReopenQueue *reopen_queue = NULL;
+    int orig_overlay_flags;
+    int orig_base_flags;
+    BlockDriverState *overlay_bs;
+    Error *local_err = NULL;
+
+    if ((on_error == BLOCK_ERR_STOP_ANY ||
+         on_error == BLOCK_ERR_STOP_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+        return;
+    }
+
+    /* Once we support top == active layer, remove this check */
+    if (top == bs) {
+        error_setg(errp,
+                   "Top image as the active layer is currently unsupported");
+        return;
+    }
+
+    if (top == base) {
+        error_setg(errp, "Invalid files for merge: top and base are the same");
+        return;
+    }
+
+    /* top and base may be valid, but let's make sure that base is reachable
+     * from top */
+    if (bdrv_find_backing_image(top, base->filename) != base) {
+        error_setg(errp,
+                   "Base (%s) is not reachable from top (%s)",
+                   base->filename, top->filename);
+        return;
+    }
+
+    overlay_bs = bdrv_find_overlay(bs, top);
+
+    if (overlay_bs == NULL) {
+        error_setg(errp, "Could not find overlay image for %s:", top->filename);
+        return;
+    }
+
+    orig_base_flags    = bdrv_get_flags(base);
+    orig_overlay_flags = bdrv_get_flags(overlay_bs);
+
+    /* convert base & overlay_bs to r/w, if necessary */
+    if (!(orig_base_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, base,
+                                         orig_base_flags | BDRV_O_RDWR);
+    }
+    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+                                         orig_overlay_flags | BDRV_O_RDWR);
+    }
+    if (reopen_queue) {
+        bdrv_reopen_multiple(reopen_queue, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        /* No job will run, so nothing would ever undo the r/w reopen above.
+         * Put back only the flags this call changed; as in commit_run(),
+         * this is best effort and a failure here is not reported. */
+        if (!(orig_base_flags & BDRV_O_RDWR)) {
+            bdrv_reopen(base, orig_base_flags, NULL);
+        }
+        if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+            bdrv_reopen(overlay_bs, orig_overlay_flags, NULL);
+        }
+        return;
+    }
+
+    s->base   = base;
+    s->top    = top;
+    s->active = bs;
+
+    /* Remembered so that commit_run() can restore them when it finishes */
+    s->base_flags         = orig_base_flags;
+    s->orig_overlay_flags = orig_overlay_flags;
+
+    s->on_error = on_error;
+    s->common.co = qemu_coroutine_create(commit_run);
+
+    trace_commit_start(bs, base, top, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
diff --git a/block_int.h b/block_int.h
index ac4245c..56164a7 100644
--- a/block_int.h
+++ b/block_int.h
@@ -463,4 +463,20 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp);
+/**
+ * commit_start:
+ * @bs: Active layer of the image chain; the job is created on this device.
+ * @base: Block device that will be written into, and become the new top
+ * @top: Block device whose data is committed into @base. Must differ from
+ * both @bs and @base, and @base must be reachable from it.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockErrorAction on_error, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
#endif /* BLOCK_INT_H */
diff --git a/trace-events b/trace-events
index f5b5097..dbc3007 100644
--- a/trace-events
+++ b/trace-events
@@ -74,6 +74,8 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c
# block/stream.c
stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
--
1.7.6.5
next prev parent reply other threads:[~2012-09-28 17:57 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-28 17:56 [Qemu-devel] [PULL 00/30] Block patches Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 01/30] block-migration: Flush requests in blk_mig_cleanup Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 02/30] block: after creating a live snapshot, make old image read-only Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 03/30] aio: Fix qemu_aio_wait() to maintain correct walking_handlers count Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 04/30] qemu: URI parsing library Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 05/30] aio: Another fix to the walking_handlers logic Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 06/30] configure: Add a config option for GlusterFS as block backend Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 07/30] block: Support GlusterFS as a QEMU " Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 08/30] block: add support functions for live commit, to find and delete images Kevin Wolf
2012-09-28 17:56 ` Kevin Wolf [this message]
2012-09-28 17:56 ` [Qemu-devel] [PATCH 10/30] blockdev: rename block_stream_cb to a generic block_job_cb Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 11/30] block: helper function, to find the base image of a chain Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 12/30] QAPI: add command for live block commit, 'block-commit' Kevin Wolf
2012-10-05 17:29 ` Eric Blake
2012-10-05 18:05 ` Eric Blake
2012-10-08 14:37 ` Paolo Bonzini
2012-10-11 15:42 ` Eric Blake
2012-09-28 17:56 ` [Qemu-devel] [PATCH 13/30] qemu-iotests: add initial tests for live block commit Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 14/30] qerror/block: introduce QERR_BLOCK_JOB_NOT_ACTIVE Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 15/30] block: fix documentation of block_job_cancel_sync Kevin Wolf
2012-09-28 17:56 ` [Qemu-devel] [PATCH 16/30] block: move job APIs to separate files Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 17/30] block: add block_job_query Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 18/30] qmp: add 'busy' member to BlockJobInfo Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 19/30] block: add support for job pause/resume Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 20/30] qmp: add block-job-pause and block-job-resume Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 21/30] qemu-iotests: add test for pausing a streaming operation Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 22/30] iostatus: rename BlockErrorAction, BlockQMPEventAction Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 23/30] iostatus: move BlockdevOnError declaration to QAPI Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 24/30] iostatus: change is_read to a bool Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 25/30] iostatus: reorganize io error code Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 26/30] block: introduce block job error Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 27/30] stream: add on-error argument Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 28/30] blkdebug: process all set_state rules in the old state Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 29/30] qemu-iotests: map underscore to dash in QMP argument names Kevin Wolf
2012-09-28 17:57 ` [Qemu-devel] [PATCH 30/30] qemu-iotests: add tests for streaming error handling Kevin Wolf
2012-10-05 2:11 ` [Qemu-devel] [PULL 00/30] Block patches Anthony Liguori
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1348855033-17174-10-git-send-email-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=anthony@codemonkey.ws \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).