All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [patch 0/3] add support for live block copy
@ 2010-12-16 17:44 Marcelo Tosatti
  2010-12-16 17:44 ` [Qemu-devel] [patch 1/3] add migration_active function Marcelo Tosatti
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2010-12-16 17:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Anthony Liguori

Add support for live block copy. This is similar to (and based on) block
migration, except that it is performed without migrating the VM.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [patch 1/3] add migration_active function
  2010-12-16 17:44 [Qemu-devel] [patch 0/3] add support for live block copy Marcelo Tosatti
@ 2010-12-16 17:44 ` Marcelo Tosatti
  2010-12-16 17:44 ` [Qemu-devel] [patch 2/3] live block copy Marcelo Tosatti
  2010-12-16 17:44 ` [Qemu-devel] [patch 3/3] do not allow migration if block copy in progress Marcelo Tosatti
  2 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2010-12-16 17:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Anthony Liguori, Marcelo Tosatti

[-- Attachment #1: migration-active --]
[-- Type: text/plain, Size: 973 bytes --]

To query whether migration is active.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: qemu-kvm-block-copy/migration.c
===================================================================
--- qemu-kvm-block-copy.orig/migration.c
+++ qemu-kvm-block-copy/migration.c
@@ -448,3 +448,13 @@ int migrate_fd_close(void *opaque)
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
     return s->close(s);
 }
+
+bool migration_active(void)
+{
+    if (current_migration &&
+        current_migration->get_status(current_migration) == MIG_STATE_ACTIVE) {
+        return true;
+    }
+
+    return false;
+}
Index: qemu-kvm-block-copy/migration.h
===================================================================
--- qemu-kvm-block-copy.orig/migration.h
+++ qemu-kvm-block-copy/migration.h
@@ -134,4 +134,6 @@ static inline FdMigrationState *migrate_
     return container_of(mig_state, FdMigrationState, mig_state);
 }
 
+bool migration_active(void);
+
 #endif

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [patch 2/3] live block copy
  2010-12-16 17:44 [Qemu-devel] [patch 0/3] add support for live block copy Marcelo Tosatti
  2010-12-16 17:44 ` [Qemu-devel] [patch 1/3] add migration_active function Marcelo Tosatti
@ 2010-12-16 17:44 ` Marcelo Tosatti
  2011-01-14 10:46   ` Stefan Hajnoczi
  2010-12-16 17:44 ` [Qemu-devel] [patch 3/3] do not allow migration if block copy in progress Marcelo Tosatti
  2 siblings, 1 reply; 6+ messages in thread
From: Marcelo Tosatti @ 2010-12-16 17:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Anthony Liguori, Marcelo Tosatti

[-- Attachment #1: block-copy --]
[-- Type: text/plain, Size: 29933 bytes --]

Add support for live block copy.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: qemu-kvm/block-copy.c
===================================================================
--- /dev/null
+++ qemu-kvm/block-copy.c
@@ -0,0 +1,728 @@
+/*
+ * QEMU live block copy
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors: Marcelo Tosatti <mtosatti@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "qemu-queue.h"
+#include "qemu-timer.h"
+#include "monitor.h"
+#include "block-copy.h"
+#include "migration.h"
+#include "sysemu.h"
+#include "qjson.h"
+#include <assert.h>
+
+#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
+#define MAX_IS_ALLOCATED_SEARCH 65536
+
+/*
+ * Stages:
+ *
+ * STAGE_BULK: bulk reads/writes in progress
+ * STAGE_BULK_FINISHED: bulk reads finished, bulk writes in progress
+ * STAGE_DIRTY: bulk writes finished, dirty reads/writes in progress
+ * STAGE_SWITCH_FINISHED: switched to new image.
+ */
+
+enum BdrvCopyStage {
+    STAGE_BULK,
+    STAGE_BULK_FINISHED,
+    STAGE_DIRTY,
+    STAGE_SWITCH_FINISHED,
+};
+
+typedef struct BdrvCopyState {
+    BlockDriverState *src;
+    BlockDriverState *dst;
+    bool shared_base;
+
+    int64_t curr_sector;
+    int64_t completed_sectors;
+    int64_t nr_sectors;
+
+    enum BdrvCopyStage stage;
+    int inflight_reads;
+    int error;
+    int failed;
+    int cancelled;
+    QLIST_HEAD(, BdrvCopyBlock) io_list;
+    unsigned long *aio_bitmap;
+    QEMUTimer *aio_timer;
+    QLIST_ENTRY(BdrvCopyState) list;
+
+    int64_t blocks;
+    int64_t total_time;
+
+    char src_device_name[32];
+    char dst_filename[1024];
+    int commit_fd;
+} BdrvCopyState;
+
+typedef struct BdrvCopyBlock {
+    BdrvCopyState *state;
+    uint8_t *buf;
+    int64_t sector;
+    int64_t nr_sectors;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    BlockDriverAIOCB *aiocb;
+    int64_t time;
+    QLIST_ENTRY(BdrvCopyBlock) list;
+} BdrvCopyBlock;
+
+static QLIST_HEAD(, BdrvCopyState) block_copy_list =
+    QLIST_HEAD_INITIALIZER(block_copy_list);
+
+static void alloc_aio_bitmap(BdrvCopyState *s)
+{
+    BlockDriverState *bs = s->src;
+    int64_t bitmap_size;
+    /* bytes needed for one in-flight bit per dirty chunk, rounded up */
+    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
+            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
+    /* accessed as unsigned long words: pad so the last word is in bounds */
+    s->aio_bitmap = qemu_mallocz(bitmap_size + sizeof(unsigned long));
+}
+
+static bool aio_inflight(BdrvCopyState *s, int64_t sector)
+{
+    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+    if (s->aio_bitmap &&
+        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(s->src)) {
+        return !!(s->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
+            (1UL << (chunk % (sizeof(unsigned long) * 8))));
+    } else {
+        return 0;
+    }
+}
+
+static void set_aio_inflight(BdrvCopyState *s, int64_t sector_num,
+                             int nb_sectors, int set)
+{
+    int64_t start, end;
+    unsigned long val, idx, bit;
+
+    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+    for (; start <= end; start++) {
+        idx = start / (sizeof(unsigned long) * 8);
+        bit = start % (sizeof(unsigned long) * 8);
+        val = s->aio_bitmap[idx];
+        if (set) {
+            if (!(val & (1UL << bit))) {
+                val |= 1UL << bit;
+            }
+        } else {
+            if (val & (1UL << bit)) {
+                val &= ~(1UL << bit);
+            }
+        }
+        s->aio_bitmap[idx] = val;
+    }
+}
+
+static void blkcopy_set_stage(BdrvCopyState *s, enum BdrvCopyStage stage)
+{
+    s->stage = stage;
+
+    switch (stage) {
+    case STAGE_BULK:
+        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_BULK);
+        break;
+    case STAGE_BULK_FINISHED:
+        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_BULK_FINISHED);
+        break;
+    case STAGE_DIRTY:
+        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_DIRTY);
+        break;
+    case STAGE_SWITCH_FINISHED:
+        BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_STAGE_SWITCH_FINISHED);
+        break;
+    default:
+        break;
+    }
+}
+
+static void blk_copy_handle_cb_error(BdrvCopyState *s, int ret)
+{
+    s->error = ret;
+    qemu_mod_timer(s->aio_timer, qemu_get_clock(rt_clock));
+}
+
+static inline void add_avg_transfer_time(BdrvCopyState *s, int64_t time)
+{
+    s->blocks++;
+    s->total_time += time;
+}
+
+static void blk_copy_write_cb(void *opaque, int ret)
+{
+    BdrvCopyBlock *blk = opaque;
+    BdrvCopyState *s = blk->state;
+
+    if (ret < 0) {
+        QLIST_REMOVE(blk, list);
+        qemu_free(blk->buf);
+        qemu_free(blk);
+        blk_copy_handle_cb_error(s, ret);
+        return;
+    }
+
+    QLIST_REMOVE(blk, list);
+    add_avg_transfer_time(s, qemu_get_clock_ns(rt_clock) - blk->time);
+
+    /* schedule switch to STAGE_DIRTY on last bulk write completion */
+    if (blk->state->stage == STAGE_BULK_FINISHED) {
+        qemu_mod_timer(s->aio_timer, qemu_get_clock(rt_clock));
+    }
+
+    if (blk->state->stage > STAGE_BULK_FINISHED) {
+        set_aio_inflight(blk->state, blk->sector, blk->nr_sectors, 0);
+    }
+
+    qemu_free(blk->buf);
+    qemu_free(blk);
+}
+
+static void blk_copy_issue_write(BdrvCopyState *s, BdrvCopyBlock *read_blk)
+{
+    BdrvCopyBlock *blk = qemu_mallocz(sizeof(BdrvCopyBlock));
+    blk->state = s;
+    blk->sector = read_blk->sector;
+    blk->nr_sectors = read_blk->nr_sectors;
+    blk->time = read_blk->time;
+    blk->buf = read_blk->buf;
+    QLIST_INSERT_HEAD(&s->io_list, blk, list);
+
+    blk->iov.iov_base = read_blk->buf;
+    blk->iov.iov_len = read_blk->iov.iov_len;
+    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
+
+    BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_AIO_WRITE);
+    blk->aiocb = bdrv_aio_writev(s->dst, blk->sector, &blk->qiov,
+                                 blk->iov.iov_len / BDRV_SECTOR_SIZE,
+                                 blk_copy_write_cb, blk);
+    if (!blk->aiocb) {
+        s->error = 1;
+        goto error;
+    }
+
+    return;
+
+error:
+    QLIST_REMOVE(blk, list);
+    qemu_free(read_blk->buf);
+    qemu_free(blk);
+}
+
+static void blk_copy_read_cb(void *opaque, int ret)
+{
+    BdrvCopyBlock *blk = opaque;
+    BdrvCopyState *s = blk->state;
+
+    s->inflight_reads--;
+    if (ret < 0) {
+        QLIST_REMOVE(blk, list);
+        qemu_free(blk->buf);
+        qemu_free(blk);
+        blk_copy_handle_cb_error(s, ret);
+        return;
+    }
+    blk_copy_issue_write(s, blk);
+    QLIST_REMOVE(blk, list);
+    qemu_free(blk);
+    qemu_mod_timer(s->aio_timer, qemu_get_clock(rt_clock));
+}
+
+static void blk_copy_issue_read(BdrvCopyState *s, int64_t sector,
+                                int nr_sectors)
+{
+    BdrvCopyBlock *blk = qemu_mallocz(sizeof(BdrvCopyBlock));
+    blk->buf = qemu_mallocz(BLOCK_SIZE);
+    blk->state = s;
+    blk->sector = sector;
+    blk->nr_sectors = nr_sectors;
+    QLIST_INSERT_HEAD(&s->io_list, blk, list);
+
+    blk->iov.iov_base = blk->buf;
+    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
+    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
+
+    s->inflight_reads++;
+    blk->time = qemu_get_clock_ns(rt_clock);
+    blk->aiocb = bdrv_aio_readv(s->src, sector, &blk->qiov, nr_sectors,
+                                blk_copy_read_cb, blk);
+    if (!blk->aiocb) {
+        s->error = 1;
+        goto error;
+    }
+
+    return;
+
+error:
+    s->inflight_reads--;
+    QLIST_REMOVE(blk, list);
+    qemu_free(blk->buf);
+    qemu_free(blk);
+}
+
+static bool blkcopy_can_switch(BdrvCopyState *s)
+{
+    int64_t remaining_dirty;
+    int64_t avg_transfer_time;
+
+    remaining_dirty = bdrv_get_dirty_count(s->src);
+    if (remaining_dirty == 0 || s->blocks == 0) {
+        return true;
+    }
+
+    avg_transfer_time = s->total_time / s->blocks;
+    if ((remaining_dirty * avg_transfer_time) <= migrate_max_downtime()) {
+        return true;
+    }
+    return false;
+}
+
+static int blk_issue_reads_dirty(BdrvCopyState *s)
+{
+    int64_t sector;
+
+    for (sector = s->curr_sector; sector < s->nr_sectors;) {
+        if (bdrv_get_dirty(s->src, sector) && !aio_inflight(s, sector)) {
+            int nr_sectors = MIN(s->nr_sectors - s->curr_sector,
+                                 BDRV_SECTORS_PER_DIRTY_CHUNK);
+
+            blk_copy_issue_read(s, sector, nr_sectors);
+            bdrv_reset_dirty(s->src, sector, nr_sectors);
+            set_aio_inflight(s, sector, nr_sectors, 1);
+            break;
+        }
+
+        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
+        s->curr_sector = sector;
+    }
+
+    if (sector >= s->nr_sectors) {
+        s->curr_sector = 0;
+    }
+    return 0;
+}
+
+static int blk_issue_reads_bulk(BdrvCopyState *s)
+{
+    int nr_sectors;
+    int64_t curr_sector = s->curr_sector;
+    /* Issue the next bulk read; returns 1 when no sectors are left, else 0. */
+    if (s->shared_base) {
+        while (curr_sector < s->nr_sectors &&
+                !bdrv_is_allocated(s->src, curr_sector,
+                                   MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
+                curr_sector += nr_sectors;
+        }
+    }
+
+    if (curr_sector >= s->nr_sectors) {
+        s->curr_sector = 0;
+        return 1;
+    }
+    /* read one chunk-aligned block, clamped to the end of the device */
+    curr_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
+    nr_sectors = MIN(s->nr_sectors - curr_sector,
+                     BDRV_SECTORS_PER_DIRTY_CHUNK);
+    blk_copy_issue_read(s, curr_sector, nr_sectors); /* honour the skip */
+    s->curr_sector = curr_sector + nr_sectors;
+    s->completed_sectors = curr_sector;
+    return 0;
+}
+
+static void blkcopy_finish(BdrvCopyState *s)
+{
+    int64_t sector;
+    uint8_t *buf;
+
+    buf = qemu_malloc(BLOCK_SIZE);
+
+    /* FIXME: speed up loop, get_next_dirty_block? */
+    for (sector = 0; sector < s->nr_sectors;
+         sector += BDRV_SECTORS_PER_DIRTY_CHUNK) {
+        if (bdrv_get_dirty(s->src, sector)) {
+            int nr_sectors = MIN(s->nr_sectors - sector,
+                                 BDRV_SECTORS_PER_DIRTY_CHUNK);
+
+            memset(buf, 0, BLOCK_SIZE);
+            if (bdrv_read(s->src, sector, buf, nr_sectors) < 0) {
+                goto error;
+            }
+            if (bdrv_write(s->dst, sector, buf, nr_sectors) < 0) {
+                goto error;
+            }
+            bdrv_reset_dirty(s->src, sector, nr_sectors);
+        }
+
+        if (bdrv_get_dirty_count(s->src) == 0)
+            break;
+    }
+    qemu_free(buf);
+    return;
+
+error:
+    qemu_free(buf);
+    s->error = 1;
+}
+
+static int write_commit_file(BdrvCopyState *s)
+{
+    char commit_msg[1400];
+    const char *buf = commit_msg;
+    int len, ret;
+    /* Write the commit record to commit_fd and fsync it; 0 or negative errno */
+    sprintf(commit_msg, "commit QEMU block_copy %s -> %s\n", s->src_device_name,
+                        s->dst_filename);
+
+    len = strlen(commit_msg);
+    while (len > 0) {
+        ret = write(s->commit_fd, buf, len);
+        if (ret == -1 && errno == EINTR) {
+            continue;
+        }
+        if (ret <= 0) {
+            return ret < 0 ? -errno : -EIO; /* 0 bytes: errno is stale */
+        }
+        buf += ret;
+        len -= ret;
+    }
+
+    if (fsync(s->commit_fd) == -1) {
+        return -errno;
+    }
+
+    return 0;
+}
+
+static void blkcopy_cleanup(BdrvCopyState *s)
+{
+    assert(s->inflight_reads == 0);
+    assert(QLIST_EMPTY(&s->io_list));
+    bdrv_set_dirty_tracking(s->src, 0);
+    if (s->stage >= STAGE_DIRTY)
+        qemu_free(s->aio_bitmap);
+    qemu_del_timer(s->aio_timer);
+    if (s->commit_fd)
+        close(s->commit_fd);
+}
+
+static void blkcopy_free(BdrvCopyState *s)
+{
+    QLIST_REMOVE(s, list);
+    qemu_free(s);
+}
+
+static void handle_error(BdrvCopyState *s)
+{
+    if (!QLIST_EMPTY(&s->io_list))
+        return;
+    s->failed = 1;
+    blkcopy_cleanup(s);
+}
+
+static void blkcopy_switch(BdrvCopyState *s)
+{
+    char src_filename[1024];
+    int open_flags;
+
+    strncpy(src_filename, s->src->filename, sizeof(src_filename));
+    open_flags = s->src->open_flags;
+
+    assert(s->stage == STAGE_DIRTY);
+
+    vm_stop(0);
+    /* flush any guest writes, dirty bitmap uptodate after this.
+     * copy AIO also finished.
+     */
+    qemu_aio_flush();
+    assert(QLIST_EMPTY(&s->io_list));
+    if (s->error) {
+        handle_error(s);
+        goto vm_start;
+    }
+    blkcopy_finish(s);
+    if (s->error) {
+        handle_error(s);
+        goto vm_start;
+    }
+    assert(bdrv_get_dirty_count(s->src) == 0);
+    bdrv_flush_all();
+    bdrv_close(s->src);
+    bdrv_close(s->dst);
+    if (bdrv_open(s->src, s->dst->filename, s->src->open_flags, NULL) < 0) {
+        s->failed = 1;
+        goto err;
+    }
+    if (s->commit_fd && write_commit_file(s)) {
+        s->failed = 1;
+        bdrv_close(s->src);
+        goto err;
+    }
+
+    blkcopy_set_stage(s, STAGE_SWITCH_FINISHED);
+    blkcopy_cleanup(s);
+vm_start:
+    vm_start();
+    return;
+
+err:
+    if (bdrv_open(s->src, src_filename, open_flags, NULL) < 0) {
+        error_report("%s: %s: cannot fallback to source image\n", __func__,
+                     s->dst_filename);
+        abort();
+    }
+    blkcopy_cleanup(s);
+    goto vm_start;
+}
+
+#define BLKCOPY_INFLIGHT 2
+
+static void aio_timer(void *opaque)
+{
+    BdrvCopyState *s = opaque;
+
+    assert(s->cancelled == 0);
+
+    if (s->error) {
+        handle_error(s);
+        return;
+    }
+
+    while (s->stage == STAGE_BULK) {
+        if (s->inflight_reads >= BLKCOPY_INFLIGHT) {
+            break;
+        }
+        if (blk_issue_reads_bulk(s)) {
+            blkcopy_set_stage(s, STAGE_BULK_FINISHED);
+        }
+    }
+
+    if (s->stage == STAGE_BULK_FINISHED) {
+        if (QLIST_EMPTY(&s->io_list)) {
+            blkcopy_set_stage(s, STAGE_DIRTY);
+            alloc_aio_bitmap(s);
+        }
+    }
+
+    while (s->stage == STAGE_DIRTY) {
+        if (s->inflight_reads >= BLKCOPY_INFLIGHT) {
+            break;
+        }
+        blk_issue_reads_dirty(s);
+        if (blkcopy_can_switch(s)) {
+            BLKDBG_EVENT(s->dst->file, BLKDBG_BLKCOPY_SWITCH_START);
+            blkcopy_switch(s);
+            return;
+        }
+    }
+}
+
+static int bdrv_copy(Monitor *mon, const char * device, BlockDriverState *src,
+                     BlockDriverState *dst, const char *commit_file,
+                     bool shared_base)
+{
+    int64_t sectors;
+    BdrvCopyState *blkcopy, *safe;
+    int f = 0; /* 0 == no commit file, as tested by blkcopy_cleanup() */
+
+    QLIST_FOREACH_SAFE(blkcopy, &block_copy_list, list, safe) {
+        if (!strcmp(blkcopy->src_device_name, src->device_name)) {
+            if (blkcopy->stage == STAGE_SWITCH_FINISHED || blkcopy->failed) {
+                blkcopy_free(blkcopy);
+            } else {
+                qerror_report(QERR_BLOCKCOPY_IN_PROGRESS, src->device_name);
+                return -1;
+            }
+        }
+    }
+
+    sectors = bdrv_getlength(src) >> BDRV_SECTOR_BITS;
+    if (sectors != bdrv_getlength(dst) >> BDRV_SECTOR_BITS) {
+        qerror_report(QERR_BLOCKCOPY_IMAGE_SIZE_DIFFERS);
+        return -1;
+    }
+
+    if (commit_file) {
+        f = open(commit_file, O_CREAT|O_WRONLY, S_IRUSR);
+        if (f == -1) {
+            qerror_report(QERR_OPEN_FILE_FAILED, commit_file);
+            return -1;
+        }
+    }
+
+    blkcopy = qemu_mallocz(sizeof(BdrvCopyState));
+    blkcopy->src = src;
+    blkcopy->dst = dst;
+    blkcopy->curr_sector = 0;
+    blkcopy->nr_sectors = sectors;
+    blkcopy_set_stage(blkcopy, STAGE_BULK);
+    blkcopy->aio_timer = qemu_new_timer(rt_clock, aio_timer, blkcopy);
+    blkcopy->shared_base = shared_base;
+    blkcopy->commit_fd = f;
+    strncpy(blkcopy->src_device_name, blkcopy->src->device_name,
+            sizeof(blkcopy->src_device_name) - 1);
+    strncpy(blkcopy->dst_filename, blkcopy->dst->filename,
+            sizeof(blkcopy->dst_filename) - 1);
+
+    bdrv_set_dirty_tracking(src, 1);
+    qemu_mod_timer(blkcopy->aio_timer, qemu_get_clock(rt_clock));
+
+    QLIST_INSERT_HEAD(&block_copy_list, blkcopy, list);
+    return 0;
+}
+
+int do_bdrv_copy(Monitor *mon, const QDict *qdict, QObject **ret_data)
+{
+    const char *device = qdict_get_str(qdict, "device");
+    const char *filename = qdict_get_str(qdict, "filename");
+    const char *commit_file = qdict_get_try_str(qdict, "commit_filename");
+    bool shared_base = qdict_get_try_bool(qdict, "inc", 0);
+    BlockDriverState *new_bs, *bs;
+    int ret;
+
+    if (migration_active()) {
+        qerror_report(QERR_MIGRATION_IN_PROGRESS);
+        return -1;
+    }
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        qerror_report(QERR_DEVICE_NOT_FOUND, device);
+        return -1;
+    }
+
+    new_bs = bdrv_new("");
+    if (bdrv_open(new_bs, filename, bs->open_flags, NULL) < 0) {
+        bdrv_delete(new_bs);
+        qerror_report(QERR_OPEN_FILE_FAILED, filename);
+        return -1;
+    }
+
+    ret = bdrv_copy(mon, device, bs, new_bs, commit_file, shared_base);
+    return ret;
+}
+
+int do_bdrv_copy_cancel(Monitor *mon, const QDict *qdict, QObject **ret_data)
+{
+    BdrvCopyState *blkcopy, *s = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    QLIST_FOREACH(blkcopy, &block_copy_list, list) {
+        if (!strcmp(blkcopy->src_device_name, device)) {
+            s = blkcopy;
+            break;
+        }
+    }
+
+    if (!s) {
+        qerror_report(QERR_DEVICE_NOT_FOUND, device);
+        return -1;
+    }
+
+    s->cancelled = 1;
+    do {
+        qemu_aio_flush();
+    } while (!QLIST_EMPTY(&s->io_list));
+    blkcopy_cleanup(s);
+    blkcopy_free(s);
+
+    return 0;
+}
+
+static void blockcopy_print_dict(QObject *obj, void *opaque)
+{
+    QDict *c_dict;
+    Monitor *mon = opaque;
+
+    c_dict = qobject_to_qdict(obj);
+
+    monitor_printf(mon, "%s: status=%s ",
+                        qdict_get_str(c_dict, "device"),
+                        qdict_get_str(c_dict, "status"));
+
+    if (qdict_haskey(c_dict, "info")) {
+        QDict *qdict = qobject_to_qdict(qdict_get(c_dict, "info"));
+        /* qdict_get_int() yields int64_t: %ld is wrong on 32-bit hosts */
+        monitor_printf(mon, "percentage=%" PRId64 " %%",
+                       qdict_get_int(qdict, "percentage"));
+    }
+
+    monitor_printf(mon, "\n");
+}
+
+void do_info_blockcopy_print(Monitor *mon, const QObject *data)
+{
+    qlist_iter(qobject_to_qlist(data), blockcopy_print_dict, mon);
+}
+
+void do_info_blockcopy(Monitor *mon, QObject **ret_data)
+{
+    QList *c_list;
+    BdrvCopyState *s;
+
+    c_list = qlist_new();
+
+    QLIST_FOREACH(s, &block_copy_list, list) {
+        QObject *c_obj;
+        static const char *status[] = { "failed", "active", "completed" };
+        int i;
+
+        if (s->failed) {
+            i = 0;
+        } else if (s->stage < STAGE_SWITCH_FINISHED) {
+            i = 1;
+        } else {
+            i = 2;
+        }
+
+        c_obj = qobject_from_jsonf("{ 'device': %s, 'status': %s }",
+                                    s->src_device_name, status[i]);
+
+        if (i == 1) {
+            QDict *dict = qobject_to_qdict(c_obj);
+            QObject *obj;
+
+            /* FIXME: add dirty stage progress? */
+            obj = qobject_from_jsonf("{ 'percentage': %" PRId64 "}",
+                                     s->completed_sectors * 100 / s->nr_sectors);
+            qdict_put_obj(dict, "info", obj);
+        }
+        qlist_append_obj(c_list, c_obj);
+    }
+
+    *ret_data = QOBJECT(c_list);
+}
+
+bool block_copy_active(void)
+{
+    BdrvCopyState *s;
+
+    QLIST_FOREACH(s, &block_copy_list, list) {
+        if (s->failed) {
+            continue;
+        }
+        if (s->stage < STAGE_SWITCH_FINISHED) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
Index: qemu-kvm/block-copy.h
===================================================================
--- /dev/null
+++ qemu-kvm/block-copy.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU live block copy
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors: Marcelo Tosatti <mtosatti@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_COPY_H
+#define BLOCK_COPY_H
+
+int do_bdrv_copy(Monitor *mon, const QDict *qdict, QObject **ret_data);
+int do_bdrv_copy_cancel(Monitor *mon, const QDict *qdict, QObject **ret_data);
+
+void do_info_blockcopy_print(Monitor *mon, const QObject *data);
+void do_info_blockcopy(Monitor *mon, QObject **ret_data);
+
+bool block_copy_active(void);
+
+#endif /* BLOCK_COPY_H */
+
Index: qemu-kvm/hmp-commands.hx
===================================================================
--- qemu-kvm.orig/hmp-commands.hx
+++ qemu-kvm/hmp-commands.hx
@@ -769,6 +769,43 @@ Set maximum speed to @var{value} (in byt
 ETEXI
 
     {
+        .name       = "block_copy",
+        .args_type  = "device:s,filename:s,commit_filename:s?,inc:-i",
+        .params     = "device filename [commit_filename] [-i]",
+        .help       = "live block copy device to image"
+                      "\n\t\t\t optional commit filename "
+                      "\n\t\t\t -i for incremental copy "
+                      "(base image shared between src and destination)",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = do_bdrv_copy,
+    },
+
+STEXI
+@item block_copy @var{device} @var{filename} [@var{commit_filename}] [-i]
+@findex block_copy
+Live copy block device @var{device} to image @var{filename}.
+        -i for incremental copy (base image is shared)
+
+Optionally a commit message is written to @var{commit_filename}
+once the switch to the new image is performed.
+ETEXI
+
+    {
+        .name       = "block_copy_cancel",
+        .args_type  = "device:s",
+        .params     = "device",
+        .help       = "cancel live block copy",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = do_bdrv_copy_cancel,
+    },
+
+STEXI
+@item block_copy_cancel @var{device}
+@findex block_copy_cancel
+Cancel live block copy on @var{device}.
+ETEXI
+
+    {
         .name       = "migrate_set_downtime",
         .args_type  = "value:T",
         .params     = "value",
@@ -1213,6 +1250,8 @@ show device tree
 show qdev device model list
 @item info roms
 show roms
+@item info block-copy
+show block copy status
 @end table
 ETEXI
 
Index: qemu-kvm/monitor.c
===================================================================
--- qemu-kvm.orig/monitor.c
+++ qemu-kvm/monitor.c
@@ -44,6 +44,7 @@
 #include "balloon.h"
 #include "qemu-timer.h"
 #include "migration.h"
+#include "block-copy.h"
 #include "kvm.h"
 #include "acl.h"
 #include "qint.h"
@@ -2651,6 +2652,14 @@ static const mon_cmd_t info_cmds[] = {
     },
 #endif
     {
+        .name       = "block-copy",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show block copy status",
+        .user_print = do_info_blockcopy_print,
+        .mhandler.info_new = do_info_blockcopy,
+    },
+    {
         .name       = NULL,
     },
 };
@@ -2782,6 +2791,14 @@ static const mon_cmd_t qmp_query_cmds[] 
         .mhandler.info_async = do_info_balloon,
         .flags      = MONITOR_CMD_ASYNC,
     },
+    {
+        .name       = "block-copy",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show block copy status",
+        .user_print = do_info_blockcopy_print,
+        .mhandler.info_new = do_info_blockcopy,
+    },
     { /* NULL */ },
 };
 
Index: qemu-kvm/block.h
===================================================================
--- qemu-kvm.orig/block.h
+++ qemu-kvm/block.h
@@ -281,6 +281,13 @@ typedef enum {
     BLKDBG_CLUSTER_ALLOC_BYTES,
     BLKDBG_CLUSTER_FREE,
 
+    BLKDBG_BLKCOPY_STAGE_BULK,
+    BLKDBG_BLKCOPY_STAGE_BULK_FINISHED,
+    BLKDBG_BLKCOPY_STAGE_DIRTY,
+    BLKDBG_BLKCOPY_STAGE_SWITCH_FINISHED,
+    BLKDBG_BLKCOPY_SWITCH_START,
+    BLKDBG_BLKCOPY_AIO_WRITE,
+
     BLKDBG_EVENT_MAX,
 } BlkDebugEvent;
 
Index: qemu-kvm/block/blkdebug.c
===================================================================
--- qemu-kvm.orig/block/blkdebug.c
+++ qemu-kvm/block/blkdebug.c
@@ -178,6 +178,14 @@ static const char *event_names[BLKDBG_EV
     [BLKDBG_CLUSTER_ALLOC]                  = "cluster_alloc",
     [BLKDBG_CLUSTER_ALLOC_BYTES]            = "cluster_alloc_bytes",
     [BLKDBG_CLUSTER_FREE]                   = "cluster_free",
+
+
+    [BLKDBG_BLKCOPY_STAGE_BULK]             = "blkcopy_stage_bulk",
+    [BLKDBG_BLKCOPY_STAGE_BULK_FINISHED]    = "blkcopy_stage_bulk_finished",
+    [BLKDBG_BLKCOPY_STAGE_DIRTY]            = "blkcopy_stage_dirty",
+    [BLKDBG_BLKCOPY_STAGE_SWITCH_FINISHED]  = "blkcopy_stage_switch_finished",
+    [BLKDBG_BLKCOPY_SWITCH_START]           = "blkcopy_switch_start",
+    [BLKDBG_BLKCOPY_AIO_WRITE]              = "blkcopy_aio_write",
 };
 
 static int get_event_by_name(const char *name, BlkDebugEvent *event)
Index: qemu-kvm/qerror.c
===================================================================
--- qemu-kvm.orig/qerror.c
+++ qemu-kvm/qerror.c
@@ -200,6 +200,18 @@ static const QErrorStringTable qerror_ta
         .error_fmt = QERR_VNC_SERVER_FAILED,
         .desc      = "Could not start VNC server on %(target)",
     },
+    {
+        .error_fmt = QERR_BLOCKCOPY_IN_PROGRESS,
+        .desc      = "Block copy for %(device) in progress",
+    },
+    {
+        .error_fmt = QERR_BLOCKCOPY_IMAGE_SIZE_DIFFERS,
+        .desc      = "Length of destination image differs from source image",
+    },
+    {
+        .error_fmt = QERR_MIGRATION_IN_PROGRESS,
+        .desc      = "Migration in progress",
+    },
     {}
 };
 
Index: qemu-kvm/qerror.h
===================================================================
--- qemu-kvm.orig/qerror.h
+++ qemu-kvm/qerror.h
@@ -165,4 +165,13 @@ QError *qobject_to_qerror(const QObject 
 #define QERR_VNC_SERVER_FAILED \
     "{ 'class': 'VNCServerFailed', 'data': { 'target': %s } }"
 
+#define QERR_BLOCKCOPY_IN_PROGRESS \
+    "{ 'class': 'BlockCopyInProgress', 'data': { 'device': %s } }"
+
+#define QERR_BLOCKCOPY_IMAGE_SIZE_DIFFERS \
+    "{ 'class': 'BlockCopyImageSizeDiffers', 'data': {} }"
+
+#define QERR_MIGRATION_IN_PROGRESS \
+    "{ 'class': 'MigrationInProgress', 'data': {} }"
+
 #endif /* QERROR_H */
Index: qemu-kvm/qmp-commands.hx
===================================================================
--- qemu-kvm.orig/qmp-commands.hx
+++ qemu-kvm/qmp-commands.hx
@@ -546,6 +546,75 @@ Example:
 EQMP
 
     {
+        .name       = "block_copy",
+        .args_type  = "device:s,filename:s,commit_filename:s?,inc:-i",
+        .params     = "device filename [commit_filename] [-i]",
+        .help       = "live block copy device to image"
+                      "\n\t\t\t optional commit filename "
+                      "\n\t\t\t -i for incremental copy "
+                      "(base image shared between src and destination)",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = do_bdrv_copy,
+    },
+
+SQMP
+block_copy
+----------
+
+Live block copy.
+
+Arguments:
+
+- "device": device name (json-string)
+- "filename": target image filename (json-string)
+- "commit_filename": target commit filename (json-string, optional)
+- "inc": incremental disk copy (json-bool, optional)
+
+Example:
+
+-> { "execute": "block_copy",
+                            "arguments": { "device": "ide0-hd1",
+                               "filename": "/mnt/new-disk.img",
+                               "commit_filename": "/mnt/commit-new-disk.img"
+                             } }
+
+<- { "return": {} }
+
+Notes:
+
+(1) The 'query-block-copy' command should be used to check block copy progress
+    and final result (this information is provided by the 'status' member)
+(2) Boolean argument "inc" defaults to false
+
+EQMP
+
+    {
+        .name       = "block_copy_cancel",
+        .args_type  = "device:s",
+        .params     = "device",
+        .help       = "cancel live block copy",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = do_bdrv_copy_cancel,
+    },
+
+SQMP
+block_copy_cancel
+--------------
+
+Cancel live block copy.
+
+Arguments:
+
+- "device": device name (json-string)
+
+Example:
+
+-> { "execute": "block_copy_cancel", "arguments": { "device": "ide0-hd1" } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "netdev_add",
         .args_type  = "netdev:O",
         .params     = "[user|tap|socket],id=str[,prop=value][,...]",
@@ -1505,6 +1574,44 @@ Examples:
 EQMP
 
 SQMP
+query-block-copy
+----------------
+
+Live block copy status.
+
+Each block copy instance information is stored in a json-object and the returned
+value is a json-array of all instances.
+
+Each json-object contains the following:
+
+- "device": device name (json-string)
+- "status": block copy status (json-string)
+    - Possible values: "active", "failed", "completed"
+- "info": A json-object with the statistics information, if status is "active":
+    - "percentage": percentage completed (json-int)
+
+Example:
+
+Block copy for "ide1-hd0" active and block copy for "ide1-hd1" failed:
+
+-> { "execute": "query-block-copy" }
+<- {
+      "return":[
+        {"device":"ide1-hd0",
+            "status":"active",
+            "info":{
+               "percentage":23
+            }
+        },
+        {"device":"ide1-hd1",
+         "status":"failed"
+        }
+      ]
+   }
+
+EQMP
+
+SQMP
 query-balloon
 -------------
 
Index: qemu-kvm/Makefile.objs
===================================================================
--- qemu-kvm.orig/Makefile.objs
+++ qemu-kvm/Makefile.objs
@@ -91,7 +91,7 @@ common-obj-y += buffered_file.o migratio
 common-obj-y += qemu-char.o savevm.o #aio.o
 common-obj-y += msmouse.o ps2.o
 common-obj-y += qdev.o qdev-properties.o
-common-obj-y += block-migration.o
+common-obj-y += block-migration.o block-copy.o
 common-obj-y += pflib.o
 
 common-obj-$(CONFIG_BRLAPI) += baum.o

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [patch 3/3] do not allow migration if block copy in progress
  2010-12-16 17:44 [Qemu-devel] [patch 0/3] add support for live block copy Marcelo Tosatti
  2010-12-16 17:44 ` [Qemu-devel] [patch 1/3] add migration_active function Marcelo Tosatti
  2010-12-16 17:44 ` [Qemu-devel] [patch 2/3] live block copy Marcelo Tosatti
@ 2010-12-16 17:44 ` Marcelo Tosatti
  2 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2010-12-16 17:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Anthony Liguori, Marcelo Tosatti

[-- Attachment #1: blockcopy-active --]
[-- Type: text/plain, Size: 784 bytes --]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: qemu-kvm-block-copy/migration.c
===================================================================
--- qemu-kvm-block-copy.orig/migration.c
+++ qemu-kvm-block-copy/migration.c
@@ -19,6 +19,7 @@
 #include "block.h"
 #include "qemu_socket.h"
 #include "block-migration.h"
+#include "block-copy.h"
 #include "qemu-objects.h"
 
 //#define DEBUG_MIGRATION
@@ -88,6 +89,11 @@ int do_migrate(Monitor *mon, const QDict
         return -1;
     }
 
+    if (block_copy_active()) {
+        monitor_printf(mon, "block copy in progress\n");
+        return -1;
+    }
+
     if (strstart(uri, "tcp:", &p)) {
         s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
                                          blk, inc);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [patch 2/3] live block copy
  2010-12-16 17:44 ` [Qemu-devel] [patch 2/3] live block copy Marcelo Tosatti
@ 2011-01-14 10:46   ` Stefan Hajnoczi
  2011-01-14 12:20     ` Marcelo Tosatti
  0 siblings, 1 reply; 6+ messages in thread
From: Stefan Hajnoczi @ 2011-01-14 10:46 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel

On Thu, Dec 16, 2010 at 5:44 PM, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> Add support for live block copy.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Are you still pushing this, are you looking for reviews?

Stefan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [patch 2/3] live block copy
  2011-01-14 10:46   ` Stefan Hajnoczi
@ 2011-01-14 12:20     ` Marcelo Tosatti
  0 siblings, 0 replies; 6+ messages in thread
From: Marcelo Tosatti @ 2011-01-14 12:20 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: Kevin Wolf, Anthony Liguori, qemu-devel

On Fri, Jan 14, 2011 at 10:46:53AM +0000, Stefan Hajnoczi wrote:
> On Thu, Dec 16, 2010 at 5:44 PM, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> > Add support for live block copy.
> >
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> Are you still pushing this, are you looking for reviews?
> 
> Stefan

Reviews are welcome. The known issues, which will be fixed in the next
submission, are:

- Interaction with device hotplug.
- Use a context different than timer to issue AIOs, if possible.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-01-14 12:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-12-16 17:44 [Qemu-devel] [patch 0/3] add support for live block copy Marcelo Tosatti
2010-12-16 17:44 ` [Qemu-devel] [patch 1/3] add migration_active function Marcelo Tosatti
2010-12-16 17:44 ` [Qemu-devel] [patch 2/3] live block copy Marcelo Tosatti
2011-01-14 10:46   ` Stefan Hajnoczi
2011-01-14 12:20     ` Marcelo Tosatti
2010-12-16 17:44 ` [Qemu-devel] [patch 3/3] do not allow migration if block copy in progress Marcelo Tosatti

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.