From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:36060) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YLkJ0-0003mm-Mt for qemu-devel@nongnu.org; Wed, 11 Feb 2015 22:19:10 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YLkIv-0005mj-6R for qemu-devel@nongnu.org; Wed, 11 Feb 2015 22:19:06 -0500 Received: from szxga02-in.huawei.com ([119.145.14.65]:8042) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YLkIu-0005mJ-K8 for qemu-devel@nongnu.org; Wed, 11 Feb 2015 22:19:01 -0500 From: zhanghailiang Date: Thu, 12 Feb 2015 11:17:14 +0800 Message-ID: <1423711034-5340-28-git-send-email-zhang.zhanghailiang@huawei.com> In-Reply-To: <1423711034-5340-1-git-send-email-zhang.zhanghailiang@huawei.com> References: <1423711034-5340-1-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [PATCH RFC v3 27/27] COLO: Add block replication into colo process List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: zhanghailiang , yunhong.jiang@intel.com, eddie.dong@intel.com, dgilbert@redhat.com, peter.huangpeng@huawei.com, stefanha@redhat.com, pbonzini@redhat.com, Yang Hongyang Make sure the master starts block replication only after the slave's block replication has started. Signed-off-by: zhanghailiang Signed-off-by: Wen Congyang Signed-off-by: Yang Hongyang --- migration/colo.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/migration/colo.c b/migration/colo.c index d5baf87..042dec8 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -17,6 +17,8 @@ #include "qemu/error-report.h" #include "migration/migration-failover.h" #include "net/colo-nic.h" +#include "block/block.h" +#include "sysemu/block-backend.h" /* #define DEBUG_COLO */ @@ -82,6 +84,66 @@ static bool colo_runstate_is_stopped(void) return 
runstate_check(RUN_STATE_COLO) || !runstate_is_running();
 }
 
+/*
+ * Start block replication on every writable block backend.
+ *
+ * @primary: true selects COLO_PRIMARY_MODE, false COLO_SECONDARY_MODE.
+ *
+ * Read-only backends are skipped.  The walk stops at the first backend
+ * for which bdrv_start_replication() returns non-zero.  If that value is
+ * negative, replication is stopped again on the writable backends that
+ * were visited before the failing one.
+ *
+ * Returns 0 when every writable backend was started, otherwise the
+ * non-zero value returned by the failing bdrv_start_replication() call.
+ */
+static int blk_start_replication(bool primary)
+{
+    int mode = primary ? COLO_PRIMARY_MODE : COLO_SECONDARY_MODE;
+    BlockBackend *blk, *temp;
+    int ret = 0;
+
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        if (blk_is_read_only(blk)) {
+            continue;
+        }
+        ret = bdrv_start_replication(blk_bs(blk), mode);
+        if (ret) {
+            /*
+             * Leave the loop with blk pointing at the failing backend:
+             * the rollback below uses it as its end marker, and ret
+             * carries the error back to the caller.
+             */
+            break;
+        }
+    }
+
+    if (ret < 0) {
+        for (temp = blk_next(NULL); temp != blk; temp = blk_next(temp)) {
+            if (blk_is_read_only(temp)) {
+                /* skipped by the start loop, so nothing to undo */
+                continue;
+            }
+            bdrv_stop_replication(blk_bs(temp));
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Call bdrv_do_checkpoint() on every writable block backend.
+ *
+ * A failing backend does not end the walk; the remaining backends are
+ * still visited.
+ *
+ * Returns 0 if every call returned zero, -1 if at least one did not.
+ */
+static int blk_do_checkpoint(void)
+{
+    BlockBackend *blk;
+    int ret = 0;
+
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        if (blk_is_read_only(blk)) {
+            continue;
+        }
+
+        if (bdrv_do_checkpoint(blk_bs(blk))) {
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Call bdrv_stop_replication() on every writable block backend.
+ *
+ * A failing backend does not end the walk; the remaining backends are
+ * still visited.
+ *
+ * Returns 0 if every call returned zero, -1 if at least one did not.
+ */
+static int blk_stop_replication(void)
+{
+    BlockBackend *blk;
+    int ret = 0;
+
+    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
+        if (blk_is_read_only(blk)) {
+            continue;
+        }
+        if (bdrv_stop_replication(blk_bs(blk))) {
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
 /*
  * there are two way to entry this function
  * 1. 
From colo checkpoint incoming thread, in this case @@ -101,6 +163,7 @@ static void slave_do_failover(void) error_report("colo proxy failed to do failover"); } colo_proxy_destroy(COLO_SECONDARY_MODE); + blk_stop_replication(); colo = NULL; @@ -128,6 +191,8 @@ static void master_do_failover(void) migrate_set_state(s, MIG_STATE_COLO, MIG_STATE_COMPLETED); } + blk_stop_replication(); + vm_start(); } @@ -258,6 +323,9 @@ static int do_colo_transaction(MigrationState *s, QEMUFile *control) goto out; } + /* we call this api although this may do nothing on primary side */ + blk_do_checkpoint(); + ret = colo_ctl_put(s->file, COLO_CHECKPOINT_SEND); if (ret < 0) { goto out; @@ -347,6 +415,12 @@ static void *colo_thread(void *opaque) goto out; } + /* start block replication */ + ret = blk_start_replication(true); + if (ret) { + goto out; + } + qemu_mutex_lock_iothread(); vm_start(); qemu_mutex_unlock_iothread(); @@ -508,17 +582,24 @@ void *colo_process_incoming_checkpoints(void *opaque) create_and_init_ram_cache(); - ret = colo_ctl_put(ctl, COLO_READY); - if (ret < 0) { - goto out; - } - colo_buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE); if (colo_buffer == NULL) { error_report("Failed to allocate colo buffer!"); goto out; } + /* start block replication */ + ret = blk_start_replication(false); + if (ret) { + goto out; + } + DPRINTF("finish block replication\n"); + + ret = colo_ctl_put(ctl, COLO_READY); + if (ret < 0) { + goto out; + } + qemu_mutex_lock_iothread(); /* in COLO mode, slave is runing, so start the vm */ vm_start(); @@ -593,6 +674,9 @@ void *colo_process_incoming_checkpoints(void *opaque) vmstate_loading = false; qemu_mutex_unlock_iothread(); + /* discard colo disk buffer */ + blk_do_checkpoint(); + ret = colo_ctl_put(ctl, COLO_CHECKPOINT_LOADED); if (ret < 0) { goto out; -- 1.7.12.4