From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:44484) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZX3MF-0007VA-89 for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:25:32 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZX3ME-0001gx-07 for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:25:27 -0400 Received: from szxga02-in.huawei.com ([119.145.14.65]:41925) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZX3MC-0001cd-NV for qemu-devel@nongnu.org; Wed, 02 Sep 2015 04:25:25 -0400 From: zhanghailiang Date: Wed, 2 Sep 2015 16:23:18 +0800 Message-ID: <1441182199-8328-32-git-send-email-zhang.zhanghailiang@huawei.com> In-Reply-To: <1441182199-8328-1-git-send-email-zhang.zhanghailiang@huawei.com> References: <1441182199-8328-1-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [PATCH COLO-Frame v9 31/32] COLO: Add block replication into colo process List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com, yanghy@cn.fujitsu.com, zhanghailiang From: Wen Congyang Make sure the master starts block replication only after the slave's block replication has started.
Signed-off-by: zhanghailiang Signed-off-by: Wen Congyang Signed-off-by: Yang Hongyang Signed-off-by: Li Zhijian --- migration/colo.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- trace-events | 2 ++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/migration/colo.c b/migration/colo.c index fdbda79..d7a14ba 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -21,6 +21,7 @@ #include "qapi-event.h" #include "qmp-commands.h" #include "qapi-types.h" +#include "block/block_int.h" /* * The delay time before qemu begin the procedure of default failover treatment. @@ -64,6 +65,7 @@ static void secondary_vm_do_failover(void) { int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; /* Can not do failover during the process of VM's loading VMstate, Or * it will break the secondary VM. @@ -81,6 +83,12 @@ static void secondary_vm_do_failover(void) migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); + if (!autostart) { error_report("\"-S\" qemu option will be ignored in secondary side"); /* recover runstate to normal migration finish state */ @@ -111,6 +119,7 @@ static void primary_vm_do_failover(void) { MigrationState *s = migrate_get_current(); int old_state; + Error *local_err = NULL; if (s->state != MIGRATION_STATUS_FAILED) { migrate_set_state(&s->state, MIGRATION_STATUS_COLO, @@ -126,6 +135,12 @@ static void primary_vm_do_failover(void) qemu_bh_schedule(s->cleanup_bh); + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); + vm_start(); old_state = failover_set_state(FAILOVER_STATUS_HANDLING, @@ -215,6 +230,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s, int colo_shutdown, ret; size_t size; QEMUFile 
*trans = NULL; + Error *local_err = NULL; ret = colo_ctl_put(s->to_dst_file, COLO_CMD_CHECKPOINT_REQUEST, 0); if (ret < 0) { @@ -252,6 +268,16 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + /* we call this api although this may do nothing on primary side */ + qemu_mutex_lock_iothread(); + bdrv_do_checkpoint_all(&local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + error_report_err(local_err); + ret = -1; + goto out; + } + ret = colo_ctl_put(s->to_dst_file, COLO_CMD_VMSTATE_SEND, 0); if (ret < 0) { goto out; @@ -294,6 +320,10 @@ static int colo_do_checkpoint_transaction(MigrationState *s, } if (colo_shutdown) { + qemu_mutex_lock_iothread(); + bdrv_stop_replication_all(false, NULL); + trace_colo_stop_block_replication("shutdown"); + qemu_mutex_unlock_iothread(); colo_ctl_put(s->to_dst_file, COLO_CMD_GUEST_SHUTDOWN, 0); qemu_fflush(s->to_dst_file); colo_shutdown_requested = 0; @@ -339,6 +369,7 @@ static void *colo_thread(void *opaque) int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); int64_t error_time; int fd, ret = 0; + Error *local_err = NULL; failover_init_state(); @@ -377,6 +408,14 @@ static void *colo_thread(void *opaque) } qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_PRIMARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + error_report_err(local_err); + goto out; + } + trace_colo_start_block_replication(); vm_start(); qemu_mutex_unlock_iothread(); trace_colo_vm_state_change("stop", "run"); @@ -507,6 +546,8 @@ static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request) case COLO_CMD_GUEST_SHUTDOWN: qemu_mutex_lock_iothread(); vm_stop_force_state(RUN_STATE_COLO); + bdrv_stop_replication_all(false, NULL); + trace_colo_stop_block_replication("shutdown"); qemu_system_shutdown_request_core(); qemu_mutex_unlock_iothread(); /* the main thread will exit and termiante the whole @@ -538,6 +579,7 @@ void 
*colo_process_incoming_thread(void *opaque) int total_size; int64_t error_time, current_time; int fd, ret = 0; + Error *local_err = NULL; migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COLO); @@ -573,6 +615,16 @@ void *colo_process_incoming_thread(void *opaque) goto out; } + qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_SECONDARY, &local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + error_report_err(local_err); + goto out; + } + trace_colo_start_block_replication(); + ret = colo_ctl_put(mis->to_src_file, COLO_CMD_CHECKPOINT_READY, 0); if (ret < 0) { goto out; @@ -647,8 +699,15 @@ void *colo_process_incoming_thread(void *opaque) goto out; } - vmstate_loading = false; + /* discard colo disk buffer */ + bdrv_do_checkpoint_all(&local_err); qemu_mutex_unlock_iothread(); + if (local_err) { + vmstate_loading = false; + goto out; + } + + vmstate_loading = false; if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE); diff --git a/trace-events b/trace-events index cf378ec..d3ba02d 100644 --- a/trace-events +++ b/trace-events @@ -1477,6 +1477,8 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'" colo_ctl_put(const char *msg) "Send '%s'" colo_ctl_get(const char *msg) "Receive '%s'" colo_failover_set_state(int new_state) "new state %d" +colo_start_block_replication(void) "Block replication is started" +colo_stop_block_replication(const char *reason) "Block replication is stopped(reason: '%s')" # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -- 1.8.3.1