From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:37297) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a1AP3-0000zr-14 for qemu-devel@nongnu.org; Tue, 24 Nov 2015 05:00:50 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1a1AP1-0000Cx-ED for qemu-devel@nongnu.org; Tue, 24 Nov 2015 05:00:48 -0500 Received: from szxga03-in.huawei.com ([119.145.14.66]:27216) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a1AP0-00007W-9O for qemu-devel@nongnu.org; Tue, 24 Nov 2015 05:00:47 -0500 From: zhanghailiang Date: Tue, 24 Nov 2015 17:25:49 +0800 Message-ID: <1448357149-17572-40-git-send-email-zhang.zhanghailiang@huawei.com> In-Reply-To: <1448357149-17572-1-git-send-email-zhang.zhanghailiang@huawei.com> References: <1448357149-17572-1-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [PATCH COLO-Frame v11 39/39] COLO: Add block replication into colo process List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, zhanghailiang , arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com, hongyang.yang@easystack.cn Make sure the master starts block replication after the slave's block replication has started. 
Signed-off-by: zhanghailiang Signed-off-by: Wen Congyang Signed-off-by: Li Zhijian --- migration/colo.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ trace-events | 2 ++ 2 files changed, 62 insertions(+) diff --git a/migration/colo.c b/migration/colo.c index b1b7905..c534ff9 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -20,6 +20,7 @@ #include "migration/failover.h" #include "qapi-event.h" #include "net/filter.h" +#include "block/block_int.h" /* * The delay time before qemu begin the procedure of default failover treatment. @@ -62,6 +63,7 @@ static void secondary_vm_do_failover(void) { int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; /* Can not do failover during the process of VM's loading VMstate, Or * it will break the secondary VM. @@ -79,6 +81,12 @@ static void secondary_vm_do_failover(void) migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); + if (!autostart) { error_report("\"-S\" qemu option will be ignored in secondary side"); /* recover runstate to normal migration finish state */ @@ -110,6 +118,7 @@ static void primary_vm_do_failover(void) { MigrationState *s = migrate_get_current(); int old_state; + Error *local_err = NULL; if (s->state != MIGRATION_STATUS_FAILED) { migrate_set_state(&s->state, MIGRATION_STATUS_COLO, @@ -134,6 +143,12 @@ static void primary_vm_do_failover(void) } /* Don't buffer any packets while exited COLO */ qemu_set_default_filter_buffers(false); + + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); } void colo_do_failover(MigrationState *s) @@ -212,6 +227,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s, int colo_shutdown; size_t size; QEMUFile *trans = NULL; + 
Error *local_err = NULL; ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0); if (ret < 0) { @@ -250,6 +266,16 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + /* we call this api although this may do nothing on primary side */ + qemu_mutex_lock_iothread(); + bdrv_do_checkpoint_all(&local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + error_report_err(local_err); + ret = -1; + goto out; + } + ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0); if (ret < 0) { goto out; @@ -296,6 +322,10 @@ static int colo_do_checkpoint_transaction(MigrationState *s, qemu_release_default_filters_packets(); if (colo_shutdown) { + qemu_mutex_lock_iothread(); + bdrv_stop_replication_all(false, NULL); + trace_colo_stop_block_replication("shutdown"); + qemu_mutex_unlock_iothread(); colo_ctl_put(s->to_dst_file, COLO_COMMAND_GUEST_SHUTDOWN, 0); qemu_fflush(s->to_dst_file); colo_shutdown_requested = 0; @@ -341,6 +371,7 @@ static void colo_process_checkpoint(MigrationState *s) int64_t error_time; int ret = 0; uint64_t value; + Error *local_err = NULL; failover_init_state(); @@ -376,6 +407,15 @@ static void colo_process_checkpoint(MigrationState *s) qemu_set_default_filter_buffers(true); qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_PRIMARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + error_report_err(local_err); + ret = -EINVAL; + goto out; + } + trace_colo_start_block_replication(); vm_start(); qemu_mutex_unlock_iothread(); trace_colo_vm_state_change("stop", "run"); @@ -492,6 +532,8 @@ static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request) case COLO_COMMAND_GUEST_SHUTDOWN: qemu_mutex_lock_iothread(); vm_stop_force_state(RUN_STATE_COLO); + bdrv_stop_replication_all(false, NULL); + trace_colo_stop_block_replication("shutdown"); qemu_system_shutdown_request_core(); qemu_mutex_unlock_iothread(); /* the main thread will exit and 
termiante the whole @@ -524,6 +566,7 @@ void *colo_process_incoming_thread(void *opaque) int64_t error_time, current_time; int ret = 0; uint64_t value; + Error *local_err = NULL; migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COLO); @@ -560,6 +603,16 @@ void *colo_process_incoming_thread(void *opaque) goto out; } + qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_SECONDARY, &local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + error_report_err(local_err); + goto out; + } + trace_colo_start_block_replication(); + ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0); if (ret < 0) { goto out; @@ -639,6 +692,13 @@ void *colo_process_incoming_thread(void *opaque) qemu_mutex_unlock_iothread(); goto out; } + /* discard colo disk buffer */ + bdrv_do_checkpoint_all(&local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + vmstate_loading = false; + goto out; + } vmstate_loading = false; qemu_mutex_unlock_iothread(); diff --git a/trace-events b/trace-events index b80c1e0..5f95b3c 100644 --- a/trace-events +++ b/trace-events @@ -1583,6 +1583,8 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'" colo_ctl_put(const char *msg, uint64_t value) "Send '%s' cmd, value: %" PRIu64"" colo_ctl_get(const char *msg, uint64_t value) "Receive '%s' cmd, value: %" PRIu64"" colo_failover_set_state(int new_state) "new state %d" +colo_start_block_replication(void) "Block replication is started" +colo_stop_block_replication(const char *reason) "Block replication is stopped(reason: '%s')" # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -- 1.8.3.1