From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:52286) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZydSI-0006OE-EF for qemu-devel@nongnu.org; Tue, 17 Nov 2015 05:25:44 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZydSE-0007Ki-B8 for qemu-devel@nongnu.org; Tue, 17 Nov 2015 05:25:42 -0500 Received: from szxga01-in.huawei.com ([58.251.152.64]:30256) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZydSC-0007KD-Oy for qemu-devel@nongnu.org; Tue, 17 Nov 2015 05:25:38 -0500 References: <1446551816-15768-1-git-send-email-zhang.zhanghailiang@huawei.com> <1446551816-15768-13-git-send-email-zhang.zhanghailiang@huawei.com> <20151106185921.GK4267@work-vm> <564064C5.80608@huawei.com> <20151113185317.GD8720@work-vm> From: zhanghailiang Message-ID: <564AFF6A.80101@huawei.com> Date: Tue, 17 Nov 2015 18:20:26 +0800 MIME-Version: 1.0 In-Reply-To: <20151113185317.GD8720@work-vm> Content-Type: text/plain; charset="utf-8"; format=flowed Content-Transfer-Encoding: 8bit Subject: Re: [Qemu-devel] [PATCH COLO-Frame v10 12/38] COLO: Save PVM state to secondary side when do checkpoint List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: "Dr. David Alan Gilbert" Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, qemu-devel@nongnu.org, arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com On 2015/11/14 2:53, Dr. David Alan Gilbert wrote: > * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote: >> On 2015/11/7 2:59, Dr. David Alan Gilbert wrote: >>> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote: >>>> The main process of checkpoint is to synchronize SVM with PVM. >>>> VM's state includes ram and device state. So we will migrate PVM's >>>> state to SVM when do checkpoint, just like migration does. >>>> >>>> We will cache PVM's state in slave, we use QEMUSizedBuffer >>>> to store the data, we need to know the size of VM state, so in master, >>>> we use qsb to store VM state temporarily, get the data size by call qsb_get_length() >>>> and then migrate the data to the qsb in the secondary side. >>>> >>>> Signed-off-by: zhanghailiang >>>> Signed-off-by: Gonglei >>>> Signed-off-by: Li Zhijian >>>> --- >>>> migration/colo.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++---- >>>> migration/ram.c | 47 +++++++++++++++++++++++++++++-------- >>>> migration/savevm.c | 2 +- >>>> 3 files changed, 101 insertions(+), 16 deletions(-) >>>> >>>> diff --git a/migration/colo.c b/migration/colo.c >>>> index 2510762..b865513 100644 >>>> --- a/migration/colo.c >>>> +++ b/migration/colo.c >>>> @@ -17,6 +17,9 @@ >>>> #include "qemu/error-report.h" >>>> #include "qemu/sockets.h" >>>> >>>> +/* colo buffer */ >>>> +#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) >>>> + >>>> bool colo_supported(void) >>>> { >>>> return true; >>>> @@ -94,9 +97,12 @@ static int colo_ctl_get(QEMUFile *f, uint32_t require) >>>> return value; >>>> } >>>> >>>> -static int colo_do_checkpoint_transaction(MigrationState *s) >>>> +static int colo_do_checkpoint_transaction(MigrationState *s, >>>> + QEMUSizedBuffer *buffer) >>>> { >>>> int ret; >>>> + size_t size; >>>> + QEMUFile *trans = NULL; >>>> >>>> ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0); >>>> if (ret < 0) { >>>> @@ -107,15 +113,47 @@ static int colo_do_checkpoint_transaction(MigrationState *s) >>>> if (ret < 0) { >>>> goto out; >>>> } >>>> + /* Reset colo buffer and open it for write */ >>>> + qsb_set_length(buffer, 0); >>>> + trans = qemu_bufopen("w", buffer); >>>> + if (!trans) { >>>> + error_report("Open colo buffer for write failed"); >>>> + goto out; >>>> + } >>>> >>>> - /* TODO: suspend and save vm state to colo buffer */ >>>> + qemu_mutex_lock_iothread(); >>>> + vm_stop_force_state(RUN_STATE_COLO); >>>> + qemu_mutex_unlock_iothread(); >>>> + trace_colo_vm_state_change("run", "stop"); >>>> + >>>> + /* Disable block migration */ >>>> + s->params.blk = 0; >>>> + s->params.shared = 0; >>>> + qemu_savevm_state_header(trans); >>>> + qemu_savevm_state_begin(trans, &s->params); >>>> + qemu_mutex_lock_iothread(); >>>> + qemu_savevm_state_complete(trans); >>>> + qemu_mutex_unlock_iothread(); >>>> + >>>> + qemu_fflush(trans); >>>> >>>> ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0); >>>> if (ret < 0) { >>>> goto out; >>>> } >>>> + /* we send the total size of the vmstate first */ >>>> + size = qsb_get_length(buffer); >>>> + ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size); >>>> + if (ret < 0) { >>>> + goto out; >>>> + } >>>> >>>> - /* TODO: send vmstate to Secondary */ >>>> + qsb_put_buffer(s->to_dst_file, buffer, size); >>>> + qemu_fflush(s->to_dst_file); >>>> + ret = qemu_file_get_error(s->to_dst_file); >>>> + if (ret < 0) { >>>> + goto out; >>>> + } >>>> >>>> ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED); >>>> if (ret < 0) { >>>> @@ -127,14 +165,24 @@ static int colo_do_checkpoint_transaction(MigrationState *s) >>>> goto out; >>>> } >>>> >>>> - /* TODO: resume Primary */ >>>> + ret = 0; >>>> + /* resume master */ >>>> + qemu_mutex_lock_iothread(); >>>> + vm_start(); >>>> + qemu_mutex_unlock_iothread(); >>>> + trace_colo_vm_state_change("stop", "run"); >>>> >>>> out: >>>> + if (trans) { >>>> + qemu_fclose(trans); >>>> + } >>>> + >>>> return ret; >>>> } >>>> >>>> static void colo_process_checkpoint(MigrationState *s) >>>> { >>>> + QEMUSizedBuffer *buffer = NULL; >>>> int fd, ret = 0; >>>> >>>> /* Dup the fd of to_dst_file */ >>>> @@ -159,6 +207,13 @@ static void colo_process_checkpoint(MigrationState *s) >>>> goto out; >>>> } >>>> >>>> + buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE); >>>> + if (buffer == NULL) { >>>> + ret = -ENOMEM; >>>> + error_report("Failed to allocate buffer!"); >>> >>> Please say 'Failed to allocate colo buffer'; QEMU has lots and lots of buffers. >>> >> >> OK, will fix it in next version. >> >>>> + goto out; >>>> + } >>>> + >>>> qemu_mutex_lock_iothread(); >>>> vm_start(); >>>> qemu_mutex_unlock_iothread(); >>>> @@ -166,7 +221,7 @@ static void colo_process_checkpoint(MigrationState *s) >>>> >>>> while (s->state == MIGRATION_STATUS_COLO) { >>>> /* start a colo checkpoint */ >>>> - ret = colo_do_checkpoint_transaction(s); >>>> + ret = colo_do_checkpoint_transaction(s, buffer); >>>> if (ret < 0) { >>>> goto out; >>>> } >>>> @@ -179,6 +234,9 @@ out: >>>> migrate_set_state(&s->state, MIGRATION_STATUS_COLO, >>>> MIGRATION_STATUS_COMPLETED); >>>> >>>> + qsb_free(buffer); >>>> + buffer = NULL; >>>> + >>>> if (s->from_dst_file) { >>>> qemu_fclose(s->from_dst_file); >>>> } >>>> diff --git a/migration/ram.c b/migration/ram.c >>>> index a25bcc7..5784c15 100644 >>>> --- a/migration/ram.c >>>> +++ b/migration/ram.c >>>> @@ -38,6 +38,7 @@ >>>> #include "trace.h" >>>> #include "exec/ram_addr.h" >>>> #include "qemu/rcu_queue.h" >>>> +#include "migration/colo.h" >>>> >>>> #ifdef DEBUG_MIGRATION_RAM >>>> #define DPRINTF(fmt, ...) \ >>>> @@ -1165,15 +1166,8 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) >>>> } >>>> } >>>> >>>> -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has >>>> - * long-running RCU critical section. When rcu-reclaims in the code >>>> - * start to become numerous it will be necessary to reduce the >>>> - * granularity of these critical sections. >>>> - */ >>>> - >>>> -static int ram_save_setup(QEMUFile *f, void *opaque) >>>> +static int ram_save_init_globals(void) >>>> { >>>> - RAMBlock *block; >>>> int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ >>>> >>>> dirty_rate_high_cnt = 0; >>>> @@ -1233,6 +1227,31 @@ static int ram_save_setup(QEMUFile *f, void *opaque) >>>> migration_bitmap_sync(); >>>> qemu_mutex_unlock_ramlist(); >>>> qemu_mutex_unlock_iothread(); >>>> + rcu_read_unlock(); >>>> + >>>> + return 0; >>>> +} >>> >>> It surprises me you want migration_bitmap_sync in ram_save_init_globals(), >>> but I guess you want the first sync at the start. >>> >> >> Er, sorry,i don't quite understand. >> Here. I just split part codes of ram_save_setup() >> into a helper function ram_save_init_global(), to make it more clear. >> We can't do initial work for twice. Is there any thing wrong ? > > No, that's OK - it just seemed odd for a function like 'init_globals' > to do such a big side effect of doing the sync; but yes, it makes sense > since it's just a split. > >>>> +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has >>>> + * long-running RCU critical section. When rcu-reclaims in the code >>>> + * start to become numerous it will be necessary to reduce the >>>> + * granularity of these critical sections. >>>> + */ >>>> + >>>> +static int ram_save_setup(QEMUFile *f, void *opaque) >>>> +{ >>>> + RAMBlock *block; >>>> + >>>> + /* >>>> + * migration has already setup the bitmap, reuse it. >>>> + */ >>>> + if (!migration_in_colo_state()) { >>>> + if (ram_save_init_globals() < 0) { >>>> + return -1; >>>> + } >>>> + } >>>> + >>>> + rcu_read_lock(); >>>> >>>> qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); >>>> >>>> @@ -1332,7 +1351,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque) >>>> while (true) { >>>> int pages; >>>> >>>> - pages = ram_find_and_save_block(f, true, &bytes_transferred); >>>> + pages = ram_find_and_save_block(f, !migration_in_colo_state(), >>>> + &bytes_transferred); >>>> /* no more blocks to sent */ >>>> if (pages == 0) { >>>> break; >>>> @@ -1343,8 +1363,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque) >>>> ram_control_after_iterate(f, RAM_CONTROL_FINISH); >>>> >>>> rcu_read_unlock(); >>>> + /* >>>> + * Since we need to reuse dirty bitmap in colo, >>>> + * don't cleanup the bitmap. >>>> + */ >>>> + if (!migrate_colo_enabled() || >>>> + migration_has_failed(migrate_get_current())) { >>>> + migration_end(); >>>> + } >>>> >>>> - migration_end(); >>>> qemu_put_be64(f, RAM_SAVE_FLAG_EOS); >>>> >>>> return 0; >>>> diff --git a/migration/savevm.c b/migration/savevm.c >>>> index dbcc39a..0faf12b 100644 >>>> --- a/migration/savevm.c >>>> +++ b/migration/savevm.c >>>> @@ -48,7 +48,7 @@ >>>> #include "qemu/iov.h" >>>> #include "block/snapshot.h" >>>> #include "block/qapi.h" >>>> - >>>> +#include "migration/colo.h" >>>> >>>> #ifndef ETH_P_RARP >>>> #define ETH_P_RARP 0x8035 >>> >>> Wrong patch? >>> >> >> No, we have call migration_in_colo_state() in qemu_savevm_state_begin(). >> So we have to include "migration/colo.h" > > I don't think you use it in savevm.c until patch 30, so you can add > the #include in patch 30 (or whichever is the patch that first needs it). > Ha, I know what you mean. And yes, you are right, we shouldn't call migration_in_colo_state() in qemu_savevm_state_begin() here. Good catch. I will fix it in next version. Thanks. > Dave > > >> >>> >>> So other than those minor things: >>> >>> Reviewed-by: Dr. David Alan Gilbert >>> >>> but watch out for the recent changes to migrate_end that went in >>> a few days ago. >>> >> >> Thanks for reminding me, i have rebased that. ;) >> >> zhanghailiang >> >>> Dave >>> >>>> -- >>>> 1.8.3.1 >>>> >>>> >>> -- >>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >>> >>> . >>> >> >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > . >