From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:33433) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XSKEL-0007Wo-BD for qemu-devel@nongnu.org; Fri, 12 Sep 2014 02:21:18 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1XSKEG-0002R8-L4 for qemu-devel@nongnu.org; Fri, 12 Sep 2014 02:21:13 -0400 Received: from [59.151.112.132] (port=22373 helo=heian.cn.fujitsu.com) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1XSKEF-0002Qo-9M for qemu-devel@nongnu.org; Fri, 12 Sep 2014 02:21:08 -0400 Message-ID: <541290C5.4010905@cn.fujitsu.com> Date: Fri, 12 Sep 2014 14:20:53 +0800 From: Hongyang Yang MIME-Version: 1.0 References: <1406125538-27992-1-git-send-email-yanghy@cn.fujitsu.com> <1406125538-27992-12-git-send-email-yanghy@cn.fujitsu.com> <20140801150347.GE2430@work-vm> In-Reply-To: <20140801150347.GE2430@work-vm> Content-Type: text/plain; charset="UTF-8"; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [RFC PATCH 11/17] COLO ctl: implement colo checkpoint protocol List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: "Dr. David Alan Gilbert" Cc: kvm@vger.kernel.org, GuiJianfeng@cn.fujitsu.com, eddie.dong@intel.com, qemu-devel@nongnu.org, mrhines@linux.vnet.ibm.com =E5=9C=A8 08/01/2014 11:03 PM, Dr. David Alan Gilbert =E5=86=99=E9=81=93: > * Yang Hongyang (yanghy@cn.fujitsu.com) wrote: >> implement colo checkpoint protocol. >> >> Checkpoint synchronzing points. >> >> Primary Secondary >> NEW @ >> Suspend >> SUSPENDED @ >> Suspend&Save state >> SEND @ >> Send state Receive state >> RECEIVED @ >> Flush network Load state >> LOADED @ >> Resume Resume >> >> Start Comparing >> NOTE: >> 1) '@' who sends the message >> 2) Every sync-point is synchronized by two sides with only >> one handshake(single direction) for low-latency. 
>> If more strict synchronization is required, a opposite direction >> sync-point should be added. >> 3) Since sync-points are single direction, the remote side may >> go forward a lot when this side just receives the sync-point. >> >> Signed-off-by: Yang Hongyang >> --- >> migration-colo.c | 268 +++++++++++++++++++++++++++++++++++++++++++++++= ++++++-- >> 1 file changed, 262 insertions(+), 6 deletions(-) >> >> diff --git a/migration-colo.c b/migration-colo.c >> index 2699e77..a708872 100644 >> --- a/migration-colo.c >> +++ b/migration-colo.c >> @@ -24,6 +24,41 @@ >> */ >> #define CHKPOINT_TIMER 10000 >> >> +enum { >> + COLO_READY =3D 0x46, >> + >> + /* >> + * Checkpoint synchronzing points. >> + * >> + * Primary Secondary >> + * NEW @ >> + * Suspend >> + * SUSPENDED @ >> + * Suspend&Save state >> + * SEND @ >> + * Send state Receive state >> + * RECEIVED @ >> + * Flush network Load state >> + * LOADED @ >> + * Resume Resume >> + * >> + * Start Comparing >> + * NOTE: >> + * 1) '@' who sends the message >> + * 2) Every sync-point is synchronized by two sides with only >> + * one handshake(single direction) for low-latency. >> + * If more strict synchronization is required, a opposite direct= ion >> + * sync-point should be added. >> + * 3) Since sync-points are single direction, the remote side may >> + * go forward a lot when this side just receives the sync-point. 
>> + */ >> + COLO_CHECKPOINT_NEW, >> + COLO_CHECKPOINT_SUSPENDED, >> + COLO_CHECKPOINT_SEND, >> + COLO_CHECKPOINT_RECEIVED, >> + COLO_CHECKPOINT_LOADED, >> +}; >> + >> static QEMUBH *colo_bh; >> >> bool colo_supported(void) >> @@ -185,30 +220,161 @@ static const QEMUFileOps colo_read_ops =3D { >> .close =3D colo_close, >> }; >> >> +/* colo checkpoint control helper */ >> +static bool is_master(void); >> +static bool is_slave(void); >> + >> +static void ctl_error_handler(void *opaque, int err) >> +{ >> + if (is_slave()) { >> + /* TODO: determine whether we need to failover */ >> + /* FIXME: we will not failover currently, just kill slave */ >> + error_report("error: colo transmission failed!\n"); >> + exit(1); >> + } else if (is_master()) { >> + /* Master still alive, do not failover */ >> + error_report("error: colo transmission failed!\n"); >> + return; >> + } else { >> + error_report("COLO: Unexpected error happend!\n"); >> + exit(EXIT_FAILURE); >> + } >> +} >> + >> +static int colo_ctl_put(QEMUFile *f, uint64_t request) >> +{ >> + int ret =3D 0; >> + >> + qemu_put_be64(f, request); >> + qemu_fflush(f); >> + >> + ret =3D qemu_file_get_error(f); >> + if (ret < 0) { >> + ctl_error_handler(f, ret); >> + return 1; >> + } >> + >> + return ret; >> +} >> + >> +static int colo_ctl_get_value(QEMUFile *f, uint64_t *value) >> +{ >> + int ret =3D 0; >> + uint64_t temp; >> + >> + temp =3D qemu_get_be64(f); >> + >> + ret =3D qemu_file_get_error(f); >> + if (ret < 0) { >> + ctl_error_handler(f, ret); >> + return 1; >> + } >> + >> + *value =3D temp; >> + return 0; >> +} >> + >> +static int colo_ctl_get(QEMUFile *f, uint64_t require) >> +{ >> + int ret; >> + uint64_t value; >> + >> + ret =3D colo_ctl_get_value(f, &value); >> + if (ret) { >> + return ret; >> + } >> + >> + if (value !=3D require) { >> + error_report("unexpected state received!\n"); > > I find it useful to print the expected/received state to > be able to figure out what went wrong. Good idea! 
> >> + exit(1); >> + } >> + >> + return ret; >> +} >> + >> /* save */ >> >> -static __attribute__((unused)) bool is_master(void) >> +static bool is_master(void) >> { >> MigrationState *s =3D migrate_get_current(); >> return (s->state =3D=3D MIG_STATE_COLO); >> } >> >> +static int do_colo_transaction(MigrationState *s, QEMUFile *control, >> + QEMUFile *trans) >> +{ >> + int ret; >> + >> + ret =3D colo_ctl_put(s->file, COLO_CHECKPOINT_NEW); >> + if (ret) { >> + goto out; >> + } >> + >> + ret =3D colo_ctl_get(control, COLO_CHECKPOINT_SUSPENDED); > > What happens at this point if the slave just doesn't respond? > (i.e. the socket doesn't drop - you just don't get the byte). If the socket returns bytes that were not expected, we exit. If the socket returns an error, we do some cleanup and quit the COLO process. Refer to colo_ctl_get() and colo_ctl_get_value(). > >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: suspend and save vm state to colo buffer */ >> + >> + ret =3D colo_ctl_put(s->file, COLO_CHECKPOINT_SEND); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: send vmstate to slave */ >> + >> + ret =3D colo_ctl_get(control, COLO_CHECKPOINT_RECEIVED); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: Flush network etc. 
*/ >> + >> + ret =3D colo_ctl_get(control, COLO_CHECKPOINT_LOADED); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: resume master */ >> + >> +out: >> + return ret; >> +} >> + >> static void *colo_thread(void *opaque) >> { >> MigrationState *s =3D opaque; >> int dev_hotplug =3D qdev_hotplug, wait_cp =3D 0; >> int64_t start_time =3D qemu_clock_get_ms(QEMU_CLOCK_HOST); >> int64_t current_time; >> + QEMUFile *colo_control =3D NULL, *colo_trans =3D NULL; >> + int ret; >> >> if (colo_compare_init() < 0) { >> error_report("Init colo compare error\n"); >> goto out; >> } >> >> + colo_control =3D qemu_fopen_socket(qemu_get_fd(s->file), "rb"); >> + if (!colo_control) { >> + error_report("open colo_control failed\n"); >> + goto out; >> + } > > In my postcopy world I'm trying to abstract this type of thing into a 're= turn path' > so that the QEMUFile can implement it however it wants and you don't > need to assume it's a socket. But I'm still fighting some of those detai= ls. > > Dave > >> + >> qdev_hotplug =3D 0; >> >> colo_buffer_init(); >> >> + /* >> + * Wait for slave finish loading vm states and enter COLO >> + * restore. 
>> + */ >> + ret =3D colo_ctl_get(colo_control, COLO_READY); >> + if (ret) { >> + goto out; >> + } >> + >> while (s->state =3D=3D MIG_STATE_COLO) { >> /* wait for a colo checkpoint */ >> wait_cp =3D colo_compare(); >> @@ -230,13 +396,33 @@ static void *colo_thread(void *opaque) >> >> /* start a colo checkpoint */ >> >> - /*TODO: COLO save */ >> + /* open colo buffer for write */ >> + colo_trans =3D qemu_fopen_ops(&colo_buffer, &colo_write_ops); >> + if (!colo_trans) { >> + error_report("open colo buffer failed\n"); >> + goto out; >> + } >> >> + if (do_colo_transaction(s, colo_control, colo_trans)) { >> + goto out; >> + } >> + >> + qemu_fclose(colo_trans); >> + colo_trans =3D NULL; >> start_time =3D qemu_clock_get_ms(QEMU_CLOCK_HOST); >> } >> >> out: >> + if (colo_trans) { >> + qemu_fclose(colo_trans); >> + } >> + >> colo_buffer_destroy(); >> + >> + if (colo_control) { >> + qemu_fclose(colo_control); >> + } >> + >> colo_compare_destroy(); >> >> if (s->state !=3D MIG_STATE_ERROR) { >> @@ -281,7 +467,7 @@ void colo_init_checkpointer(MigrationState *s) >> >> static Coroutine *colo; >> >> -static __attribute__((unused)) bool is_slave(void) >> +static bool is_slave(void) >> { >> return colo !=3D NULL; >> } >> @@ -293,13 +479,32 @@ static __attribute__((unused)) bool is_slave(void) >> */ >> static int slave_wait_new_checkpoint(QEMUFile *f) >> { >> - /* TODO: wait checkpoint start command from master */ >> - return 1; >> + int fd =3D qemu_get_fd(f); >> + int ret; >> + uint64_t cmd; >> + >> + yield_until_fd_readable(fd); >> + >> + ret =3D colo_ctl_get_value(f, &cmd); >> + if (ret) { >> + return 1; >> + } >> + >> + if (cmd =3D=3D COLO_CHECKPOINT_NEW) { >> + return 0; >> + } else { >> + /* Unexpected data received */ >> + ctl_error_handler(f, ret); >> + return 1; >> + } >> } >> >> void colo_process_incoming_checkpoints(QEMUFile *f) >> { >> + int fd =3D qemu_get_fd(f); >> int dev_hotplug =3D qdev_hotplug; >> + QEMUFile *ctl =3D NULL; >> + int ret; >> >> if 
(!restore_use_colo()) { >> return; >> @@ -310,18 +515,69 @@ void colo_process_incoming_checkpoints(QEMUFile *f= ) >> colo =3D qemu_coroutine_self(); >> assert(colo !=3D NULL); >> >> + ctl =3D qemu_fopen_socket(fd, "wb"); >> + if (!ctl) { >> + error_report("can't open incoming channel\n"); >> + goto out; >> + } >> + >> colo_buffer_init(); >> >> + ret =3D colo_ctl_put(ctl, COLO_READY); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: in COLO mode, slave is runing, so start the vm */ >> + >> while (true) { >> if (slave_wait_new_checkpoint(f)) { >> break; >> } >> >> - /* TODO: COLO restore */ >> + /* start colo checkpoint */ >> + >> + /* TODO: suspend guest */ >> + >> + ret =3D colo_ctl_put(ctl, COLO_CHECKPOINT_SUSPENDED); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: open colo buffer for read */ >> + >> + ret =3D colo_ctl_get(f, COLO_CHECKPOINT_SEND); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: read migration data into colo buffer */ >> + >> + ret =3D colo_ctl_put(ctl, COLO_CHECKPOINT_RECEIVED); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: load vm state */ >> + >> + ret =3D colo_ctl_put(ctl, COLO_CHECKPOINT_LOADED); >> + if (ret) { >> + goto out; >> + } >> + >> + /* TODO: resume guest */ >> + >> + /* TODO: close colo buffer */ >> } >> >> +out: >> colo_buffer_destroy(); >> colo =3D NULL; >> + >> + if (ctl) { >> + qemu_fclose(ctl); >> + } >> + >> restore_exit_colo(); >> >> qdev_hotplug =3D dev_hotplug; >> -- >> 1.9.1 >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > . > --=20 Thanks, Yang.