From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [PATCH Remus v4 2/2] libxc/restore: implement Remus checkpointed restore Date: Thu, 14 May 2015 10:24:12 +0100 Message-ID: <555469BC.1020703@citrix.com> References: <1431593786-4936-1-git-send-email-yanghy@cn.fujitsu.com> <1431593786-4936-3-git-send-email-yanghy@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="windows-1252" Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: <1431593786-4936-3-git-send-email-yanghy@cn.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Yang Hongyang , xen-devel@lists.xen.org Cc: wei.liu2@citrix.com, ian.campbell@citrix.com, wency@cn.fujitsu.com, guijianfeng@cn.fujitsu.com, yunhong.jiang@intel.com, eddie.dong@intel.com, rshriram@cs.ubc.ca, ian.jackson@eu.citrix.com List-Id: xen-devel@lists.xenproject.org On 14/05/15 09:56, Yang Hongyang wrote: > With Remus, the restore flow should be: > the first full migration stream -> { periodically restore stream } > > Signed-off-by: Yang Hongyang > Signed-off-by: Andrew Cooper > CC: Ian Campbell > CC: Ian Jackson > CC: Wei Liu > --- > tools/libxc/xc_sr_common.h | 14 +++++ > tools/libxc/xc_sr_restore.c | 121 ++++++++++++++++++++++++++++++++++++++= ++---- > 2 files changed, 125 insertions(+), 10 deletions(-) > > diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h > index f8121e7..3bf27f1 100644 > --- a/tools/libxc/xc_sr_common.h > +++ b/tools/libxc/xc_sr_common.h > @@ -208,6 +208,20 @@ struct xc_sr_context > /* Plain VM, or checkpoints over time. */ > bool checkpointed; > = > + /* Currently buffering records between a checkpoint */ > + bool buffer_all_records; > + > +/* > + * With Remus, we buffer the records sent by the primary at checkpoint, > + * in case the primary will fail, we can recover from the last > + * checkpoint state. > + * This should be enough because primary only send dirty pages at > + * checkpoint. > + */ > +#define MAX_BUF_RECORDS 1024 > + struct xc_sr_record *buffered_records; > + unsigned buffered_rec_num; > + > /* > * Xenstore and Console parameters. > * INPUT: evtchn & domid > diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c > index 9ab5760..3c93406 100644 > --- a/tools/libxc/xc_sr_restore.c > +++ b/tools/libxc/xc_sr_restore.c > @@ -468,10 +468,73 @@ static int handle_page_data(struct xc_sr_context *c= tx, struct xc_sr_record *rec) > return rc; > } > = > +static int process_record(struct xc_sr_context *ctx, struct xc_sr_record= *rec); > +static int handle_checkpoint(struct xc_sr_context *ctx) > +{ > + xc_interface *xch =3D ctx->xch; > + int rc =3D 0; > + unsigned i; > + struct xc_sr_record *rec; > + > + if ( !ctx->restore.checkpointed ) > + { > + ERROR("Found checkpoint in non-checkpointed stream"); > + rc =3D -1; > + goto err; > + } > + > + if ( ctx->restore.buffer_all_records ) > + { > + IPRINTF("All records buffered"); > + > + /* > + * We need to set buffer_all_records to false in > + * order to process records instead of buffer records. > + * buffer_all_records should be set back to true after > + * we successfully processed all records. > + */ > + ctx->restore.buffer_all_records =3D false; > + for ( i =3D 0; i < ctx->restore.buffered_rec_num; i++) Space before closing bracket. > + { > + rec =3D ctx->restore.buffered_records + > + i * sizeof(struct xc_sr_record); This pointer arithmetic looks wrong. FWIW, "rec =3D &ctx->restore.buffered_records[i];" would be clearer, although you don't even need to pull it into a variable as it is only referenced once. > + rc =3D process_record(ctx, rec); > + if ( rc ) > + goto err; > + } > + ctx->restore.buffered_rec_num =3D 0; > + ctx->restore.buffer_all_records =3D true; > + IPRINTF("All records processed"); > + } > + else > + ctx->restore.buffer_all_records =3D true; > + > + err: > + return rc; > +} > + > static int process_record(struct xc_sr_context *ctx, struct xc_sr_record= *rec) > { > xc_interface *xch =3D ctx->xch; > int rc =3D 0; > + struct xc_sr_record *buf_rec; > + > + if ( ctx->restore.buffer_all_records && > + rec->type !=3D REC_TYPE_END && > + rec->type !=3D REC_TYPE_CHECKPOINT ) > + { > + if ( ctx->restore.buffered_rec_num >=3D MAX_BUF_RECORDS ) > + { > + ERROR("There are too many records within a checkpoint"); > + return -1; > + } > + > + buf_rec =3D ctx->restore.buffered_records + > + ctx->restore.buffered_rec_num++ * sizeof(struct xc_sr_= record); Ah - this is how the other bit of pointer arithmetic doesn=92t break, but it will wander off the array if the sender provides more than 32 records. > + memcpy(buf_rec, rec, sizeof(struct xc_sr_record)); As before, memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++], rec, sizeof(*rec)); might be a little more simple. > + > + return 0; > + } > = > switch ( rec->type ) > { > @@ -487,12 +550,17 @@ static int process_record(struct xc_sr_context *ctx= , struct xc_sr_record *rec) > ctx->restore.verify =3D true; > break; > = > + case REC_TYPE_CHECKPOINT: > + rc =3D handle_checkpoint(ctx); > + break; > + > default: > rc =3D ctx->restore.ops.process_record(ctx, rec); > break; > } > = > free(rec->data); > + rec->data =3D NULL; > = > if ( rc =3D=3D RECORD_NOT_PROCESSED ) > { > @@ -529,6 +597,15 @@ static int setup(struct xc_sr_context *ctx) > goto err; > } > = > + ctx->restore.buffered_records =3D malloc( > + MAX_BUF_RECORDS * sizeof(struct xc_sr_record)); > + if ( !ctx->restore.buffered_records ) > + { > + ERROR("Unable to allocate memory for buffered records"); > + rc =3D -1; > + goto err; > + } > + > err: > return rc; > } > @@ -536,7 +613,15 @@ static int setup(struct xc_sr_context *ctx) > static void cleanup(struct xc_sr_context *ctx) > { > xc_interface *xch =3D ctx->xch; > + unsigned i; > + struct xc_sr_record *rec; > = > + for ( i =3D 0; i < ctx->restore.buffered_rec_num; i++) Style. > + { > + rec =3D ctx->restore.buffered_records + i * sizeof(struct xc_sr_= record); > + free(rec->data); More bad pointer arithmetic. Other than the pointer arithmetic issues (and a few minor style issues), this patch looks fine. ~Andrew > + } > + free(ctx->restore.buffered_records); > free(ctx->restore.populated_pfns); > if ( ctx->restore.ops.cleanup(ctx) ) > PERROR("Failed to clean up"); > @@ -564,7 +649,27 @@ static int restore(struct xc_sr_context *ctx) > { > rc =3D read_record(ctx, &rec); > if ( rc ) > - goto err; > + { > + if ( ctx->restore.buffer_all_records ) > + goto remus_failover; > + else > + goto err; > + } > + > +#ifdef XG_LIBXL_HVM_COMPAT > + if ( ctx->dominfo.hvm && > + (rec.type =3D=3D REC_TYPE_END || rec.type =3D=3D REC_TYPE_C= HECKPOINT) ) > + { > + rc =3D read_qemu(ctx); > + if ( rc ) > + { > + if ( ctx->restore.buffer_all_records ) > + goto remus_failover; > + else > + goto err; > + } > + } > +#endif > = > rc =3D process_record(ctx, &rec); > if ( rc ) > @@ -572,15 +677,11 @@ static int restore(struct xc_sr_context *ctx) > = > } while ( rec.type !=3D REC_TYPE_END ); > = > -#ifdef XG_LIBXL_HVM_COMPAT > - if ( ctx->dominfo.hvm ) > - { > - rc =3D read_qemu(ctx); > - if ( rc ) > - goto err; > - } > -#endif > - > + remus_failover: > + /* > + * With Remus, if we reach here, there must be some error on primary, > + * failover from the last checkpoint state. > + */ > rc =3D ctx->restore.ops.stream_complete(ctx); > if ( rc ) > goto err;