qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org, qemu-devel <qemu-devel@nongnu.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Juan Quintela" <quintela@redhat.com>,
	"Eduardo Habkost" <ehabkost@redhat.com>
Subject: Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save
Date: Mon, 7 Nov 2016 15:46:11 +0000	[thread overview]
Message-ID: <20161107154610.GG2054@work-vm> (raw)
In-Reply-To: <20161104165933.GA3027@amt.cnet>

* Marcelo Tosatti (mtosatti@redhat.com) wrote:
> This patch, relative to pre-copy migration codepath,
> measures the time between vm_stop() and pre_save(),
> which includes copying the remaining RAM to destination,
> and advances the clock by that amount.
> 
> In a VM with 5 seconds downtime, this reduces the guest
> clock difference on destination from 5s to 0.2s.
> 
> Tested with Linux and Windows 2012 R2 guests with -cpu XXX,+hv-time.

One thing that bothers me is that it's only this clock that's
getting corrected; doesn't it cause things to get upset when
one clock moves and the others don't?
Shouldn't the pause delay be recorded somewhere architecturally
independent and then be a thing that kvm-clock happens to use and
other clocks might as well?

Dave

> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
> 
> v2: use subsection (Juan Quintela)
>     fix older machine types support
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 0f75dd3..a2a02ac 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -22,9 +22,11 @@
>  #include "kvm_i386.h"
>  #include "hw/sysbus.h"
>  #include "hw/kvm/clock.h"
> +#include "migration/migration.h"
>  
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> +#include <time.h>
>  
>  #define TYPE_KVM_CLOCK "kvmclock"
>  #define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK)
> @@ -35,7 +37,13 @@ typedef struct KVMClockState {
>      /*< public >*/
>  
>      uint64_t clock;
> +    uint64_t ns;
>      bool clock_valid;
> +
> +    uint64_t advance_clock;
> +    struct timespec t_aftervmstop;
> +
> +    bool adv_clock_enabled;
>  } KVMClockState;
>  
>  struct pvclock_vcpu_time_info {
> @@ -100,6 +108,11 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>              s->clock = time_at_migration;
>          }
>  
> +        if (s->advance_clock && s->clock + s->advance_clock > s->clock) {
> +            s->clock += s->advance_clock;
> +            s->advance_clock = 0;
> +        }
> +
>          data.clock = s->clock;
>          ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
>          if (ret < 0) {
> @@ -135,6 +148,18 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>              abort();
>          }
>          s->clock = data.clock;
> +        /*
> +         * Transition from VM-running to VM-stopped via migration?
> +         * Record when the VM was stopped.
> +         */
> +
> +        if (state == RUN_STATE_FINISH_MIGRATE &&
> +            !migration_in_postcopy(migrate_get_current())) {
> +            clock_gettime(CLOCK_MONOTONIC, &s->t_aftervmstop);
> +        } else {
> +            s->t_aftervmstop.tv_sec = 0;
> +            s->t_aftervmstop.tv_nsec = 0;
> +        }
>  
>          /*
>           * If the VM is stopped, declare the clock state valid to
> @@ -152,6 +177,77 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>  
> +static uint64_t clock_delta(struct timespec *before, struct timespec *after)
> +{
> +    if (before->tv_sec > after->tv_sec ||
> +        (before->tv_sec == after->tv_sec &&
> +         before->tv_nsec > after->tv_nsec)) {
> +        fprintf(stderr, "clock_delta failed: before=(%ld sec, %ld nsec),"
> +                        "after=(%ld sec, %ld nsec)\n", before->tv_sec,
> +                        before->tv_nsec, after->tv_sec, after->tv_nsec);
> +        abort();
> +    }
> +
> +    return (after->tv_sec - before->tv_sec) * 1000000000ULL +
> +            after->tv_nsec - before->tv_nsec;
> +}
> +
> +static void kvmclock_pre_save(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +    struct timespec now;
> +    uint64_t ns;
> +
> +    if (s->t_aftervmstop.tv_sec == 0) {
> +        return;
> +    }
> +
> +    clock_gettime(CLOCK_MONOTONIC, &now);
> +
> +    ns = clock_delta(&s->t_aftervmstop, &now);
> +
> +    /*
> +     * Linux guests can overflow if time jumps
> +     * forward in large increments.
> +     * Cap maximum adjustment to 10 minutes.
> +     */
> +    ns = MIN(ns, 600000000000ULL);
> +
> +    if (s->clock + ns > s->clock) {
> +        s->ns = ns;
> +    }
> +}
> +
> +static int kvmclock_post_load(void *opaque, int version_id)
> +{
> +    KVMClockState *s = opaque;
> +
> +    /* save the value from incoming migration */
> +    s->advance_clock = s->ns;
> +
> +    return 0;
> +}
> +
> +static bool kvmclock_ns_needed(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +
> +    return s->adv_clock_enabled;
> +}
> +
> +static const VMStateDescription kvmclock_advance_ns = {
> +    .name = "kvmclock/advance_ns",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = kvmclock_ns_needed,
> +    .pre_save = kvmclock_pre_save,
> +    .post_load = kvmclock_post_load,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(ns, KVMClockState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription kvmclock_vmsd = {
>      .name = "kvmclock",
>      .version_id = 1,
> @@ -159,15 +255,25 @@ static const VMStateDescription kvmclock_vmsd = {
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT64(clock, KVMClockState),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &kvmclock_advance_ns,
> +        NULL
>      }
>  };
>  
> +static Property kvmclock_properties[] = {
> +    DEFINE_PROP_BOOL("advance_clock", KVMClockState, adv_clock_enabled, true),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static void kvmclock_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
>  
>      dc->realize = kvmclock_realize;
>      dc->vmsd = &kvmclock_vmsd;
> +    dc->props = kvmclock_properties;
>  }
>  
>  static const TypeInfo kvmclock_info = {
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 98dc772..243352e 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -370,6 +370,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
>  #define PC_COMPAT_2_7 \
>      HW_COMPAT_2_7 \
>      {\
> +        .driver   = "kvmclock",\
> +        .property = "advance_clock",\
> +        .value    = "off",\
> +    },\
> +    {\
>          .driver   = TYPE_X86_CPU,\
>          .property = "l3-cache",\
>          .value    = "off",\
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

  parent reply	other threads:[~2016-11-07 15:46 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-04  9:43 [Qemu-devel] [QEMU PATCH] kvmclock: advance clock by time window between vm_stop and pre_save Marcelo Tosatti
2016-11-04 12:28 ` Juan Quintela
2016-11-04 12:35   ` Marcelo Tosatti
2016-11-04 14:00     ` Marcelo Tosatti
2016-11-04 15:25 ` Radim Krčmář
2016-11-04 15:33   ` Paolo Bonzini
2016-11-04 15:48     ` Radim Krčmář
2016-11-04 15:57       ` Paolo Bonzini
2016-11-04 17:16         ` Radim Krčmář
2016-11-04 21:29           ` Paolo Bonzini
2016-11-04 21:47             ` Marcelo Tosatti
2016-11-04 22:35               ` Paolo Bonzini
2016-11-07 14:31           ` Roman Kagan
2016-11-07 19:31             ` Marcelo Tosatti
2016-11-04 16:24       ` Marcelo Tosatti
2016-11-04 17:34         ` Radim Krčmář
2016-11-04 18:29           ` Marcelo Tosatti
2016-11-04 20:07             ` Radim Krčmář
2016-11-04 16:04   ` Marcelo Tosatti
2016-11-04 17:07   ` Marcelo Tosatti
2016-11-04 17:39     ` Radim Krčmář
2016-11-04 18:31       ` Marcelo Tosatti
2016-11-07 13:08       ` Dr. David Alan Gilbert
2016-11-04 16:59 ` [Qemu-devel] [QEMU PATCH v2] " Marcelo Tosatti
2016-11-04 18:57   ` Juan Quintela
2016-11-07 15:46   ` Dr. David Alan Gilbert [this message]
2016-11-07 19:41     ` Marcelo Tosatti
2016-11-07 20:03       ` Dr. David Alan Gilbert
2016-11-08  0:06         ` Marcelo Tosatti
2016-11-08 10:22           ` Dr. David Alan Gilbert
2016-11-08 13:32             ` Marcelo Tosatti
2016-11-09 19:32               ` Marcelo Tosatti
2016-11-09 16:23             ` Paolo Bonzini
2016-11-09 16:28               ` Dr. David Alan Gilbert
2016-11-09 16:33                 ` Paolo Bonzini
2016-11-10 11:48               ` Marcelo Tosatti
2016-11-10 17:57                 ` Paolo Bonzini
2016-11-11 14:23                   ` Marcelo Tosatti
2017-02-07 10:02       ` Wanpeng Li
2017-02-07 12:18         ` Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161107154610.GG2054@work-vm \
    --to=dgilbert@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=rkrcmar@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).