All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org, qemu-devel <qemu-devel@nongnu.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Juan Quintela" <quintela@redhat.com>,
	"Eduardo Habkost" <ehabkost@redhat.com>
Subject: Re: [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save
Date: Mon, 7 Nov 2016 15:46:11 +0000	[thread overview]
Message-ID: <20161107154610.GG2054@work-vm> (raw)
In-Reply-To: <20161104165933.GA3027@amt.cnet>

* Marcelo Tosatti (mtosatti@redhat.com) wrote:
> This patch, relative to pre-copy migration codepath,
> measures the time between vm_stop() and pre_save(),
> which includes copying the remaining RAM to destination,
> and advances the clock by that amount.
> 
> In a VM with 5 seconds downtime, this reduces the guest
> clock difference on destination from 5s to 0.2s.
> 
> Tested with Linux and Windows 2012 R2 guests with -cpu XXX,+hv-time.

One thing that bothers me is that it's only this clock that's
getting corrected; doesn't it cause things to get upset when
one clock moves and the others dont?
Shouldn't the pause delay be recorded somewhere architecturally
independent and then be a thing that kvm-clock happens to use and
other clocks might as well?

Dave

> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
> 
> v2: use subsection (Juan Quintela)
>     fix older machine types support
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 0f75dd3..a2a02ac 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -22,9 +22,11 @@
>  #include "kvm_i386.h"
>  #include "hw/sysbus.h"
>  #include "hw/kvm/clock.h"
> +#include "migration/migration.h"
>  
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> +#include <time.h>
>  
>  #define TYPE_KVM_CLOCK "kvmclock"
>  #define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK)
> @@ -35,7 +37,13 @@ typedef struct KVMClockState {
>      /*< public >*/
>  
>      uint64_t clock;
> +    uint64_t ns;
>      bool clock_valid;
> +
> +    uint64_t advance_clock;
> +    struct timespec t_aftervmstop;
> +
> +    bool adv_clock_enabled;
>  } KVMClockState;
>  
>  struct pvclock_vcpu_time_info {
> @@ -100,6 +108,11 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>              s->clock = time_at_migration;
>          }
>  
> +        if (s->advance_clock && s->clock + s->advance_clock > s->clock) {
> +            s->clock += s->advance_clock;
> +            s->advance_clock = 0;
> +        }
> +
>          data.clock = s->clock;
>          ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
>          if (ret < 0) {
> @@ -135,6 +148,18 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>              abort();
>          }
>          s->clock = data.clock;
> +        /*
> +         * Transition from VM-running to VM-stopped via migration?
> +         * Record when the VM was stopped.
> +         */
> +
> +        if (state == RUN_STATE_FINISH_MIGRATE &&
> +            !migration_in_postcopy(migrate_get_current())) {
> +            clock_gettime(CLOCK_MONOTONIC, &s->t_aftervmstop);
> +        } else {
> +            s->t_aftervmstop.tv_sec = 0;
> +            s->t_aftervmstop.tv_nsec = 0;
> +        }
>  
>          /*
>           * If the VM is stopped, declare the clock state valid to
> @@ -152,6 +177,77 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>  
> +static uint64_t clock_delta(struct timespec *before, struct timespec *after)
> +{
> +    if (before->tv_sec > after->tv_sec ||
> +        (before->tv_sec == after->tv_sec &&
> +         before->tv_nsec > after->tv_nsec)) {
> +        fprintf(stderr, "clock_delta failed: before=(%ld sec, %ld nsec),"
> +                        "after=(%ld sec, %ld nsec)\n", before->tv_sec,
> +                        before->tv_nsec, after->tv_sec, after->tv_nsec);
> +        abort();
> +    }
> +
> +    return (after->tv_sec - before->tv_sec) * 1000000000ULL +
> +            after->tv_nsec - before->tv_nsec;
> +}
> +
> +static void kvmclock_pre_save(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +    struct timespec now;
> +    uint64_t ns;
> +
> +    if (s->t_aftervmstop.tv_sec == 0) {
> +        return;
> +    }
> +
> +    clock_gettime(CLOCK_MONOTONIC, &now);
> +
> +    ns = clock_delta(&s->t_aftervmstop, &now);
> +
> +    /*
> +     * Linux guests can overflow if time jumps
> +     * forward in large increments.
> +     * Cap maximum adjustment to 10 minutes.
> +     */
> +    ns = MIN(ns, 600000000000ULL);
> +
> +    if (s->clock + ns > s->clock) {
> +        s->ns = ns;
> +    }
> +}
> +
> +static int kvmclock_post_load(void *opaque, int version_id)
> +{
> +    KVMClockState *s = opaque;
> +
> +    /* save the value from incoming migration */
> +    s->advance_clock = s->ns;
> +
> +    return 0;
> +}
> +
> +static bool kvmclock_ns_needed(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +
> +    return s->adv_clock_enabled;
> +}
> +
> +static const VMStateDescription kvmclock_advance_ns = {
> +    .name = "kvmclock/advance_ns",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = kvmclock_ns_needed,
> +    .pre_save = kvmclock_pre_save,
> +    .post_load = kvmclock_post_load,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(ns, KVMClockState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription kvmclock_vmsd = {
>      .name = "kvmclock",
>      .version_id = 1,
> @@ -159,15 +255,25 @@ static const VMStateDescription kvmclock_vmsd = {
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT64(clock, KVMClockState),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &kvmclock_advance_ns,
> +        NULL
>      }
>  };
>  
> +static Property kvmclock_properties[] = {
> +    DEFINE_PROP_BOOL("advance_clock", KVMClockState, adv_clock_enabled, true),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static void kvmclock_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
>  
>      dc->realize = kvmclock_realize;
>      dc->vmsd = &kvmclock_vmsd;
> +    dc->props = kvmclock_properties;
>  }
>  
>  static const TypeInfo kvmclock_info = {
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 98dc772..243352e 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -370,6 +370,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
>  #define PC_COMPAT_2_7 \
>      HW_COMPAT_2_7 \
>      {\
> +        .driver   = "kvmclock",\
> +        .property = "advance_clock",\
> +        .value    = "off",\
> +    },\
> +    {\
>          .driver   = TYPE_X86_CPU,\
>          .property = "l3-cache",\
>          .value    = "off",\
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

WARNING: multiple messages have this Message-ID (diff)
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org, qemu-devel <qemu-devel@nongnu.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Juan Quintela" <quintela@redhat.com>,
	"Eduardo Habkost" <ehabkost@redhat.com>
Subject: Re: [Qemu-devel] [QEMU PATCH v2] kvmclock: advance clock by time window between vm_stop and pre_save
Date: Mon, 7 Nov 2016 15:46:11 +0000	[thread overview]
Message-ID: <20161107154610.GG2054@work-vm> (raw)
In-Reply-To: <20161104165933.GA3027@amt.cnet>

* Marcelo Tosatti (mtosatti@redhat.com) wrote:
> This patch, relative to pre-copy migration codepath,
> measures the time between vm_stop() and pre_save(),
> which includes copying the remaining RAM to destination,
> and advances the clock by that amount.
> 
> In a VM with 5 seconds downtime, this reduces the guest
> clock difference on destination from 5s to 0.2s.
> 
> Tested with Linux and Windows 2012 R2 guests with -cpu XXX,+hv-time.

One thing that bothers me is that it's only this clock that's
getting corrected; doesn't it cause things to get upset when
one clock moves and the others dont?
Shouldn't the pause delay be recorded somewhere architecturally
independent and then be a thing that kvm-clock happens to use and
other clocks might as well?

Dave

> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
> 
> v2: use subsection (Juan Quintela)
>     fix older machine types support
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 0f75dd3..a2a02ac 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -22,9 +22,11 @@
>  #include "kvm_i386.h"
>  #include "hw/sysbus.h"
>  #include "hw/kvm/clock.h"
> +#include "migration/migration.h"
>  
>  #include <linux/kvm.h>
>  #include <linux/kvm_para.h>
> +#include <time.h>
>  
>  #define TYPE_KVM_CLOCK "kvmclock"
>  #define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK)
> @@ -35,7 +37,13 @@ typedef struct KVMClockState {
>      /*< public >*/
>  
>      uint64_t clock;
> +    uint64_t ns;
>      bool clock_valid;
> +
> +    uint64_t advance_clock;
> +    struct timespec t_aftervmstop;
> +
> +    bool adv_clock_enabled;
>  } KVMClockState;
>  
>  struct pvclock_vcpu_time_info {
> @@ -100,6 +108,11 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>              s->clock = time_at_migration;
>          }
>  
> +        if (s->advance_clock && s->clock + s->advance_clock > s->clock) {
> +            s->clock += s->advance_clock;
> +            s->advance_clock = 0;
> +        }
> +
>          data.clock = s->clock;
>          ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
>          if (ret < 0) {
> @@ -135,6 +148,18 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>              abort();
>          }
>          s->clock = data.clock;
> +        /*
> +         * Transition from VM-running to VM-stopped via migration?
> +         * Record when the VM was stopped.
> +         */
> +
> +        if (state == RUN_STATE_FINISH_MIGRATE &&
> +            !migration_in_postcopy(migrate_get_current())) {
> +            clock_gettime(CLOCK_MONOTONIC, &s->t_aftervmstop);
> +        } else {
> +            s->t_aftervmstop.tv_sec = 0;
> +            s->t_aftervmstop.tv_nsec = 0;
> +        }
>  
>          /*
>           * If the VM is stopped, declare the clock state valid to
> @@ -152,6 +177,77 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>  
> +static uint64_t clock_delta(struct timespec *before, struct timespec *after)
> +{
> +    if (before->tv_sec > after->tv_sec ||
> +        (before->tv_sec == after->tv_sec &&
> +         before->tv_nsec > after->tv_nsec)) {
> +        fprintf(stderr, "clock_delta failed: before=(%ld sec, %ld nsec),"
> +                        "after=(%ld sec, %ld nsec)\n", before->tv_sec,
> +                        before->tv_nsec, after->tv_sec, after->tv_nsec);
> +        abort();
> +    }
> +
> +    return (after->tv_sec - before->tv_sec) * 1000000000ULL +
> +            after->tv_nsec - before->tv_nsec;
> +}
> +
> +static void kvmclock_pre_save(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +    struct timespec now;
> +    uint64_t ns;
> +
> +    if (s->t_aftervmstop.tv_sec == 0) {
> +        return;
> +    }
> +
> +    clock_gettime(CLOCK_MONOTONIC, &now);
> +
> +    ns = clock_delta(&s->t_aftervmstop, &now);
> +
> +    /*
> +     * Linux guests can overflow if time jumps
> +     * forward in large increments.
> +     * Cap maximum adjustment to 10 minutes.
> +     */
> +    ns = MIN(ns, 600000000000ULL);
> +
> +    if (s->clock + ns > s->clock) {
> +        s->ns = ns;
> +    }
> +}
> +
> +static int kvmclock_post_load(void *opaque, int version_id)
> +{
> +    KVMClockState *s = opaque;
> +
> +    /* save the value from incoming migration */
> +    s->advance_clock = s->ns;
> +
> +    return 0;
> +}
> +
> +static bool kvmclock_ns_needed(void *opaque)
> +{
> +    KVMClockState *s = opaque;
> +
> +    return s->adv_clock_enabled;
> +}
> +
> +static const VMStateDescription kvmclock_advance_ns = {
> +    .name = "kvmclock/advance_ns",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = kvmclock_ns_needed,
> +    .pre_save = kvmclock_pre_save,
> +    .post_load = kvmclock_post_load,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(ns, KVMClockState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static const VMStateDescription kvmclock_vmsd = {
>      .name = "kvmclock",
>      .version_id = 1,
> @@ -159,15 +255,25 @@ static const VMStateDescription kvmclock_vmsd = {
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT64(clock, KVMClockState),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &kvmclock_advance_ns,
> +        NULL
>      }
>  };
>  
> +static Property kvmclock_properties[] = {
> +    DEFINE_PROP_BOOL("advance_clock", KVMClockState, adv_clock_enabled, true),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static void kvmclock_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
>  
>      dc->realize = kvmclock_realize;
>      dc->vmsd = &kvmclock_vmsd;
> +    dc->props = kvmclock_properties;
>  }
>  
>  static const TypeInfo kvmclock_info = {
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 98dc772..243352e 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -370,6 +370,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
>  #define PC_COMPAT_2_7 \
>      HW_COMPAT_2_7 \
>      {\
> +        .driver   = "kvmclock",\
> +        .property = "advance_clock",\
> +        .value    = "off",\
> +    },\
> +    {\
>          .driver   = TYPE_X86_CPU,\
>          .property = "l3-cache",\
>          .value    = "off",\
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

  parent reply	other threads:[~2016-11-07 15:46 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-04  9:43 [QEMU PATCH] kvmclock: advance clock by time window between vm_stop and pre_save Marcelo Tosatti
2016-11-04  9:43 ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 12:28 ` Juan Quintela
2016-11-04 12:28   ` [Qemu-devel] " Juan Quintela
2016-11-04 12:35   ` Marcelo Tosatti
2016-11-04 12:35     ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 14:00     ` Marcelo Tosatti
2016-11-04 14:00       ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 15:25 ` Radim Krčmář
2016-11-04 15:25   ` [Qemu-devel] " Radim Krčmář
2016-11-04 15:33   ` Paolo Bonzini
2016-11-04 15:33     ` [Qemu-devel] " Paolo Bonzini
2016-11-04 15:48     ` Radim Krčmář
2016-11-04 15:48       ` [Qemu-devel] " Radim Krčmář
2016-11-04 15:57       ` Paolo Bonzini
2016-11-04 15:57         ` [Qemu-devel] " Paolo Bonzini
2016-11-04 17:16         ` Radim Krčmář
2016-11-04 17:16           ` [Qemu-devel] " Radim Krčmář
2016-11-04 21:29           ` Paolo Bonzini
2016-11-04 21:29             ` [Qemu-devel] " Paolo Bonzini
2016-11-04 21:47             ` Marcelo Tosatti
2016-11-04 21:47               ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 22:35               ` Paolo Bonzini
2016-11-04 22:35                 ` [Qemu-devel] " Paolo Bonzini
2016-11-07 14:31           ` Roman Kagan
2016-11-07 14:31             ` [Qemu-devel] " Roman Kagan
2016-11-07 19:31             ` Marcelo Tosatti
2016-11-07 19:31               ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 16:24       ` Marcelo Tosatti
2016-11-04 16:24         ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 17:34         ` Radim Krčmář
2016-11-04 17:34           ` [Qemu-devel] " Radim Krčmář
2016-11-04 18:29           ` Marcelo Tosatti
2016-11-04 18:29             ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 20:07             ` Radim Krčmář
2016-11-04 20:07               ` [Qemu-devel] " Radim Krčmář
2016-11-04 16:04   ` Marcelo Tosatti
2016-11-04 16:04     ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 17:07   ` Marcelo Tosatti
2016-11-04 17:07     ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 17:39     ` Radim Krčmář
2016-11-04 17:39       ` [Qemu-devel] " Radim Krčmář
2016-11-04 18:31       ` Marcelo Tosatti
2016-11-04 18:31         ` [Qemu-devel] " Marcelo Tosatti
2016-11-07 13:08       ` Dr. David Alan Gilbert
2016-11-07 13:08         ` [Qemu-devel] " Dr. David Alan Gilbert
2016-11-04 16:59 ` [QEMU PATCH v2] " Marcelo Tosatti
2016-11-04 16:59   ` [Qemu-devel] " Marcelo Tosatti
2016-11-04 18:57   ` Juan Quintela
2016-11-04 18:57     ` [Qemu-devel] " Juan Quintela
2016-11-07 15:46   ` Dr. David Alan Gilbert [this message]
2016-11-07 15:46     ` Dr. David Alan Gilbert
2016-11-07 19:41     ` Marcelo Tosatti
2016-11-07 19:41       ` [Qemu-devel] " Marcelo Tosatti
2016-11-07 20:03       ` Dr. David Alan Gilbert
2016-11-07 20:03         ` [Qemu-devel] " Dr. David Alan Gilbert
2016-11-08  0:06         ` Marcelo Tosatti
2016-11-08  0:06           ` [Qemu-devel] " Marcelo Tosatti
2016-11-08 10:22           ` Dr. David Alan Gilbert
2016-11-08 10:22             ` [Qemu-devel] " Dr. David Alan Gilbert
2016-11-08 13:32             ` Marcelo Tosatti
2016-11-08 13:32               ` [Qemu-devel] " Marcelo Tosatti
2016-11-09 19:32               ` Marcelo Tosatti
2016-11-09 19:32                 ` [Qemu-devel] " Marcelo Tosatti
2016-11-09 16:23             ` Paolo Bonzini
2016-11-09 16:23               ` [Qemu-devel] " Paolo Bonzini
2016-11-09 16:28               ` Dr. David Alan Gilbert
2016-11-09 16:28                 ` [Qemu-devel] " Dr. David Alan Gilbert
2016-11-09 16:33                 ` Paolo Bonzini
2016-11-09 16:33                   ` [Qemu-devel] " Paolo Bonzini
2016-11-10 11:48               ` Marcelo Tosatti
2016-11-10 11:48                 ` [Qemu-devel] " Marcelo Tosatti
2016-11-10 17:57                 ` Paolo Bonzini
2016-11-10 17:57                   ` [Qemu-devel] " Paolo Bonzini
2016-11-11 14:23                   ` Marcelo Tosatti
2016-11-11 14:23                     ` [Qemu-devel] " Marcelo Tosatti
2017-02-07 10:02       ` Wanpeng Li
2017-02-07 10:02         ` [Qemu-devel] " Wanpeng Li
2017-02-07 12:18         ` Marcelo Tosatti
2017-02-07 12:18           ` [Qemu-devel] " Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161107154610.GG2054@work-vm \
    --to=dgilbert@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=rkrcmar@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.