Re: [Qemu-devel] kvmclock, Migration, and NTP clock jitter

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Mohammed Gamal <mohammed.gamal@profitbricks.com>
To: Paolo Bonzini <pbonzini@redhat.com>, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] kvmclock, Migration, and NTP clock jitter
Date: Fri, 16 Jan 2015 11:21:20 +0100	[thread overview]
Message-ID: <20150116102109.GA4404@gmail.com> (raw)
In-Reply-To: <54B7F89A.80107@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 877 bytes --]

On Thu, Jan 15, 2015 at 06:27:54PM +0100, Paolo Bonzini wrote:
> 
> 
> On 15/01/2015 17:39, Mohammed Gamal wrote:
> > The increase in the jitter and offset values is well within the 500 ppm
> > frequency tolerance limit, and therefore are easily corrected by
> > subsequent NTP clock sync events, but some live migrations do cause much
> > higher jitter and offset jumps, which can not be corrected by NTP and
> > cause the time to go way off. Any idea why this is the case?
> 
> It might be fixed in QEMU 2.2.
> 
> See https://lists.gnu.org/archive/html/qemu-devel/2014-09/msg01239.html
> 
> Paolo

Hi Paolo,

I did try to backport these patches to QEMU 1.2. However, migrations then
resulted in *higher* jitter and offset values (i.e. on the order of 100+ ppm).
I am not sure whether I've done the backporting correctly, though. Here are my
patches on top of the QEMU 1.2 stable tree.

[-- Attachment #2: backport.patch --]
[-- Type: text/x-diff, Size: 5053 bytes --]

diff --git a/cpus.c b/cpus.c
index 29aced5..e079ee5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -187,6 +187,15 @@ void cpu_disable_ticks(void)
     }
 }
 
+void cpu_clean_all_dirty(void)
+{
+    CPUArchState *cpu;
+
+    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
+        cpu_clean_state(cpu);
+    }
+}
+
 /* Correlation between real and virtual time is always going to be
    fairly approximate, so ignore small variation.
    When the guest is idle real and virtual time will be aligned in
diff --git a/cpus.h b/cpus.h
index 3fc1a4a..1ff166b 100644
--- a/cpus.h
+++ b/cpus.h
@@ -12,6 +12,7 @@ void unplug_vcpu(void *p);
 void cpu_synchronize_all_states(void);
 void cpu_synchronize_all_post_reset(void);
 void cpu_synchronize_all_post_init(void);
+void cpu_clean_all_dirty(void);
 
 void qtest_clock_warp(int64_t dest);
 
diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c
index 824b978..b2bdda4 100644
--- a/hw/kvm/clock.c
+++ b/hw/kvm/clock.c
@@ -16,6 +16,8 @@
 #include "qemu-common.h"
 #include "sysemu.h"
 #include "kvm.h"
+#include "host-utils.h"
+#include "cpus.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -28,6 +30,46 @@ typedef struct KVMClockState {
     bool clock_valid;
 } KVMClockState;
 
+struct pvclock_vcpu_time_info {
+    uint32_t   version;
+    uint32_t   pad0;
+    uint64_t   tsc_timestamp;
+    uint64_t   system_time;
+    uint32_t   tsc_to_system_mul;
+    int8_t     tsc_shift;
+    uint8_t    flags;
+    uint8_t    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+static uint64_t kvmclock_current_nsec(KVMClockState *s)
+{
+    CPUArchState *env = first_cpu;
+    uint64_t migration_tsc = env->tsc;
+    struct pvclock_vcpu_time_info time;
+    uint64_t delta;
+    uint64_t nsec_lo;
+    uint64_t nsec_hi;
+    uint64_t nsec;
+
+    if (!(env->system_time_msr & 1ULL)) {
+        /* KVM clock not active */
+        return 0;
+    }
+    cpu_physical_memory_read((env->system_time_msr & ~1ULL), &time, sizeof(time));
+
+    assert(time.tsc_timestamp <= migration_tsc);
+    delta = migration_tsc - time.tsc_timestamp;
+    if (time.tsc_shift < 0) {
+        delta >>= -time.tsc_shift;
+    } else {
+        delta <<= time.tsc_shift;
+    }
+
+    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
+    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
+    return nsec + time.system_time;
+}
+
 static void kvmclock_pre_save(void *opaque)
 {
     KVMClockState *s = opaque;
@@ -37,6 +79,23 @@ static void kvmclock_pre_save(void *opaque)
     if (s->clock_valid) {
         return;
     }
+
+    cpu_synchronize_all_states();
+    /* In theory, the cpu_synchronize_all_states() call above wouldn't
+     * affect the rest of the code, as the VCPU state inside CPUArchState
+     * is supposed to always match the VCPU state on the kernel side.
+     *
+     * In practice, calling cpu_synchronize_state() too soon will load the
+     * kernel-side APIC state into X86CPU.apic_state too early, APIC state
+     * won't be reloaded later because CPUState.vcpu_dirty==true, and
+     * outdated APIC state may be migrated to another host.
+     *
+     * The real fix would be to make sure outdated APIC state is read
+     * from the kernel again when necessary. While this is not fixed, we
+     * need the cpu_clean_all_dirty() call below.
+     */
+    cpu_clean_all_dirty();
+
     ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
     if (ret < 0) {
         fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
@@ -55,6 +114,12 @@ static int kvmclock_post_load(void *opaque, int version_id)
 {
     KVMClockState *s = opaque;
     struct kvm_clock_data data;
+    uint64_t time_at_migration = kvmclock_current_nsec(s);
+
+    /* We can't rely on the migrated clock value, just discard it */
+    if (time_at_migration) {
+        s->clock = time_at_migration;
+    }
 
     data.clock = s->clock;
     data.flags = 0;
diff --git a/kvm-all.c b/kvm-all.c
index cd2ccbe..692944e 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1547,6 +1547,11 @@ void kvm_cpu_synchronize_post_init(CPUArchState *env)
     env->kvm_vcpu_dirty = 0;
 }
 
+void kvm_cpu_clean_state(CPUArchState *env)
+{
+    env->kvm_vcpu_dirty = false;
+}
+
 int kvm_cpu_exec(CPUArchState *env)
 {
     struct kvm_run *run = env->kvm_run;
diff --git a/kvm.h b/kvm.h
index 2a68a52..92a17d8 100644
--- a/kvm.h
+++ b/kvm.h
@@ -234,6 +234,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
 void kvm_cpu_synchronize_state(CPUArchState *env);
 void kvm_cpu_synchronize_post_reset(CPUArchState *env);
 void kvm_cpu_synchronize_post_init(CPUArchState *env);
+void kvm_cpu_clean_state(CPUArchState *cpu);
 
 /* generic hooks - to be moved/refactored once there are more users */
 
@@ -258,6 +259,12 @@ static inline void cpu_synchronize_post_init(CPUArchState *env)
     }
 }
 
+static inline void cpu_clean_state(CPUArchState *env)
+{
+    if (kvm_enabled()) {
+        kvm_cpu_clean_state(env);
+    }
+}
 
 #if !defined(CONFIG_USER_ONLY)
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,

next prev parent reply	other threads:[~2015-01-16 10:21 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-15 16:39 [Qemu-devel] kvmclock, Migration, and NTP clock jitter Mohammed Gamal
2015-01-15 17:27 ` Paolo Bonzini
2015-01-16 10:21   ` Mohammed Gamal [this message]
2015-01-21 10:20     ` Mohammed Gamal

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:29aced5 dfblob:e079ee5 dfblob:3fc1a4a dfblob:1ff166b
dfblob:824b978 dfblob:b2bdda4 dfblob:cd2ccbe dfblob:692944e
dfblob:2a68a52 dfblob:92a17d8 )
 OR (
bs:"Re: [Qemu-devel] kvmclock, Migration, and NTP clock jitter" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150116102109.GA4404@gmail.com \
    --to=mohammed.gamal@profitbricks.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).