From: Marcelo Tosatti <mtosatti@redhat.com>
To: Gerd Hoffmann <kraxel@redhat.com>, Avi Kivity <avi@redhat.com>
Cc: KVM list <kvm@vger.kernel.org>
Subject: Re: [PATCH] Fix kvmclock on !constant_tsc boxes.
Date: Mon, 9 Feb 2009 18:15:04 -0200 [thread overview]
Message-ID: <20090209201504.GA10603@amt.cnet> (raw)
In-Reply-To: <20090208060856.GC4437@amt.cnet>
[-- Attachment #1: Type: text/plain, Size: 789 bytes --]
On Sun, Feb 08, 2009 at 04:08:56AM -0200, Marcelo Tosatti wrote:
> On Wed, Feb 04, 2009 at 05:52:04PM +0100, Gerd Hoffmann wrote:
> > Hi folks,
> >
> > kvmclock currently falls apart on machines without constant tsc.
> > This patch fixes it. Changes:
> >
> > * keep tsc frequency in a per-cpu variable.
> > * handle kvmclock update using a new request flag, thus checking
> > whenever we need an update each time we enter guest context.
> > * use a cpufreq notifier to track frequency changes and force
> > kvmclock updates.
> > * send ipis to kick cpu out of guest context if needed to make
> > sure the guest doesn't see stale values.
> >
> > cheers,
> > Gerd
>
> ACK for 2.6.29 (but please fix the whitespace breakage).
Whitespace fixed version attached.
[-- Attachment #2: kvmclock-cpufreq.patch --]
[-- Type: text/plain, Size: 5176 bytes --]
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
@@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t
hv_clock->tsc_to_system_mul);
}
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
static void kvm_write_guest_time(struct kvm_vcpu *v)
{
struct timespec ts;
@@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct
if ((!vcpu->time_page))
return;
- if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
- kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
- vcpu->hv_clock_tsc_khz = tsc_khz;
+ if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+ kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+ vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
}
/* Keep irq disabled to prevent changes to the clock */
@@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
}
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+
+ if (!vcpu->time_page)
+ return 0;
+ set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+ return 1;
+}
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
@@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
vcpu->arch.time_page = NULL;
}
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
break;
}
default:
@@ -1096,7 +1109,7 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2640,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vc
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
+static void bounce_off(void *info)
+{
+ /* nothing */
+}
+
+static unsigned int ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i, send_ipi = 0;
+
+ if (!ref_freq)
+ ref_freq = freq->old;
+
+ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+ return 0;
+ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+ return 0;
+ per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (vcpu->cpu != freq->cpu)
+ continue;
+ if (!kvm_request_guest_time_update(vcpu))
+ continue;
+ if (vcpu->cpu != smp_processor_id())
+ send_ipi++;
+ }
+ }
+ spin_unlock(&kvm_lock);
+
+ if (freq->old < freq->new && send_ipi) {
+ /*
+ * We upscale the frequency. Must make the guest
+ * doesn't see old kvmclock values while running with
+ * the new frequency, otherwise we risk the guest sees
+ * time go backwards.
+ *
+ * In case we update the frequency for another cpu
+ * (which might be in guest context) send an interrupt
+ * to kick the cpu out of guest context. Next time
+ * guest context is entered kvmclock will be updated,
+ * so the guest will not see stale values.
+ */
+ smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+ }
+ return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+ .notifier_call = kvmclock_cpufreq_notifier
+};
+
int kvm_arch_init(void *opaque)
{
- int r;
+ int r, cpu;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@@ -2673,6 +2749,15 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ tsc_khz_ref = tsc_khz;
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
return 0;
out:
@@ -3008,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_v
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
__kvm_migrate_timers(vcpu);
+ if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+ kvm_write_guest_time(vcpu);
if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
kvm_mmu_sync_roots(vcpu);
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -37,6 +37,7 @@
#define KVM_REQ_PENDING_TIMER 5
#define KVM_REQ_UNHALT 6
#define KVM_REQ_MMU_SYNC 7
+#define KVM_REQ_KVMCLOCK_UPDATE 8
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
next prev parent reply other threads:[~2009-02-09 20:15 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-02-04 16:52 [PATCH] Fix kvmclock on !constant_tsc boxes Gerd Hoffmann
2009-02-07 7:14 ` Marcelo Tosatti
2009-02-08 6:08 ` Marcelo Tosatti
2009-02-09 20:15 ` Marcelo Tosatti [this message]
2009-02-11 13:14 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090209201504.GA10603@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=kraxel@redhat.com \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox