From: Marcelo Tosatti <mtosatti@redhat.com>
To: Gerd Hoffmann <kraxel@redhat.com>, Avi Kivity <avi@redhat.com>
Cc: KVM list <kvm@vger.kernel.org>
Subject: Re: [PATCH] Fix kvmclock on !constant_tsc boxes.
Date: Mon, 9 Feb 2009 18:15:04 -0200 [thread overview]
Message-ID: <20090209201504.GA10603@amt.cnet> (raw)
In-Reply-To: <20090208060856.GC4437@amt.cnet>
[-- Attachment #1: Type: text/plain, Size: 789 bytes --]
On Sun, Feb 08, 2009 at 04:08:56AM -0200, Marcelo Tosatti wrote:
> On Wed, Feb 04, 2009 at 05:52:04PM +0100, Gerd Hoffmann wrote:
> > Hi folks,
> >
> > kvmclock currently falls apart on machines without constant tsc.
> > This patch fixes it. Changes:
> >
> > * keep tsc frequency in a per-cpu variable.
> > * handle kvmclock update using a new request flag, thus checking
> > whenever we need an update each time we enter guest context.
> > * use a cpufreq notifier to track frequency changes and force
> > kvmclock updates.
> > * send ipis to kick cpu out of guest context if needed to make
> > sure the guest doesn't see stale values.
> >
> > cheers,
> > Gerd
>
> ACK for 2.6.29 (but please fix the whitespace breakage).
Whitespace fixed version attached.
[-- Attachment #2: kvmclock-cpufreq.patch --]
[-- Type: text/plain, Size: 5176 bytes --]
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
@@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t
hv_clock->tsc_to_system_mul);
}
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
static void kvm_write_guest_time(struct kvm_vcpu *v)
{
struct timespec ts;
@@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct
if ((!vcpu->time_page))
return;
- if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
- kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
- vcpu->hv_clock_tsc_khz = tsc_khz;
+ if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+ kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+ vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
}
/* Keep irq disabled to prevent changes to the clock */
@@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
}
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+
+ if (!vcpu->time_page)
+ return 0;
+ set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+ return 1;
+}
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
@@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
vcpu->arch.time_page = NULL;
}
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
break;
}
default:
@@ -1096,7 +1109,7 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
- kvm_write_guest_time(vcpu);
+ kvm_request_guest_time_update(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2640,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vc
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
+static void bounce_off(void *info)
+{
+ /* nothing */
+}
+
+static unsigned int ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i, send_ipi = 0;
+
+ if (!ref_freq)
+ ref_freq = freq->old;
+
+ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+ return 0;
+ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+ return 0;
+ per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (vcpu->cpu != freq->cpu)
+ continue;
+ if (!kvm_request_guest_time_update(vcpu))
+ continue;
+ if (vcpu->cpu != smp_processor_id())
+ send_ipi++;
+ }
+ }
+ spin_unlock(&kvm_lock);
+
+ if (freq->old < freq->new && send_ipi) {
+ /*
+ * We upscale the frequency. Must make the guest
+ * doesn't see old kvmclock values while running with
+ * the new frequency, otherwise we risk the guest sees
+ * time go backwards.
+ *
+ * In case we update the frequency for another cpu
+ * (which might be in guest context) send an interrupt
+ * to kick the cpu out of guest context. Next time
+ * guest context is entered kvmclock will be updated,
+ * so the guest will not see stale values.
+ */
+ smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+ }
+ return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+ .notifier_call = kvmclock_cpufreq_notifier
+};
+
int kvm_arch_init(void *opaque)
{
- int r;
+ int r, cpu;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@@ -2673,6 +2749,15 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ tsc_khz_ref = tsc_khz;
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
return 0;
out:
@@ -3008,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_v
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
__kvm_migrate_timers(vcpu);
+ if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+ kvm_write_guest_time(vcpu);
if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
kvm_mmu_sync_roots(vcpu);
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -37,6 +37,7 @@
#define KVM_REQ_PENDING_TIMER 5
#define KVM_REQ_UNHALT 6
#define KVM_REQ_MMU_SYNC 7
+#define KVM_REQ_KVMCLOCK_UPDATE 8
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
next prev parent reply other threads:[~2009-02-09 20:15 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-02-04 16:52 [PATCH] Fix kvmclock on !constant_tsc boxes Gerd Hoffmann
2009-02-07 7:14 ` Marcelo Tosatti
2009-02-08 6:08 ` Marcelo Tosatti
2009-02-09 20:15 ` Marcelo Tosatti [this message]
2009-02-11 13:14 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090209201504.GA10603@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=kraxel@redhat.com \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.