From: David Woodhouse <dwmw2@infradead.org>
To: Paolo Bonzini <pbonzini@redhat.com>,
Jonathan Corbet <corbet@lwn.net>,
Shuah Khan <skhan@linuxfoundation.org>,
Sean Christopherson <seanjc@google.com>,
Thomas Gleixner <tglx@kernel.org>, Ingo Molnar <mingo@redhat.com>,
Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Juergen Gross <jgross@suse.com>,
Boris Ostrovsky <boris.ostrovsky@oracle.com>,
David Woodhouse <dwmw2@infradead.org>,
Paul Durrant <paul@xen.org>, Jonathan Cameron <jic23@kernel.org>,
Sascha Bischoff <Sascha.Bischoff@arm.com>,
Marc Zyngier <maz@kernel.org>, Joey Gouly <joey.gouly@arm.com>,
Jack Allister <jalliste@amazon.com>,
Dongli Zhang <dongli.zhang@oracle.com>,
joe.jin@oracle.com, kvm@vger.kernel.org,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
xen-devel@lists.xenproject.org, linux-kselftest@vger.kernel.org
Subject: [PATCH v5 12/34] KVM: x86: Fix KVM clock precision in get_kvmclock() with TSC scaling
Date: Mon, 8 Jun 2026 15:47:53 +0100 [thread overview]
Message-ID: <20260608145455.89187-13-dwmw2@infradead.org> (raw)
In-Reply-To: <20260608145455.89187-1-dwmw2@infradead.org>
From: David Woodhouse <dwmw@amazon.co.uk>
When in master clock mode, the KVM clock is defined in terms of the
guest TSC. But get_kvmclock() was computing it from the host TSC
without applying TSC scaling, leading to a systemic drift from the
values the guest computes from its own TSC.
Store the VM's TSC scaling ratio in kvm_arch and precompute the
guest-TSC-based mul/shift in pvclock_update_vm_gtod_copy(). Use these
in get_kvmclock() to scale the host TSC delta to guest TSC before
converting to nanoseconds.
This avoids "definition C" of the KVM clock described in the
earlier commit "KVM: x86/xen: Do not corrupt KVM clock in
kvm_xen_shared_info_init()".
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
arch/x86/include/asm/kvm_host.h | 4 +++
arch/x86/kvm/x86.c | 52 +++++++++++++++++++++++++++++----
2 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 37264212c7df..5348fd5ea3f3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1490,6 +1490,7 @@ struct kvm_arch {
u64 last_tsc_write;
u32 last_tsc_khz;
u64 last_tsc_offset;
+ u64 last_tsc_scaling_ratio;
u64 cur_tsc_nsec;
u64 cur_tsc_write;
u64 cur_tsc_offset;
@@ -1504,6 +1505,9 @@ struct kvm_arch {
bool use_master_clock;
u64 master_kernel_ns;
u64 master_cycle_now;
+ u64 master_tsc_scaling_ratio;
+ s8 master_tsc_shift;
+ u32 master_tsc_mul;
#ifdef CONFIG_KVM_HYPERV
struct kvm_hv hyperv;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6983a7494fcd..7ae6a7705353 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2781,6 +2781,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
kvm->arch.last_tsc_write = tsc;
kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
kvm->arch.last_tsc_offset = offset;
+ kvm->arch.last_tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
vcpu->arch.last_guest_tsc = tsc;
@@ -3109,6 +3110,8 @@ static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
*
*/
+static unsigned long get_cpu_tsc_khz(void);
+
static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
@@ -3132,9 +3135,30 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
&& !ka->backwards_tsc_observed
&& !ka->boot_vcpu_runs_old_kvmclock;
- if (ka->use_master_clock)
+ if (ka->use_master_clock) {
+ u64 tsc_hz;
+
atomic_set(&kvm_guest_has_master_clock, 1);
+ /*
+ * Copy the scaling ratio and precompute the mul/shift for
+ * converting guest TSC to nanoseconds. These are used by
+ * get_kvmclock() to compute kvmclock from the host TSC
+ * without needing a vCPU reference.
+ */
+ ka->master_tsc_scaling_ratio = ka->last_tsc_scaling_ratio;
+ tsc_hz = (u64)get_cpu_tsc_khz() * 1000;
+ if (tsc_hz && kvm_caps.has_tsc_control)
+ tsc_hz = kvm_scale_tsc(tsc_hz,
+ ka->master_tsc_scaling_ratio);
+ if (tsc_hz)
+ kvm_get_time_scale(NSEC_PER_SEC, tsc_hz,
+ &ka->master_tsc_shift,
+ &ka->master_tsc_mul);
+ else
+ ka->use_master_clock = false;
+ }
+
vclock_mode = pvclock_gtod_data.clock.vclock_mode;
trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
vcpus_matched);
@@ -3237,10 +3261,28 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
data->flags |= KVM_CLOCK_TSC_STABLE;
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
- kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
- &hv_clock.tsc_shift,
- &hv_clock.tsc_to_system_mul);
- data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
+
+ /*
+ * Use the precomputed guest-TSC-based mul/shift
+ * so that the kvmclock value matches what the
+ * guest computes from its own TSC.
+ */
+ hv_clock.tsc_shift = ka->master_tsc_shift;
+ hv_clock.tsc_to_system_mul = ka->master_tsc_mul;
+
+ if (kvm_caps.has_tsc_control) {
+ u64 tsc_delta = data->host_tsc - ka->master_cycle_now;
+
+ tsc_delta = kvm_scale_tsc(tsc_delta,
+ ka->master_tsc_scaling_ratio);
+ data->clock = hv_clock.system_time +
+ pvclock_scale_delta(tsc_delta,
+ hv_clock.tsc_to_system_mul,
+ hv_clock.tsc_shift);
+ } else {
+ data->clock = __pvclock_read_cycles(&hv_clock,
+ data->host_tsc);
+ }
put_cpu();
} else
--
2.54.0
next prev parent reply other threads:[~2026-06-08 14:55 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-08 14:47 [PATCH v5 00/34] Cleaning up the KVM clock mess David Woodhouse
2026-06-08 14:47 ` [PATCH v5 01/34] KVM: x86/xen: Do not corrupt KVM clock in kvm_xen_shared_info_init() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 02/34] KVM: x86: Improve accuracy of KVM clock when TSC scaling is in force David Woodhouse
2026-06-08 14:47 ` [PATCH v5 03/34] UAPI: x86: Move pvclock-abi to UAPI for x86 platforms David Woodhouse
2026-06-08 14:47 ` [PATCH v5 04/34] KVM: x86: Add KVM_[GS]ET_CLOCK_GUEST for accurate KVM clock migration David Woodhouse
2026-06-16 6:47 ` Dongli Zhang
2026-06-16 11:13 ` David Woodhouse
2026-06-23 8:50 ` David Woodhouse
2026-06-08 14:47 ` [PATCH v5 05/34] KVM: selftests: Add KVM/PV clock selftest to prove timer correction David Woodhouse
2026-06-08 14:47 ` [PATCH v5 06/34] KVM: x86: Explicitly disable TSC scaling without CONSTANT_TSC David Woodhouse
2026-07-01 21:38 ` Sean Christopherson
2026-07-02 8:58 ` David Woodhouse
2026-06-08 14:47 ` [PATCH v5 07/34] KVM: x86: Activate master clock immediately on vCPU creation David Woodhouse
2026-06-08 14:47 ` [PATCH v5 08/34] KVM: x86: Add KVM_VCPU_TSC_SCALE and fix the documentation on TSC migration David Woodhouse
2026-06-09 23:26 ` Randy Dunlap
2026-07-01 21:47 ` Sean Christopherson
2026-07-02 8:09 ` David Woodhouse
2026-06-08 14:47 ` [PATCH v5 09/34] KVM: x86: Avoid NTP frequency skew for KVM clock on 32-bit host David Woodhouse
2026-06-08 14:47 ` [PATCH v5 10/34] KVM: x86: Fold __get_kvmclock() into get_kvmclock() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 11/34] KVM: x86: Restructure get_kvmclock() David Woodhouse
2026-06-08 14:47 ` David Woodhouse [this message]
2026-06-08 14:47 ` [PATCH v5 13/34] KVM: x86: Use get_kvmclock() in kvm_get_wall_clock_epoch() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 14/34] KVM: x86: Fix compute_guest_tsc() to handle negative time deltas David Woodhouse
2026-06-08 14:47 ` [PATCH v5 15/34] KVM: x86: Restructure kvm_guest_time_update() for TSC upscaling David Woodhouse
2026-06-08 14:47 ` [PATCH v5 16/34] KVM: x86: Simplify and comment kvm_get_time_scale() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 17/34] KVM: x86: Remove implicit rdtsc() from kvm_compute_l1_tsc_offset() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 18/34] KVM: x86: Improve synchronization in kvm_synchronize_tsc() David Woodhouse
2026-06-08 14:48 ` [PATCH v5 19/34] KVM: x86: Kill last_tsc_{nsec,write,offset} fields David Woodhouse
2026-06-08 14:48 ` [PATCH v5 20/34] KVM: x86: Replace nr_vcpus_matched_tsc count with all_vcpus_matched_tsc bool David Woodhouse
2026-06-08 14:48 ` [PATCH v5 21/34] KVM: x86: Allow KVM master clock mode when TSCs are offset from each other David Woodhouse
2026-06-08 14:48 ` [PATCH v5 22/34] KVM: selftests: Add master clock offset test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 23/34] KVM: x86: Factor out kvm_use_master_clock() David Woodhouse
2026-06-08 14:48 ` [PATCH v5 24/34] KVM: x86: Avoid gratuitous global clock updates David Woodhouse
2026-06-08 14:48 ` [PATCH v5 25/34] KVM: x86/xen: Prevent runstate times from becoming negative David Woodhouse
2026-06-08 14:48 ` [PATCH v5 26/34] KVM: x86: Avoid redundant masterclock updates from multiple vCPUs David Woodhouse
2026-06-08 14:48 ` [PATCH v5 27/34] KVM: x86: Remove runtime Xen TSC frequency CPUID update David Woodhouse
2026-06-08 14:48 ` [PATCH v5 28/34] KVM: selftests: Add Xen/generic CPUID timing leaf test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 29/34] KVM: x86: Re-synchronize TSC after KVM_SET_TSC_KHZ David Woodhouse
2026-06-08 14:48 ` [PATCH v5 30/34] KVM: selftests: Add Xen runstate migration test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 31/34] KVM: x86: Use ktime_get_snapshot_id() for master clock David Woodhouse
2026-06-08 14:48 ` [PATCH v5 32/34] KVM: x86: Compute kvmclock base without pvclock_gtod_data David Woodhouse
2026-06-08 14:48 ` [PATCH v5 33/34] KVM: x86: Replace pvclock_gtod_data vclock_mode with boolean David Woodhouse
2026-06-08 14:48 ` [PATCH v5 34/34] KVM: x86: Remove pvclock_gtod_data and private timekeeping code David Woodhouse
2026-06-09 18:50 ` [PATCH v5 00/34] Cleaning up the KVM clock mess David Woodhouse
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260608145455.89187-13-dwmw2@infradead.org \
--to=dwmw2@infradead.org \
--cc=Sascha.Bischoff@arm.com \
--cc=boris.ostrovsky@oracle.com \
--cc=bp@alien8.de \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=dongli.zhang@oracle.com \
--cc=hpa@zytor.com \
--cc=jalliste@amazon.com \
--cc=jgross@suse.com \
--cc=jic23@kernel.org \
--cc=joe.jin@oracle.com \
--cc=joey.gouly@arm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=maz@kernel.org \
--cc=mingo@redhat.com \
--cc=paul@xen.org \
--cc=pbonzini@redhat.com \
--cc=seanjc@google.com \
--cc=skhan@linuxfoundation.org \
--cc=tglx@kernel.org \
--cc=vkuznets@redhat.com \
--cc=x86@kernel.org \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox