Linux Documentation
 help / color / mirror / Atom feed
From: David Woodhouse <dwmw2@infradead.org>
To: Paolo Bonzini <pbonzini@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Shuah Khan <skhan@linuxfoundation.org>,
	Sean Christopherson <seanjc@google.com>,
	Thomas Gleixner <tglx@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Juergen Gross <jgross@suse.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	David Woodhouse <dwmw2@infradead.org>,
	Paul Durrant <paul@xen.org>, Jonathan Cameron <jic23@kernel.org>,
	Sascha Bischoff <Sascha.Bischoff@arm.com>,
	Marc Zyngier <maz@kernel.org>, Joey Gouly <joey.gouly@arm.com>,
	Jack Allister <jalliste@amazon.com>,
	Dongli Zhang <dongli.zhang@oracle.com>,
	joe.jin@oracle.com, kvm@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	xen-devel@lists.xenproject.org, linux-kselftest@vger.kernel.org
Subject: [PATCH v5 12/34] KVM: x86: Fix KVM clock precision in get_kvmclock() with TSC scaling
Date: Mon,  8 Jun 2026 15:47:53 +0100	[thread overview]
Message-ID: <20260608145455.89187-13-dwmw2@infradead.org> (raw)
In-Reply-To: <20260608145455.89187-1-dwmw2@infradead.org>

From: David Woodhouse <dwmw@amazon.co.uk>

When in master clock mode, the KVM clock is defined in terms of the
guest TSC. But get_kvmclock() was computing it from the host TSC
without applying TSC scaling, leading to a systemic drift from the
values the guest computes from its own TSC.

Store the VM's TSC scaling ratio in kvm_arch and precompute the
guest-TSC-based mul/shift in pvclock_update_vm_gtod_copy(). Use these
in get_kvmclock() to scale the host TSC delta to guest TSC before
converting to nanoseconds.

This avoids "definition C" of the KVM clock described in the
earlier commit "KVM: x86/xen: Do not corrupt KVM clock in
kvm_xen_shared_info_init()".

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/kvm_host.h |  4 +++
 arch/x86/kvm/x86.c              | 52 +++++++++++++++++++++++++++++----
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 37264212c7df..5348fd5ea3f3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1490,6 +1490,7 @@ struct kvm_arch {
 	u64 last_tsc_write;
 	u32 last_tsc_khz;
 	u64 last_tsc_offset;
+	u64 last_tsc_scaling_ratio;
 	u64 cur_tsc_nsec;
 	u64 cur_tsc_write;
 	u64 cur_tsc_offset;
@@ -1504,6 +1505,9 @@ struct kvm_arch {
 	bool use_master_clock;
 	u64 master_kernel_ns;
 	u64 master_cycle_now;
+	u64 master_tsc_scaling_ratio;
+	s8  master_tsc_shift;
+	u32 master_tsc_mul;
 
 #ifdef CONFIG_KVM_HYPERV
 	struct kvm_hv hyperv;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6983a7494fcd..7ae6a7705353 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2781,6 +2781,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
 	kvm->arch.last_tsc_write = tsc;
 	kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
 	kvm->arch.last_tsc_offset = offset;
+	kvm->arch.last_tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
 
 	vcpu->arch.last_guest_tsc = tsc;
 
@@ -3109,6 +3110,8 @@ static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
  *
  */
 
+static unsigned long get_cpu_tsc_khz(void);
+
 static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 {
 #ifdef CONFIG_X86_64
@@ -3132,9 +3135,30 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
 				&& !ka->backwards_tsc_observed
 				&& !ka->boot_vcpu_runs_old_kvmclock;
 
-	if (ka->use_master_clock)
+	if (ka->use_master_clock) {
+		u64 tsc_hz;
+
 		atomic_set(&kvm_guest_has_master_clock, 1);
 
+		/*
+		 * Copy the scaling ratio and precompute the mul/shift for
+		 * converting guest TSC to nanoseconds. These are used by
+		 * get_kvmclock() to compute kvmclock from the host TSC
+		 * without needing a vCPU reference.
+		 */
+		ka->master_tsc_scaling_ratio = ka->last_tsc_scaling_ratio;
+		tsc_hz = (u64)get_cpu_tsc_khz() * 1000;
+		if (tsc_hz && kvm_caps.has_tsc_control)
+			tsc_hz = kvm_scale_tsc(tsc_hz,
+					       ka->master_tsc_scaling_ratio);
+		if (tsc_hz)
+			kvm_get_time_scale(NSEC_PER_SEC, tsc_hz,
+					   &ka->master_tsc_shift,
+					   &ka->master_tsc_mul);
+		else
+			ka->use_master_clock = false;
+	}
+
 	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
 	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
 					vcpus_matched);
@@ -3237,10 +3261,28 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
 			data->flags |= KVM_CLOCK_TSC_STABLE;
 			hv_clock.tsc_timestamp = ka->master_cycle_now;
 			hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-			kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
-					   &hv_clock.tsc_shift,
-					   &hv_clock.tsc_to_system_mul);
-			data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
+
+			/*
+			 * Use the precomputed guest-TSC-based mul/shift
+			 * so that the kvmclock value matches what the
+			 * guest computes from its own TSC.
+			 */
+			hv_clock.tsc_shift = ka->master_tsc_shift;
+			hv_clock.tsc_to_system_mul = ka->master_tsc_mul;
+
+			if (kvm_caps.has_tsc_control) {
+				u64 tsc_delta = data->host_tsc - ka->master_cycle_now;
+
+				tsc_delta = kvm_scale_tsc(tsc_delta,
+							  ka->master_tsc_scaling_ratio);
+				data->clock = hv_clock.system_time +
+					pvclock_scale_delta(tsc_delta,
+							    hv_clock.tsc_to_system_mul,
+							    hv_clock.tsc_shift);
+			} else {
+				data->clock = __pvclock_read_cycles(&hv_clock,
+								    data->host_tsc);
+			}
 
 			put_cpu();
 		} else
-- 
2.54.0


  parent reply	other threads:[~2026-06-08 14:55 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-08 14:47 [PATCH v5 00/34] Cleaning up the KVM clock mess David Woodhouse
2026-06-08 14:47 ` [PATCH v5 01/34] KVM: x86/xen: Do not corrupt KVM clock in kvm_xen_shared_info_init() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 02/34] KVM: x86: Improve accuracy of KVM clock when TSC scaling is in force David Woodhouse
2026-06-08 14:47 ` [PATCH v5 03/34] UAPI: x86: Move pvclock-abi to UAPI for x86 platforms David Woodhouse
2026-06-08 14:47 ` [PATCH v5 04/34] KVM: x86: Add KVM_[GS]ET_CLOCK_GUEST for accurate KVM clock migration David Woodhouse
2026-06-16  6:47   ` Dongli Zhang
2026-06-16 11:13     ` David Woodhouse
2026-06-23  8:50       ` David Woodhouse
2026-06-08 14:47 ` [PATCH v5 05/34] KVM: selftests: Add KVM/PV clock selftest to prove timer correction David Woodhouse
2026-06-08 14:47 ` [PATCH v5 06/34] KVM: x86: Explicitly disable TSC scaling without CONSTANT_TSC David Woodhouse
2026-07-01 21:38   ` Sean Christopherson
2026-07-02  8:58     ` David Woodhouse
2026-06-08 14:47 ` [PATCH v5 07/34] KVM: x86: Activate master clock immediately on vCPU creation David Woodhouse
2026-06-08 14:47 ` [PATCH v5 08/34] KVM: x86: Add KVM_VCPU_TSC_SCALE and fix the documentation on TSC migration David Woodhouse
2026-06-09 23:26   ` Randy Dunlap
2026-07-01 21:47   ` Sean Christopherson
2026-07-02  8:09     ` David Woodhouse
2026-06-08 14:47 ` [PATCH v5 09/34] KVM: x86: Avoid NTP frequency skew for KVM clock on 32-bit host David Woodhouse
2026-06-08 14:47 ` [PATCH v5 10/34] KVM: x86: Fold __get_kvmclock() into get_kvmclock() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 11/34] KVM: x86: Restructure get_kvmclock() David Woodhouse
2026-06-08 14:47 ` David Woodhouse [this message]
2026-06-08 14:47 ` [PATCH v5 13/34] KVM: x86: Use get_kvmclock() in kvm_get_wall_clock_epoch() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 14/34] KVM: x86: Fix compute_guest_tsc() to handle negative time deltas David Woodhouse
2026-06-08 14:47 ` [PATCH v5 15/34] KVM: x86: Restructure kvm_guest_time_update() for TSC upscaling David Woodhouse
2026-06-08 14:47 ` [PATCH v5 16/34] KVM: x86: Simplify and comment kvm_get_time_scale() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 17/34] KVM: x86: Remove implicit rdtsc() from kvm_compute_l1_tsc_offset() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 18/34] KVM: x86: Improve synchronization in kvm_synchronize_tsc() David Woodhouse
2026-06-08 14:48 ` [PATCH v5 19/34] KVM: x86: Kill last_tsc_{nsec,write,offset} fields David Woodhouse
2026-06-08 14:48 ` [PATCH v5 20/34] KVM: x86: Replace nr_vcpus_matched_tsc count with all_vcpus_matched_tsc bool David Woodhouse
2026-06-08 14:48 ` [PATCH v5 21/34] KVM: x86: Allow KVM master clock mode when TSCs are offset from each other David Woodhouse
2026-06-08 14:48 ` [PATCH v5 22/34] KVM: selftests: Add master clock offset test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 23/34] KVM: x86: Factor out kvm_use_master_clock() David Woodhouse
2026-06-08 14:48 ` [PATCH v5 24/34] KVM: x86: Avoid gratuitous global clock updates David Woodhouse
2026-06-08 14:48 ` [PATCH v5 25/34] KVM: x86/xen: Prevent runstate times from becoming negative David Woodhouse
2026-06-08 14:48 ` [PATCH v5 26/34] KVM: x86: Avoid redundant masterclock updates from multiple vCPUs David Woodhouse
2026-06-08 14:48 ` [PATCH v5 27/34] KVM: x86: Remove runtime Xen TSC frequency CPUID update David Woodhouse
2026-06-08 14:48 ` [PATCH v5 28/34] KVM: selftests: Add Xen/generic CPUID timing leaf test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 29/34] KVM: x86: Re-synchronize TSC after KVM_SET_TSC_KHZ David Woodhouse
2026-06-08 14:48 ` [PATCH v5 30/34] KVM: selftests: Add Xen runstate migration test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 31/34] KVM: x86: Use ktime_get_snapshot_id() for master clock David Woodhouse
2026-06-08 14:48 ` [PATCH v5 32/34] KVM: x86: Compute kvmclock base without pvclock_gtod_data David Woodhouse
2026-06-08 14:48 ` [PATCH v5 33/34] KVM: x86: Replace pvclock_gtod_data vclock_mode with boolean David Woodhouse
2026-06-08 14:48 ` [PATCH v5 34/34] KVM: x86: Remove pvclock_gtod_data and private timekeeping code David Woodhouse
2026-06-09 18:50 ` [PATCH v5 00/34] Cleaning up the KVM clock mess David Woodhouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260608145455.89187-13-dwmw2@infradead.org \
    --to=dwmw2@infradead.org \
    --cc=Sascha.Bischoff@arm.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=dongli.zhang@oracle.com \
    --cc=hpa@zytor.com \
    --cc=jalliste@amazon.com \
    --cc=jgross@suse.com \
    --cc=jic23@kernel.org \
    --cc=joe.jin@oracle.com \
    --cc=joey.gouly@arm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=mingo@redhat.com \
    --cc=paul@xen.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=skhan@linuxfoundation.org \
    --cc=tglx@kernel.org \
    --cc=vkuznets@redhat.com \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox