From: David Woodhouse <dwmw2@infradead.org>
To: Paolo Bonzini <pbonzini@redhat.com>,
Jonathan Corbet <corbet@lwn.net>,
Shuah Khan <skhan@linuxfoundation.org>,
Sean Christopherson <seanjc@google.com>,
Thomas Gleixner <tglx@kernel.org>, Ingo Molnar <mingo@redhat.com>,
Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Juergen Gross <jgross@suse.com>,
Boris Ostrovsky <boris.ostrovsky@oracle.com>,
David Woodhouse <dwmw2@infradead.org>,
Paul Durrant <paul@xen.org>, Jonathan Cameron <jic23@kernel.org>,
Sascha Bischoff <Sascha.Bischoff@arm.com>,
Marc Zyngier <maz@kernel.org>, Joey Gouly <joey.gouly@arm.com>,
Jack Allister <jalliste@amazon.com>,
Dongli Zhang <dongli.zhang@oracle.com>,
joe.jin@oracle.com, kvm@vger.kernel.org,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
xen-devel@lists.xenproject.org, linux-kselftest@vger.kernel.org
Subject: [PATCH v5 34/34] KVM: x86: Remove pvclock_gtod_data and private timekeeping code
Date: Mon, 8 Jun 2026 15:48:15 +0100 [thread overview]
Message-ID: <20260608145455.89187-35-dwmw2@infradead.org> (raw)
In-Reply-To: <20260608145455.89187-1-dwmw2@infradead.org>
From: David Woodhouse <dwmw@amazon.co.uk>
Remove the now-unused KVM-private timekeeping infrastructure:
- struct pvclock_clock and struct pvclock_gtod_data
- update_pvclock_gtod() and its seqcount-protected state copy
- read_tsc() (KVM's private TSC reader with cycle_last clamping)
- vgettsc() (KVM's private clocksource interpolation)
- do_kvmclock_base(), do_monotonic(), do_realtime()
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Assisted-by: Kiro:claude-opus-4.6-1m
---
Documentation/virt/kvm/devices/vcpu.rst | 4 +-
arch/x86/kvm/vmx/vmx.c | 2 +
arch/x86/kvm/x86.c | 177 +-----------------
.../testing/selftests/kvm/x86/pvclock_test.c | 7 +-
4 files changed, 9 insertions(+), 181 deletions(-)
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 167aa4140d30..3d1a89c2b4f7 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -243,9 +243,9 @@ Returns:
Specifies the guest's TSC offset relative to the host's TSC. The guest's
TSC is then derived by the following equation:
- guest_tsc = ((host_tsc * tsc_scale_ratio) >> tsc_scale_bits) + KVM_VCPU_TSC_OFFSET
+ guest_tsc = ((host_tsc * tsc_ratio) >> tsc_frac_bits) + KVM_VCPU_TSC_OFFSET
-The values of tsc_scale_ratio and tsc_scale_bits can be obtained using
+The values of tsc_ratio and tsc_frac_bits can be obtained using
the KVM_VCPU_TSC_SCALE attribute.
This attribute is useful to adjust the guest's TSC on live migration,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ed207cc7692d..1aaf3924a799 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8674,6 +8674,8 @@ __init int vmx_hardware_setup(void)
if (cpu_has_vmx_tsc_scaling() && boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
kvm_caps.has_tsc_control = true;
+ else
+ vmcs_config.cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
kvm_caps.max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
kvm_caps.tsc_scaling_ratio_frac_bits = 48;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 93a428c37847..966057913366 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2347,58 +2347,6 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
-struct pvclock_clock {
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
- u64 base_cycles;
- u64 offset;
-};
-
-struct pvclock_gtod_data {
- seqcount_t seq;
-
- struct pvclock_clock clock; /* extract of a clocksource struct */
- struct pvclock_clock raw_clock; /* extract of a clocksource struct */
-
- ktime_t offs_boot;
- u64 wall_time_sec;
-};
-
-static struct pvclock_gtod_data pvclock_gtod_data;
-
-static void update_pvclock_gtod(struct timekeeper *tk)
-{
- struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
-
- write_seqcount_begin(&vdata->seq);
-
- /* copy pvclock gtod data */
- vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
- vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
- vdata->clock.mask = tk->tkr_mono.mask;
- vdata->clock.mult = tk->tkr_mono.mult;
- vdata->clock.shift = tk->tkr_mono.shift;
- vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
- vdata->clock.offset = tk->tkr_mono.base;
-
- vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
- vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
- vdata->raw_clock.mask = tk->tkr_raw.mask;
- vdata->raw_clock.mult = tk->tkr_raw.mult;
- vdata->raw_clock.shift = tk->tkr_raw.shift;
- vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
- vdata->raw_clock.offset = tk->tkr_raw.base;
-
- vdata->wall_time_sec = tk->xtime_sec;
-
- vdata->offs_boot = tk->offs_boot;
-
- write_seqcount_end(&vdata->seq);
-}
-
static s64 get_kvmclock_base_ns(void)
{
/* Count up from boot time, but with the frequency of the raw clock. */
@@ -3037,128 +2985,6 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
#ifdef CONFIG_X86_64
-static u64 read_tsc(void)
-{
- u64 ret = (u64)rdtsc_ordered();
- u64 last = pvclock_gtod_data.clock.cycle_last;
-
- if (likely(ret >= last))
- return ret;
-
- /*
- * GCC likes to generate cmov here, but this branch is extremely
- * predictable (it's just a function of time and the likely is
- * very likely) and there's a data dependence, so force GCC
- * to generate a branch instead. I don't barrier() because
- * we don't actually need a barrier, and if this function
- * ever gets inlined it will generate worse code.
- */
- asm volatile ("");
- return last;
-}
-
-static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
- int *mode)
-{
- u64 tsc_pg_val;
- long v;
-
- switch (clock->vclock_mode) {
- case VDSO_CLOCKMODE_HVCLOCK:
- if (hv_read_tsc_page_tsc(hv_get_tsc_page(),
- tsc_timestamp, &tsc_pg_val)) {
- /* TSC page valid */
- *mode = VDSO_CLOCKMODE_HVCLOCK;
- v = (tsc_pg_val - clock->cycle_last) &
- clock->mask;
- } else {
- /* TSC page invalid */
- *mode = VDSO_CLOCKMODE_NONE;
- }
- break;
- case VDSO_CLOCKMODE_TSC:
- *mode = VDSO_CLOCKMODE_TSC;
- *tsc_timestamp = read_tsc();
- v = (*tsc_timestamp - clock->cycle_last) &
- clock->mask;
- break;
- default:
- *mode = VDSO_CLOCKMODE_NONE;
- }
-
- if (*mode == VDSO_CLOCKMODE_NONE)
- *tsc_timestamp = v = 0;
-
- return v * clock->mult;
-}
-
-/*
- * As with get_kvmclock_base_ns(), this counts from boot time, at the
- * frequency of CLOCK_MONOTONIC_RAW (hence adding gtos->offs_boot).
- */
-static int do_kvmclock_base(s64 *t, u64 *tsc_timestamp)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(>od->seq);
- ns = gtod->raw_clock.base_cycles;
- ns += vgettsc(>od->raw_clock, tsc_timestamp, &mode);
- ns >>= gtod->raw_clock.shift;
- ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
- } while (unlikely(read_seqcount_retry(>od->seq, seq)));
- *t = ns;
-
- return mode;
-}
-
-/*
- * This calculates CLOCK_MONOTONIC at the time of the TSC snapshot, with
- * no boot time offset.
- */
-static int do_monotonic(s64 *t, u64 *tsc_timestamp)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(>od->seq);
- ns = gtod->clock.base_cycles;
- ns += vgettsc(>od->clock, tsc_timestamp, &mode);
- ns >>= gtod->clock.shift;
- ns += ktime_to_ns(gtod->clock.offset);
- } while (unlikely(read_seqcount_retry(>od->seq, seq)));
- *t = ns;
-
- return mode;
-}
-
-static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
-{
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- unsigned long seq;
- int mode;
- u64 ns;
-
- do {
- seq = read_seqcount_begin(>od->seq);
- ts->tv_sec = gtod->wall_time_sec;
- ns = gtod->clock.base_cycles;
- ns += vgettsc(>od->clock, tsc_timestamp, &mode);
- ns >>= gtod->clock.shift;
- } while (unlikely(read_seqcount_retry(>od->seq, seq)));
-
- ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return mode;
-}
-
/*
* Calculates the kvmclock_base_ns (CLOCK_MONOTONIC_RAW + boot time) and
* reports the TSC value from which it do so. Returns true if host is
@@ -6231,7 +6057,7 @@ static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
break;
}
case KVM_VCPU_TSC_SCALE:
- r = -EINVAL; /* Read only */
+ r = kvm_caps.has_tsc_control ? -EINVAL : -ENXIO;
break;
default:
r = -ENXIO;
@@ -10405,7 +10231,6 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
{
struct timekeeper *tk = priv;
- update_pvclock_gtod(tk);
#ifdef CONFIG_X86_64
kvm_host_has_tsc_clocksource =
diff --git a/tools/testing/selftests/kvm/x86/pvclock_test.c b/tools/testing/selftests/kvm/x86/pvclock_test.c
index aecd62fc8a93..4c1869fa482e 100644
--- a/tools/testing/selftests/kvm/x86/pvclock_test.c
+++ b/tools/testing/selftests/kvm/x86/pvclock_test.c
@@ -14,7 +14,6 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
-#include "apic.h"
#include <asm/pvclock-abi.h>
@@ -262,10 +261,12 @@ int main(int argc, char *argv[])
return 0;
}
+static volatile uint32_t vcpu_counter;
+
static void guest_code_stable_bit(void)
{
- uint32_t apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
- uint64_t gpa = KVMCLOCK_GPA + apic_id * sizeof(struct pvclock_vcpu_time_info);
+ uint32_t idx = __atomic_fetch_add(&vcpu_counter, 1, __ATOMIC_SEQ_CST);
+ uint64_t gpa = KVMCLOCK_GPA + idx * sizeof(struct pvclock_vcpu_time_info);
wrmsr(MSR_KVM_SYSTEM_TIME_NEW, gpa | KVM_MSR_ENABLED);
GUEST_SYNC(0);
--
2.54.0
next prev parent reply other threads:[~2026-06-08 14:55 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-08 14:47 [PATCH v5 00/34] Cleaning up the KVM clock mess David Woodhouse
2026-06-08 14:47 ` [PATCH v5 01/34] KVM: x86/xen: Do not corrupt KVM clock in kvm_xen_shared_info_init() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 02/34] KVM: x86: Improve accuracy of KVM clock when TSC scaling is in force David Woodhouse
2026-06-08 14:47 ` [PATCH v5 03/34] UAPI: x86: Move pvclock-abi to UAPI for x86 platforms David Woodhouse
2026-06-08 14:47 ` [PATCH v5 04/34] KVM: x86: Add KVM_[GS]ET_CLOCK_GUEST for accurate KVM clock migration David Woodhouse
2026-06-16 6:47 ` Dongli Zhang
2026-06-16 11:13 ` David Woodhouse
2026-06-23 8:50 ` David Woodhouse
2026-06-08 14:47 ` [PATCH v5 05/34] KVM: selftests: Add KVM/PV clock selftest to prove timer correction David Woodhouse
2026-06-08 14:47 ` [PATCH v5 06/34] KVM: x86: Explicitly disable TSC scaling without CONSTANT_TSC David Woodhouse
2026-07-01 21:38 ` Sean Christopherson
2026-06-08 14:47 ` [PATCH v5 07/34] KVM: x86: Activate master clock immediately on vCPU creation David Woodhouse
2026-06-08 14:47 ` [PATCH v5 08/34] KVM: x86: Add KVM_VCPU_TSC_SCALE and fix the documentation on TSC migration David Woodhouse
2026-06-09 23:26 ` Randy Dunlap
2026-07-01 21:47 ` Sean Christopherson
2026-06-08 14:47 ` [PATCH v5 09/34] KVM: x86: Avoid NTP frequency skew for KVM clock on 32-bit host David Woodhouse
2026-06-08 14:47 ` [PATCH v5 10/34] KVM: x86: Fold __get_kvmclock() into get_kvmclock() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 11/34] KVM: x86: Restructure get_kvmclock() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 12/34] KVM: x86: Fix KVM clock precision in get_kvmclock() with TSC scaling David Woodhouse
2026-06-08 14:47 ` [PATCH v5 13/34] KVM: x86: Use get_kvmclock() in kvm_get_wall_clock_epoch() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 14/34] KVM: x86: Fix compute_guest_tsc() to handle negative time deltas David Woodhouse
2026-06-08 14:47 ` [PATCH v5 15/34] KVM: x86: Restructure kvm_guest_time_update() for TSC upscaling David Woodhouse
2026-06-08 14:47 ` [PATCH v5 16/34] KVM: x86: Simplify and comment kvm_get_time_scale() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 17/34] KVM: x86: Remove implicit rdtsc() from kvm_compute_l1_tsc_offset() David Woodhouse
2026-06-08 14:47 ` [PATCH v5 18/34] KVM: x86: Improve synchronization in kvm_synchronize_tsc() David Woodhouse
2026-06-08 14:48 ` [PATCH v5 19/34] KVM: x86: Kill last_tsc_{nsec,write,offset} fields David Woodhouse
2026-06-08 14:48 ` [PATCH v5 20/34] KVM: x86: Replace nr_vcpus_matched_tsc count with all_vcpus_matched_tsc bool David Woodhouse
2026-06-08 14:48 ` [PATCH v5 21/34] KVM: x86: Allow KVM master clock mode when TSCs are offset from each other David Woodhouse
2026-06-08 14:48 ` [PATCH v5 22/34] KVM: selftests: Add master clock offset test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 23/34] KVM: x86: Factor out kvm_use_master_clock() David Woodhouse
2026-06-08 14:48 ` [PATCH v5 24/34] KVM: x86: Avoid gratuitous global clock updates David Woodhouse
2026-06-08 14:48 ` [PATCH v5 25/34] KVM: x86/xen: Prevent runstate times from becoming negative David Woodhouse
2026-06-08 14:48 ` [PATCH v5 26/34] KVM: x86: Avoid redundant masterclock updates from multiple vCPUs David Woodhouse
2026-06-08 14:48 ` [PATCH v5 27/34] KVM: x86: Remove runtime Xen TSC frequency CPUID update David Woodhouse
2026-06-08 14:48 ` [PATCH v5 28/34] KVM: selftests: Add Xen/generic CPUID timing leaf test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 29/34] KVM: x86: Re-synchronize TSC after KVM_SET_TSC_KHZ David Woodhouse
2026-06-08 14:48 ` [PATCH v5 30/34] KVM: selftests: Add Xen runstate migration test David Woodhouse
2026-06-08 14:48 ` [PATCH v5 31/34] KVM: x86: Use ktime_get_snapshot_id() for master clock David Woodhouse
2026-06-08 14:48 ` [PATCH v5 32/34] KVM: x86: Compute kvmclock base without pvclock_gtod_data David Woodhouse
2026-06-08 14:48 ` [PATCH v5 33/34] KVM: x86: Replace pvclock_gtod_data vclock_mode with boolean David Woodhouse
2026-06-08 14:48 ` David Woodhouse [this message]
2026-06-09 18:50 ` [PATCH v5 00/34] Cleaning up the KVM clock mess David Woodhouse
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260608145455.89187-35-dwmw2@infradead.org \
--to=dwmw2@infradead.org \
--cc=Sascha.Bischoff@arm.com \
--cc=boris.ostrovsky@oracle.com \
--cc=bp@alien8.de \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=dongli.zhang@oracle.com \
--cc=hpa@zytor.com \
--cc=jalliste@amazon.com \
--cc=jgross@suse.com \
--cc=jic23@kernel.org \
--cc=joe.jin@oracle.com \
--cc=joey.gouly@arm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=maz@kernel.org \
--cc=mingo@redhat.com \
--cc=paul@xen.org \
--cc=pbonzini@redhat.com \
--cc=seanjc@google.com \
--cc=skhan@linuxfoundation.org \
--cc=tglx@kernel.org \
--cc=vkuznets@redhat.com \
--cc=x86@kernel.org \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox