From: Sean Christopherson <seanjc@google.com>
To: Kiryl Shutsemau <kas@kernel.org>,
Paolo Bonzini <pbonzini@redhat.com>,
Sean Christopherson <seanjc@google.com>,
"K. Y. Srinivasan" <kys@microsoft.com>,
Haiyang Zhang <haiyangz@microsoft.com>,
Wei Liu <wei.liu@kernel.org>, Dexuan Cui <decui@microsoft.com>,
Long Li <longli@microsoft.com>,
Ajay Kaher <ajay.kaher@broadcom.com>,
Alexey Makhalov <alexey.makhalov@broadcom.com>,
Jan Kiszka <jan.kiszka@siemens.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Andy Lutomirski <luto@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Juergen Gross <jgross@suse.com>,
Daniel Lezcano <daniel.lezcano@kernel.org>,
Thomas Gleixner <tglx@kernel.org>,
John Stultz <jstultz@google.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Broadcom internal kernel review list
<bcm-kernel-feedback-list@broadcom.com>,
Boris Ostrovsky <boris.ostrovsky@oracle.com>,
Stephen Boyd <sboyd@kernel.org>,
x86@kernel.org, linux-coco@lists.linux.dev, kvm@vger.kernel.org,
linux-hyperv@vger.kernel.org, virtualization@lists.linux.dev,
linux-kernel@vger.kernel.org, xen-devel@lists.xenproject.org,
Michael Kelley <mhklinux@outlook.com>,
Tom Lendacky <thomas.lendacky@amd.com>,
Nikunj A Dadhania <nikunj@amd.com>,
Thomas Gleixner <tglx@linutronix.de>,
David Woodhouse <dwmw@amazon.co.uk>
Subject: [PATCH v3 11/41] x86/kvm: Don't disable kvmclock on BSP in syscore_suspend()
Date: Fri, 15 May 2026 12:19:12 -0700 [thread overview]
Message-ID: <20260515191942.1892718-12-seanjc@google.com> (raw)
In-Reply-To: <20260515191942.1892718-1-seanjc@google.com>
Don't disable kvmclock on the BSP during syscore_suspend(), as the BSP's
clock is NOT restored during syscore_resume(), but is instead restored
earlier via the sched_clock restore callback. If suspend is aborted, e.g.
due to a late wakeup, the BSP will run without its clock enabled, which
"works" only because KVM-the-hypervisor is kind enough to not clobber the
shared memory when the clock is disabled. But over time, the BSP's view
of time will drift from APs.
Plumb in an "action" to KVM-as-a-guest and kvmclock code in preparation
for additional cleanups to kvmclock's suspend/resume logic.
Fixes: c02027b5742b ("x86/kvm: Disable kvmclock on all CPUs on shutdown")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm_para.h | 8 +++++++-
arch/x86/kernel/kvm.c | 15 ++++++++-------
arch/x86/kernel/kvmclock.c | 31 +++++++++++++++++++++++++------
3 files changed, 40 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 4a47c16e2df8..2adba2aff539 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,8 +118,14 @@ static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
}
#ifdef CONFIG_KVM_GUEST
+enum kvm_guest_cpu_action {
+ KVM_GUEST_BSP_SUSPEND,
+ KVM_GUEST_AP_OFFLINE,
+ KVM_GUEST_SHUTDOWN,
+};
+
void kvmclock_init(void);
-void kvmclock_disable(void);
+void kvmclock_cpu_action(enum kvm_guest_cpu_action action);
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 06534e16cfb5..0131bc1cb459 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void)
}
}
-static void kvm_guest_cpu_offline(bool shutdown)
+static void kvm_guest_cpu_offline(enum kvm_guest_cpu_action action)
{
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -465,9 +465,10 @@ static void kvm_guest_cpu_offline(bool shutdown)
if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
wrmsrq(MSR_KVM_MIGRATION_CONTROL, 0);
kvm_pv_disable_apf();
- if (!shutdown)
+ if (action != KVM_GUEST_SHUTDOWN)
apf_task_wake_all();
- kvmclock_disable();
+
+ kvmclock_cpu_action(action);
}
static int kvm_cpu_online(unsigned int cpu)
@@ -723,7 +724,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
unsigned long flags;
local_irq_save(flags);
- kvm_guest_cpu_offline(false);
+ kvm_guest_cpu_offline(KVM_GUEST_AP_OFFLINE);
local_irq_restore(flags);
return 0;
}
@@ -734,7 +735,7 @@ static int kvm_suspend(void *data)
{
u64 val = 0;
- kvm_guest_cpu_offline(false);
+ kvm_guest_cpu_offline(KVM_GUEST_BSP_SUSPEND);
#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
@@ -765,7 +766,7 @@ static struct syscore kvm_syscore = {
static void kvm_pv_guest_cpu_reboot(void *unused)
{
- kvm_guest_cpu_offline(true);
+ kvm_guest_cpu_offline(KVM_GUEST_SHUTDOWN);
}
static int kvm_pv_reboot_notify(struct notifier_block *nb,
@@ -789,7 +790,7 @@ static struct notifier_block kvm_pv_reboot_nb = {
#ifdef CONFIG_CRASH_DUMP
static void kvm_crash_shutdown(struct pt_regs *regs)
{
- kvm_guest_cpu_offline(true);
+ kvm_guest_cpu_offline(KVM_GUEST_SHUTDOWN);
native_machine_crash_shutdown(regs);
}
#endif
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index df95516a9d89..006e3a13500b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -178,8 +178,22 @@ static void kvm_register_clock(char *txt)
pr_debug("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
}
+static void kvmclock_disable(void)
+{
+ if (msr_kvm_system_time)
+ native_write_msr(msr_kvm_system_time, 0);
+}
+
static void kvm_save_sched_clock_state(void)
{
+ /*
+ * Stop host writes to kvmclock immediately prior to suspend/hibernate.
+ * If the system is hibernating, then kvmclock will likely reside at a
+ * different physical address when the system awakens, and host writes
+ * to the old address prior to reconfiguring kvmclock would clobber
+ * random memory.
+ */
+ kvmclock_disable();
}
static void kvm_restore_sched_clock_state(void)
@@ -187,6 +201,17 @@ static void kvm_restore_sched_clock_state(void)
kvm_register_clock("primary cpu clock, resume");
}
+void kvmclock_cpu_action(enum kvm_guest_cpu_action action)
+{
+ /*
+ * Don't disable kvmclock on the BSP during suspend. If kvmclock is
+ * being used for sched_clock, then it needs to be kept alive until the
+ * last minute, and restored as quickly as possible after resume.
+ */
+ if (action != KVM_GUEST_BSP_SUSPEND)
+ kvmclock_disable();
+}
+
#ifdef CONFIG_SMP
static void kvm_setup_secondary_clock(void)
{
@@ -194,12 +219,6 @@ static void kvm_setup_secondary_clock(void)
}
#endif
-void kvmclock_disable(void)
-{
- if (msr_kvm_system_time)
- native_write_msr(msr_kvm_system_time, 0);
-}
-
static void __init kvmclock_init_mem(void)
{
unsigned long ncpus;
--
2.54.0.563.g4f69b47b94-goog
next prev parent reply other threads:[~2026-05-15 19:20 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-15 19:19 [PATCH v3 00/41] x86: Try to wrangle PV clocks vs. TSC Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 01/41] x86/tsc: Add a standalone helpers for getting TSC info from CPUID.0x15 Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 02/41] x86/tsc: Add helper to register CPU and TSC freq calibration routines Sean Christopherson
2026-05-15 20:06 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 03/41] x86/sev: Mark TSC as reliable when configuring Secure TSC Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 04/41] x86/sev: Move check for SNP Secure TSC support to tsc_early_init() Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 05/41] x86/tdx: Override PV calibration routines with CPUID-based calibration Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 06/41] x86/acrn: Mark TSC frequency as known when using ACRN for calibration Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 07/41] clocksource: hyper-v: Register sched_clock save/restore iff it's necessary Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 08/41] clocksource: hyper-v: Drop wrappers to sched_clock save/restore helpers Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 09/41] clocksource: hyper-v: Don't save/restore TSC offset when using HV sched_clock Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 10/41] x86/kvmclock: Setup kvmclock for secondary CPUs iff CONFIG_SMP=y Sean Christopherson
2026-05-15 19:19 ` Sean Christopherson [this message]
2026-05-15 20:34 ` [PATCH v3 11/41] x86/kvm: Don't disable kvmclock on BSP in syscore_suspend() sashiko-bot
2026-05-15 22:29 ` Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 12/41] x86/paravirt: Remove unnecessary PARAVIRT=n stub for paravirt_set_sched_clock() Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 13/41] x86/paravirt: Move handling of unstable PV clocks into paravirt_set_sched_clock() Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 14/41] x86/kvmclock: Move sched_clock save/restore helpers up in kvmclock.c Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 15/41] x86/xen/time: Nullify x86_platform's sched_clock save/restore hooks Sean Christopherson
2026-05-15 19:48 ` sashiko-bot
2026-05-15 22:43 ` Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 16/41] x86/vmware: Nullify save/restore hooks when using VMware's sched_clock Sean Christopherson
2026-05-15 19:42 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 17/41] x86/tsc: WARN if TSC sched_clock save/restore used with PV sched_clock Sean Christopherson
2026-05-15 19:55 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 18/41] x86/paravirt: Pass sched_clock save/restore helpers during registration Sean Christopherson
2026-05-15 19:56 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 19/41] x86/kvmclock: Move kvm_sched_clock_init() down in kvmclock.c Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 20/41] x86/xen/time: Mark xen_setup_vsyscall_time_info() as __init Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 21/41] x86/pvclock: Mark setup helpers and related various as __init/__ro_after_init Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 22/41] x86/pvclock: WARN if pvclock's valid_flags are overwritten Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 23/41] x86/kvmclock: Refactor handling of PVCLOCK_TSC_STABLE_BIT during kvmclock_init() Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 24/41] timekeeping: Resume clocksources before reading persistent clock Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 25/41] x86/kvmclock: Hook clocksource.suspend/resume when kvmclock isn't sched_clock Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 26/41] x86/kvmclock: WARN if wall clock is read while kvmclock is suspended Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 27/41] x86/kvmclock: Enable kvmclock on APs during onlining if kvmclock isn't sched_clock Sean Christopherson
2026-05-15 19:47 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 28/41] x86/paravirt: Mark __paravirt_set_sched_clock() as __init Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 29/41] x86/paravirt: Plumb a return code into __paravirt_set_sched_clock() Sean Christopherson
2026-05-15 19:48 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 30/41] x86/paravirt: Don't use a PV sched_clock in CoCo guests with trusted TSC Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 31/41] x86/tsc: Pass KNOWN_FREQ and RELIABLE as params to registration Sean Christopherson
2026-05-15 19:45 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 32/41] x86/tsc: Rejects attempts to override TSC calibration with lesser routine Sean Christopherson
2026-05-15 20:16 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 33/41] x86/kvmclock: Mark TSC as reliable when it's constant and nonstop Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 34/41] KVM: x86: Officially define CPUID 0x40000010 as PV Timing Info (TSC and Bus) Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 35/41] x86/kvmclock: Obtain TSC frequency from CPUID if present Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 36/41] x86/kvmclock: Get local APIC bus frequency from PV CPUID Timing Info Sean Christopherson
2026-05-15 19:55 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 37/41] x86/kvmclock: Use TSC for sched_clock if it's constant and non-stop Sean Christopherson
2026-05-15 20:09 ` sashiko-bot
2026-05-15 19:19 ` [PATCH v3 38/41] x86/paravirt: kvmclock: Setup kvmclock early iff it's sched_clock Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 39/41] x86/paravirt: Move using_native_sched_clock() stub into timer.h Sean Christopherson
2026-05-15 19:19 ` [PATCH v3 40/41] x86/tsc: Add standalone helper for getting CPU frequency from CPUID Sean Christopherson
2026-05-15 19:51 ` sashiko-bot
2026-05-15 23:04 ` Sean Christopherson
2026-05-16 7:42 ` Paolo Bonzini
2026-05-15 19:19 ` [PATCH v3 41/41] x86/kvmclock: Get CPU base frequency from CPUID when it's available Sean Christopherson
2026-05-15 19:59 ` sashiko-bot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260515191942.1892718-12-seanjc@google.com \
--to=seanjc@google.com \
--cc=ajay.kaher@broadcom.com \
--cc=alexey.makhalov@broadcom.com \
--cc=bcm-kernel-feedback-list@broadcom.com \
--cc=boris.ostrovsky@oracle.com \
--cc=daniel.lezcano@kernel.org \
--cc=dave.hansen@linux.intel.com \
--cc=decui@microsoft.com \
--cc=dwmw@amazon.co.uk \
--cc=haiyangz@microsoft.com \
--cc=jan.kiszka@siemens.com \
--cc=jgross@suse.com \
--cc=jstultz@google.com \
--cc=kas@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=kys@microsoft.com \
--cc=linux-coco@lists.linux.dev \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=longli@microsoft.com \
--cc=luto@kernel.org \
--cc=mhklinux@outlook.com \
--cc=nikunj@amd.com \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=rick.p.edgecombe@intel.com \
--cc=sboyd@kernel.org \
--cc=tglx@kernel.org \
--cc=tglx@linutronix.de \
--cc=thomas.lendacky@amd.com \
--cc=virtualization@lists.linux.dev \
--cc=vkuznets@redhat.com \
--cc=wei.liu@kernel.org \
--cc=x86@kernel.org \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.