From: Sean Christopherson <seanjc@google.com>
To: Thomas Gleixner <tglx@kernel.org>, Ingo Molnar <mingo@redhat.com>,
Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, Kiryl Shutsemau <kas@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>
Cc: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev,
kvm@vger.kernel.org, linux-perf-users@vger.kernel.org,
Chao Gao <chao.gao@intel.com>,
Xu Yilun <yilun.xu@linux.intel.com>,
Dan Williams <dan.j.williams@intel.com>
Subject: [PATCH v3 06/16] KVM: VMX: Move core VMXON enablement to kernel
Date: Fri, 13 Feb 2026 17:26:52 -0800 [thread overview]
Message-ID: <20260214012702.2368778-7-seanjc@google.com> (raw)
In-Reply-To: <20260214012702.2368778-1-seanjc@google.com>
Move the innermost VMXON+VMXOFF logic out of KVM and into core x86 so
that TDX can (eventually) force VMXON without having to rely on KVM being
loaded, e.g. to do SEAMCALLs during initialization.
Opportunistically update the comment regarding emergency disabling via NMI
to clarify that virt_rebooting will be set by _another_ emergency callback,
i.e. that virt_rebooting doesn't need to be set before VMCLEAR, only
before _this_ invocation does VMXOFF.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/events/intel/pt.c | 1 -
arch/x86/include/asm/virt.h | 6 +--
arch/x86/kvm/vmx/vmx.c | 73 +++----------------------------
arch/x86/virt/hw.c | 85 ++++++++++++++++++++++++++++++++++++-
4 files changed, 92 insertions(+), 73 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 44524a387c58..b5726b50e77d 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1591,7 +1591,6 @@ void intel_pt_handle_vmx(int on)
local_irq_restore(flags);
}
-EXPORT_SYMBOL_FOR_KVM(intel_pt_handle_vmx);
/*
* PMU callbacks
diff --git a/arch/x86/include/asm/virt.h b/arch/x86/include/asm/virt.h
index 0da6db4f5b0c..cca0210a5c16 100644
--- a/arch/x86/include/asm/virt.h
+++ b/arch/x86/include/asm/virt.h
@@ -2,8 +2,6 @@
#ifndef _ASM_X86_VIRT_H
#define _ASM_X86_VIRT_H
-#include <linux/percpu-defs.h>
-
#include <asm/reboot.h>
#if IS_ENABLED(CONFIG_KVM_X86)
@@ -12,7 +10,9 @@ extern bool virt_rebooting;
void __init x86_virt_init(void);
#if IS_ENABLED(CONFIG_KVM_INTEL)
-DECLARE_PER_CPU(struct vmcs *, root_vmcs);
+int x86_vmx_enable_virtualization_cpu(void);
+int x86_vmx_disable_virtualization_cpu(void);
+void x86_vmx_emergency_disable_virtualization_cpu(void);
#endif
#else
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e767835a4f3a..36238cc694fd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -786,41 +786,16 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
return ret;
}
-/*
- * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
- *
- * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
- * atomically track post-VMXON state, e.g. this may be called in NMI context.
- * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
- * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
- * magically in RM, VM86, compat mode, or at CPL>0.
- */
-static int kvm_cpu_vmxoff(void)
-{
- asm goto("1: vmxoff\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "cc", "memory" : fault);
-
- cr4_clear_bits(X86_CR4_VMXE);
- return 0;
-
-fault:
- cr4_clear_bits(X86_CR4_VMXE);
- return -EIO;
-}
-
void vmx_emergency_disable_virtualization_cpu(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
- virt_rebooting = true;
-
/*
* Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
- * set in task context. If this races with VMX is disabled by an NMI,
- * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
- * virt_rebooting set.
+ * set in task context. If this races with _another_ emergency call
+ * from NMI context, VMCLEAR may #UD, but KVM will eat those faults due
+ * to virt_rebooting being set by the interrupting NMI callback.
*/
if (!(__read_cr4() & X86_CR4_VMXE))
return;
@@ -832,7 +807,7 @@ void vmx_emergency_disable_virtualization_cpu(void)
vmcs_clear(v->shadow_vmcs);
}
- kvm_cpu_vmxoff();
+ x86_vmx_emergency_disable_virtualization_cpu();
}
static void __loaded_vmcs_clear(void *arg)
@@ -2988,34 +2963,9 @@ int vmx_check_processor_compat(void)
return 0;
}
-static int kvm_cpu_vmxon(u64 vmxon_pointer)
-{
- u64 msr;
-
- cr4_set_bits(X86_CR4_VMXE);
-
- asm goto("1: vmxon %[vmxon_pointer]\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- : : [vmxon_pointer] "m"(vmxon_pointer)
- : : fault);
- return 0;
-
-fault:
- WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
- rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
- cr4_clear_bits(X86_CR4_VMXE);
-
- return -EFAULT;
-}
-
int vmx_enable_virtualization_cpu(void)
{
int cpu = raw_smp_processor_id();
- u64 phys_addr = __pa(per_cpu(root_vmcs, cpu));
- int r;
-
- if (cr4_read_shadow() & X86_CR4_VMXE)
- return -EBUSY;
/*
* This can happen if we hot-added a CPU but failed to allocate
@@ -3024,15 +2974,7 @@ int vmx_enable_virtualization_cpu(void)
if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
return -EFAULT;
- intel_pt_handle_vmx(1);
-
- r = kvm_cpu_vmxon(phys_addr);
- if (r) {
- intel_pt_handle_vmx(0);
- return r;
- }
-
- return 0;
+ return x86_vmx_enable_virtualization_cpu();
}
static void vmclear_local_loaded_vmcss(void)
@@ -3049,12 +2991,9 @@ void vmx_disable_virtualization_cpu(void)
{
vmclear_local_loaded_vmcss();
- if (kvm_cpu_vmxoff())
- kvm_spurious_fault();
+ x86_vmx_disable_virtualization_cpu();
hv_reset_evmcs();
-
- intel_pt_handle_vmx(0);
}
struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
diff --git a/arch/x86/virt/hw.c b/arch/x86/virt/hw.c
index 40495872fdfb..dc426c2bc24a 100644
--- a/arch/x86/virt/hw.c
+++ b/arch/x86/virt/hw.c
@@ -15,8 +15,89 @@ __visible bool virt_rebooting;
EXPORT_SYMBOL_FOR_KVM(virt_rebooting);
#if IS_ENABLED(CONFIG_KVM_INTEL)
-DEFINE_PER_CPU(struct vmcs *, root_vmcs);
-EXPORT_PER_CPU_SYMBOL(root_vmcs);
+static DEFINE_PER_CPU(struct vmcs *, root_vmcs);
+
+static int x86_virt_cpu_vmxon(void)
+{
+ u64 vmxon_pointer = __pa(per_cpu(root_vmcs, raw_smp_processor_id()));
+ u64 msr;
+
+ cr4_set_bits(X86_CR4_VMXE);
+
+ asm goto("1: vmxon %[vmxon_pointer]\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ : : [vmxon_pointer] "m"(vmxon_pointer)
+ : : fault);
+ return 0;
+
+fault:
+ WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
+ rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
+ cr4_clear_bits(X86_CR4_VMXE);
+
+ return -EFAULT;
+}
+
+int x86_vmx_enable_virtualization_cpu(void)
+{
+ int r;
+
+ if (cr4_read_shadow() & X86_CR4_VMXE)
+ return -EBUSY;
+
+ intel_pt_handle_vmx(1);
+
+ r = x86_virt_cpu_vmxon();
+ if (r) {
+ intel_pt_handle_vmx(0);
+ return r;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_FOR_KVM(x86_vmx_enable_virtualization_cpu);
+
+/*
+ * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
+ *
+ * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
+ * atomically track post-VMXON state, e.g. this may be called in NMI context.
+ * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
+ * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
+ * magically in RM, VM86, compat mode, or at CPL>0.
+ */
+int x86_vmx_disable_virtualization_cpu(void)
+{
+ int r = -EIO;
+
+ asm goto("1: vmxoff\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "cc", "memory" : fault);
+ r = 0;
+
+fault:
+ cr4_clear_bits(X86_CR4_VMXE);
+ intel_pt_handle_vmx(0);
+ return r;
+}
+EXPORT_SYMBOL_FOR_KVM(x86_vmx_disable_virtualization_cpu);
+
+void x86_vmx_emergency_disable_virtualization_cpu(void)
+{
+ virt_rebooting = true;
+
+ /*
+ * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
+ * set in task context. If this races with _another_ emergency call
+ * from NMI context, VMXOFF may #UD, but kernel will eat those faults
+ * due to virt_rebooting being set by the interrupting NMI callback.
+ */
+ if (!(__read_cr4() & X86_CR4_VMXE))
+ return;
+
+ x86_vmx_disable_virtualization_cpu();
+}
+EXPORT_SYMBOL_FOR_KVM(x86_vmx_emergency_disable_virtualization_cpu);
static __init void x86_vmx_exit(void)
{
--
2.53.0.310.g728cabbaf7-goog
next prev parent reply other threads:[~2026-02-14 1:27 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-14 1:26 [PATCH v3 00/16] KVM: x86/tdx: Have TDX handle VMXON during bringup Sean Christopherson
2026-02-14 1:26 ` [PATCH v3 01/16] KVM: x86: Move kvm_rebooting to x86 Sean Christopherson
2026-02-14 1:26 ` [PATCH v3 02/16] KVM: VMX: Move architectural "vmcs" and "vmcs_hdr" structures to public vmx.h Sean Christopherson
2026-02-14 1:26 ` [PATCH v3 03/16] KVM: x86: Move "kvm_rebooting" to kernel as "virt_rebooting" Sean Christopherson
2026-02-14 1:26 ` [PATCH v3 04/16] KVM: VMX: Unconditionally allocate root VMCSes during boot CPU bringup Sean Christopherson
2026-02-14 1:26 ` [PATCH v3 05/16] x86/virt: Force-clear X86_FEATURE_VMX if configuring root VMCS fails Sean Christopherson
2026-02-16 20:53 ` dan.j.williams
2026-02-17 16:31 ` Sean Christopherson
2026-02-14 1:26 ` Sean Christopherson [this message]
2026-02-26 22:32 ` [PATCH v3 06/16] KVM: VMX: Move core VMXON enablement to kernel Dave Hansen
2026-02-14 1:26 ` [PATCH v3 07/16] KVM: SVM: Move core EFER.SVME " Sean Christopherson
2026-02-26 7:40 ` Chao Gao
2026-02-26 23:43 ` Sean Christopherson
2026-02-14 1:26 ` [PATCH v3 08/16] KVM: x86: Move bulk of emergency virtualizaton logic to virt subsystem Sean Christopherson
2026-02-26 8:55 ` Chao Gao
2026-02-14 1:26 ` [PATCH v3 09/16] x86/virt: Add refcounting of VMX/SVM usage to support multiple in-kernel users Sean Christopherson
2026-02-27 11:26 ` Chao Gao
2026-02-14 1:26 ` [PATCH v3 10/16] x86/virt/tdx: Drop the outdated requirement that TDX be enabled in IRQ context Sean Christopherson
2026-02-17 11:29 ` Huang, Kai
2026-02-17 15:25 ` Sean Christopherson
2026-02-17 20:30 ` Huang, Kai
2026-02-14 1:26 ` [PATCH v3 11/16] KVM: x86/tdx: Do VMXON and TDX-Module initialization during subsys init Sean Christopherson
2026-02-26 22:35 ` Dave Hansen
2026-02-27 11:28 ` Chao Gao
2026-02-14 1:26 ` [PATCH v3 12/16] x86/virt/tdx: Tag a pile of functions as __init, and globals as __ro_after_init Sean Christopherson
2026-02-14 1:26 ` [PATCH v3 13/16] x86/virt/tdx: KVM: Consolidate TDX CPU hotplug handling Sean Christopherson
2026-02-14 1:27 ` [PATCH v3 14/16] x86/virt/tdx: Use ida_is_empty() to detect if any TDs may be running Sean Christopherson
2026-02-14 1:27 ` [PATCH v3 15/16] KVM: Bury kvm_{en,dis}able_virtualization() in kvm_main.c once more Sean Christopherson
2026-02-14 1:27 ` [PATCH v3 16/16] KVM: TDX: Fold tdx_bringup() into tdx_hardware_setup() Sean Christopherson
2026-02-16 23:00 ` [PATCH v3 00/16] KVM: x86/tdx: Have TDX handle VMXON during bringup dan.j.williams
2026-02-25 14:38 ` Chao Gao
2026-03-03 21:39 ` Sagi Shahar
2026-03-04 0:06 ` Sagi Shahar
2026-03-05 17:08 ` Sean Christopherson
2026-03-05 18:50 ` dan.j.williams
2026-03-05 18:54 ` Dave Hansen
2026-03-05 19:07 ` Sean Christopherson
2026-03-05 19:08 ` Sean Christopherson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260214012702.2368778-7-seanjc@google.com \
--to=seanjc@google.com \
--cc=acme@kernel.org \
--cc=bp@alien8.de \
--cc=chao.gao@intel.com \
--cc=dan.j.williams@intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=kas@kernel.org \
--cc=kvm@vger.kernel.org \
--cc=linux-coco@lists.linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@kernel.org \
--cc=x86@kernel.org \
--cc=yilun.xu@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox