public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: "Xin Li (Intel)" <xin@zytor.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	linux-pm@vger.kernel.org
Cc: seanjc@google.com, pbonzini@redhat.com, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
	x86@kernel.org, hpa@zytor.com, rafael@kernel.org,
	pavel@kernel.org, brgerst@gmail.com, xin@zytor.com,
	david.kaplan@amd.com, peterz@infradead.org,
	andrew.cooper3@citrix.com, kprateek.nayak@amd.com,
	arjan@linux.intel.com, chao.gao@intel.com,
	rick.p.edgecombe@intel.com, dan.j.williams@intel.com
Subject: [RFC PATCH v1 1/5] x86/boot: Shift VMXON from KVM init to CPU startup phase
Date: Tue,  9 Sep 2025 11:28:21 -0700	[thread overview]
Message-ID: <20250909182828.1542362-2-xin@zytor.com> (raw)
In-Reply-To: <20250909182828.1542362-1-xin@zytor.com>

Move the VMXON setup from the KVM initialization path to the CPU startup
phase to guarantee that hardware virtualization is enabled early and
without interruption.

As a result, KVM, often loaded as a kernel module, no longer needs to worry
about whether or not VMXON has been executed on a CPU (e.g., CPU offline
events or system reboots while KVM is loading).

Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
 arch/x86/include/asm/processor.h |   1 +
 arch/x86/include/asm/vmx.h       |   5 ++
 arch/x86/kernel/cpu/common.c     |  91 ++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmcs.h          |   5 --
 arch/x86/kvm/vmx/vmx.c           | 117 ++-----------------------------
 arch/x86/power/cpu.c             |   7 +-
 6 files changed, 107 insertions(+), 119 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bde58f6510ac..59660428f46d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -230,6 +230,7 @@ void init_cpu_devs(void);
 void get_cpu_vendor(struct cpuinfo_x86 *c);
 extern void early_cpu_init(void);
 extern void identify_secondary_cpu(unsigned int cpu);
+extern void cpu_enable_virtualization(void);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 void print_cpu_msr(struct cpuinfo_x86 *);
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cca7d6641287..736d04c1b2fc 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -20,6 +20,11 @@
 #include <asm/trapnr.h>
 #include <asm/vmxfeatures.h>
 
+struct vmcs_hdr {
+	u32 revision_id:31;
+	u32 shadow_vmcs:1;
+};
+
 #define VMCS_CONTROL_BIT(x)	BIT(VMX_FEATURE_##x & 0x1f)
 
 /*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 34a054181c4d..e36877b5a240 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -72,6 +72,7 @@
 #include <asm/tdx.h>
 #include <asm/posted_intr.h>
 #include <asm/runtime-const.h>
+#include <asm/vmx.h>
 
 #include "cpu.h"
 
@@ -1923,6 +1924,84 @@ static void generic_identify(struct cpuinfo_x86 *c)
 #endif
 }
 
+static bool is_vmx_supported(void)
+{
+	int cpu = raw_smp_processor_id();
+
+	if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_VMX & 31)))) {
+		/* May not be an Intel CPU */
+		pr_info("VMX not supported by CPU%d\n", cpu);
+		return false;
+	}
+
+	if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+	    !this_cpu_has(X86_FEATURE_VMX)) {
+		pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU%d\n", cpu);
+		return false;
+	}
+
+	return true;
+}
+
+/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
+union vmxon_vmcs {
+	struct vmcs_hdr hdr;
+	char data[PAGE_SIZE];
+};
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(union vmxon_vmcs, vmxon_vmcs);
+
+/*
+ * Runs during the CPU startup phase and executes VMXON to enable VMX. This
+ * ensures that KVM, often loaded as a kernel module, no longer needs to worry
+ * about whether or not VMXON has been executed on a CPU (e.g., CPU offline
+ * events or system reboots while KVM is loading).
+ *
+ * VMXON is not expected to fault, but fault handling is kept as a precaution
+ * against any unexpected code paths that might trigger it and can be removed
+ * later if unnecessary.
+ */
+void cpu_enable_virtualization(void)
+{
+	u64 vmxon_pointer = __pa(this_cpu_ptr(&vmxon_vmcs));
+	int cpu = raw_smp_processor_id();
+	u64 basic_msr;
+
+	if (!is_vmx_supported())
+		return;
+
+	if (cr4_read_shadow() & X86_CR4_VMXE) {
+		pr_err("VMX already enabled on CPU%d\n", cpu);
+		return;
+	}
+
+	memset(this_cpu_ptr(&vmxon_vmcs), 0, PAGE_SIZE);
+
+	/*
+	 * Even though not explicitly documented by TLFS, VMXArea passed as
+	 * VMXON argument should still be marked with revision_id reported by
+	 * physical CPU.
+	 */
+	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
+	this_cpu_ptr(&vmxon_vmcs)->hdr.revision_id = vmx_basic_vmcs_revision_id(basic_msr);
+
+	intel_pt_handle_vmx(1);
+
+	cr4_set_bits(X86_CR4_VMXE);
+
+	asm goto("1: vmxon %[vmxon_pointer]\n\t"
+		 _ASM_EXTABLE(1b, %l[fault])
+		 : : [vmxon_pointer] "m"(vmxon_pointer)
+		 : : fault);
+
+	return;
+
+fault:
+	pr_err("VMXON faulted on CPU%d\n", cpu);
+	cr4_clear_bits(X86_CR4_VMXE);
+	intel_pt_handle_vmx(0);
+}
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -2120,6 +2199,12 @@ void identify_secondary_cpu(unsigned int cpu)
 
 	tsx_ap_init();
 	c->initialized = true;
+
+	/*
+	 * Enable AP virtualization immediately after initializing the per-CPU
+	 * cpuinfo_x86 structure, ensuring that this_cpu_has() operates correctly.
+	 */
+	cpu_enable_virtualization();
 }
 
 void print_cpu_info(struct cpuinfo_x86 *c)
@@ -2551,6 +2636,12 @@ void __init arch_cpu_finalize_init(void)
 	*c = boot_cpu_data;
 	c->initialized = true;
 
+	/*
+	 * Enable BSP virtualization right after the BSP cpuinfo_x86 structure
+	 * is initialized to ensure this_cpu_has() works as expected.
+	 */
+	cpu_enable_virtualization();
+
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index b25625314658..da5631924432 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -13,11 +13,6 @@
 
 #define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
 
-struct vmcs_hdr {
-	u32 revision_id:31;
-	u32 shadow_vmcs:1;
-};
-
 struct vmcs {
 	struct vmcs_hdr hdr;
 	u32 abort;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index aa157fe5b7b3..f6742df0c4ff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -468,7 +468,6 @@ noinline void invept_error(unsigned long ext, u64 eptp)
 	vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx\n", ext, eptp);
 }
 
-static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
  * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
@@ -2736,43 +2735,14 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	return 0;
 }
 
-static bool __kvm_is_vmx_supported(void)
-{
-	int cpu = smp_processor_id();
-
-	if (!(cpuid_ecx(1) & feature_bit(VMX))) {
-		pr_err("VMX not supported by CPU %d\n", cpu);
-		return false;
-	}
-
-	if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
-	    !this_cpu_has(X86_FEATURE_VMX)) {
-		pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu);
-		return false;
-	}
-
-	return true;
-}
-
-static bool kvm_is_vmx_supported(void)
-{
-	bool supported;
-
-	migrate_disable();
-	supported = __kvm_is_vmx_supported();
-	migrate_enable();
-
-	return supported;
-}
-
 int vmx_check_processor_compat(void)
 {
 	int cpu = raw_smp_processor_id();
 	struct vmcs_config vmcs_conf;
 	struct vmx_capability vmx_cap;
 
-	if (!__kvm_is_vmx_supported())
-		return -EIO;
+	if (!(cr4_read_shadow() & X86_CR4_VMXE))
+		return -EOPNOTSUPP;
 
 	if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
 		pr_err("Failed to setup VMCS config on CPU %d\n", cpu);
@@ -2787,34 +2757,12 @@ int vmx_check_processor_compat(void)
 	return 0;
 }
 
-static int kvm_cpu_vmxon(u64 vmxon_pointer)
-{
-	u64 msr;
-
-	cr4_set_bits(X86_CR4_VMXE);
-
-	asm goto("1: vmxon %[vmxon_pointer]\n\t"
-			  _ASM_EXTABLE(1b, %l[fault])
-			  : : [vmxon_pointer] "m"(vmxon_pointer)
-			  : : fault);
-	return 0;
-
-fault:
-	WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
-		  rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
-	cr4_clear_bits(X86_CR4_VMXE);
-
-	return -EFAULT;
-}
-
 int vmx_enable_virtualization_cpu(void)
 {
 	int cpu = raw_smp_processor_id();
-	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-	int r;
 
-	if (cr4_read_shadow() & X86_CR4_VMXE)
-		return -EBUSY;
+	if (!(cr4_read_shadow() & X86_CR4_VMXE))
+		return -EOPNOTSUPP;
 
 	/*
 	 * This can happen if we hot-added a CPU but failed to allocate
@@ -2823,14 +2771,6 @@ int vmx_enable_virtualization_cpu(void)
 	if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
 		return -EFAULT;
 
-	intel_pt_handle_vmx(1);
-
-	r = kvm_cpu_vmxon(phys_addr);
-	if (r) {
-		intel_pt_handle_vmx(0);
-		return r;
-	}
-
 	return 0;
 }
 
@@ -2931,47 +2871,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	return -ENOMEM;
 }
 
-static void free_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		free_vmcs(per_cpu(vmxarea, cpu));
-		per_cpu(vmxarea, cpu) = NULL;
-	}
-}
-
-static __init int alloc_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct vmcs *vmcs;
-
-		vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
-		if (!vmcs) {
-			free_kvm_area();
-			return -ENOMEM;
-		}
-
-		/*
-		 * When eVMCS is enabled, alloc_vmcs_cpu() sets
-		 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
-		 * revision_id reported by MSR_IA32_VMX_BASIC.
-		 *
-		 * However, even though not explicitly documented by
-		 * TLFS, VMXArea passed as VMXON argument should
-		 * still be marked with revision_id reported by
-		 * physical CPU.
-		 */
-		if (kvm_is_using_evmcs())
-			vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic);
-
-		per_cpu(vmxarea, cpu) = vmcs;
-	}
-	return 0;
-}
-
 static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
 		struct kvm_segment *save)
 {
@@ -8204,8 +8103,6 @@ void vmx_hardware_unsetup(void)
 
 	if (nested)
 		nested_vmx_hardware_unsetup();
-
-	free_kvm_area();
 }
 
 void vmx_vm_destroy(struct kvm *kvm)
@@ -8499,10 +8396,6 @@ __init int vmx_hardware_setup(void)
 
 	vmx_set_cpu_caps();
 
-	r = alloc_kvm_area();
-	if (r && nested)
-		nested_vmx_hardware_unsetup();
-
 	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
 
 	/*
@@ -8554,7 +8447,7 @@ int __init vmx_init(void)
 
 	KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_vmx);
 
-	if (!kvm_is_vmx_supported())
+	if (!(cr4_read_shadow() & X86_CR4_VMXE))
 		return -EOPNOTSUPP;
 
 	/*
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 916441f5e85c..0eec314b79c2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -206,11 +206,11 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	/* cr4 was introduced in the Pentium CPU */
 #ifdef CONFIG_X86_32
 	if (ctxt->cr4)
-		__write_cr4(ctxt->cr4);
+		__write_cr4(ctxt->cr4 & ~X86_CR4_VMXE);
 #else
 /* CONFIG X86_64 */
 	wrmsrq(MSR_EFER, ctxt->efer);
-	__write_cr4(ctxt->cr4);
+	__write_cr4(ctxt->cr4 & ~X86_CR4_VMXE);
 #endif
 	write_cr3(ctxt->cr3);
 	write_cr2(ctxt->cr2);
@@ -291,6 +291,9 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	 * because some of the MSRs are "emulated" in microcode.
 	 */
 	msr_restore_context(ctxt);
+
+	if (ctxt->cr4 & X86_CR4_VMXE)
+		cpu_enable_virtualization();
 }
 
 /* Needed by apm.c */
-- 
2.51.0


  reply	other threads:[~2025-09-09 18:31 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-09 18:28 [RFC PATCH v1 0/5] x86/boot, KVM: Move VMXON/VMXOFF handling from KVM to CPU lifecycle Xin Li (Intel)
2025-09-09 18:28 ` Xin Li (Intel) [this message]
2025-09-10  5:37   ` [RFC PATCH v1 1/5] x86/boot: Shift VMXON from KVM init to CPU startup phase Adrian Hunter
2025-09-10  7:25   ` Chao Gao
2025-09-11  6:57     ` Xin Li
2025-09-10  8:02   ` Huang, Kai
2025-09-10 11:10     ` Chao Gao
2025-09-10 11:35       ` Huang, Kai
2025-09-10 13:13         ` Arjan van de Ven
2025-09-10 20:52           ` Huang, Kai
2025-09-09 18:28 ` [RFC PATCH v1 2/5] x86/boot: Move VMXOFF from KVM teardown to CPU shutdown phase Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 3/5] x86/shutdown, KVM: VMX: Move VMCLEAR of VMCSs to cpu_disable_virtualization() Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 4/5] x86/reboot: Remove emergency_reboot_disable_virtualization() Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 5/5] KVM: Remove kvm_rebooting and its references Xin Li (Intel)
2025-09-16 17:56   ` Sean Christopherson
2025-09-17 16:51     ` Xin Li
2025-09-17 23:02       ` Sean Christopherson
2025-09-11 14:20 ` [RFC PATCH v1 0/5] x86/boot, KVM: Move VMXON/VMXOFF handling from KVM to CPU lifecycle Sean Christopherson
2025-09-11 15:20   ` Dave Hansen
2025-09-16 17:29     ` Sean Christopherson
2025-09-11 17:04   ` Arjan van de Ven
2025-09-16 17:54     ` Sean Christopherson
2025-09-16 18:25       ` Jim Mattson
2025-09-17 13:48       ` Arjan van de Ven
2025-09-17 17:30       ` Xin Li
2025-09-17 22:40         ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250909182828.1542362-2-xin@zytor.com \
    --to=xin@zytor.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=arjan@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=brgerst@gmail.com \
    --cc=chao.gao@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david.kaplan@amd.com \
    --cc=hpa@zytor.com \
    --cc=kprateek.nayak@amd.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pavel@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rafael@kernel.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox