public inbox for linux-coco@lists.linux.dev
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Thomas Gleixner <tglx@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	Borislav Petkov <bp@alien8.de>,
	 Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org,  Kiryl Shutsemau <kas@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	 Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	 Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Cc: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev,
	 kvm@vger.kernel.org, linux-perf-users@vger.kernel.org,
	 Chao Gao <chao.gao@intel.com>,
	Xu Yilun <yilun.xu@linux.intel.com>,
	 Dan Williams <dan.j.williams@intel.com>
Subject: [PATCH v3 04/16] KVM: VMX: Unconditionally allocate root VMCSes during boot CPU bringup
Date: Fri, 13 Feb 2026 17:26:50 -0800	[thread overview]
Message-ID: <20260214012702.2368778-5-seanjc@google.com> (raw)
In-Reply-To: <20260214012702.2368778-1-seanjc@google.com>

Allocate the root VMCS (misleadingly called "vmxarea" and "kvm_area" in KVM)
for each possible CPU during early boot CPU bringup, before early TDX
initialization, so that TDX can eventually do VMXON on-demand (to make
SEAMCALLs) without needing to load kvm-intel.ko.  Allocate the pages early
on, e.g. instead of trying to do so on-demand, to avoid having to juggle
allocation failures at runtime.

Opportunistically rename the per-CPU pointers to better reflect the role
of the VMCS.  Use Intel's "root VMCS" terminology, e.g. from various VMCS
patents[1][2] and older SDMs, not the more opaque "VMXON region" used in
recent versions of the SDM.  While it's possible the VMCS passed to VMXON
no longer serves as _the_ root VMCS on modern CPUs, it is still in effect
a "root mode VMCS", as described in the patents.

Link: https://patentimages.storage.googleapis.com/c7/e4/32/d7a7def5580667/WO2013101191A1.pdf [1]
Link: https://patentimages.storage.googleapis.com/13/f6/8d/1361fab8c33373/US20080163205A1.pdf [2]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/virt.h  | 13 ++++++-
 arch/x86/kernel/cpu/common.c |  2 +
 arch/x86/kvm/vmx/vmx.c       | 58 ++---------------------------
 arch/x86/virt/hw.c           | 71 ++++++++++++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 55 deletions(-)

diff --git a/arch/x86/include/asm/virt.h b/arch/x86/include/asm/virt.h
index 131b9bf9ef3c..0da6db4f5b0c 100644
--- a/arch/x86/include/asm/virt.h
+++ b/arch/x86/include/asm/virt.h
@@ -2,10 +2,21 @@
 #ifndef _ASM_X86_VIRT_H
 #define _ASM_X86_VIRT_H
 
-#include <linux/types.h>
+#include <linux/percpu-defs.h>
+
+#include <asm/reboot.h>
 
 #if IS_ENABLED(CONFIG_KVM_X86)
 extern bool virt_rebooting;
+
+void __init x86_virt_init(void);
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+DECLARE_PER_CPU(struct vmcs *, root_vmcs);
+#endif
+
+#else
+static __always_inline void x86_virt_init(void) {}
 #endif
 
 #endif /* _ASM_X86_VIRT_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e7ab22fce3b5..dda9e41292db 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -71,6 +71,7 @@
 #include <asm/traps.h>
 #include <asm/sev.h>
 #include <asm/tdx.h>
+#include <asm/virt.h>
 #include <asm/posted_intr.h>
 #include <asm/runtime-const.h>
 
@@ -2143,6 +2144,7 @@ static __init void identify_boot_cpu(void)
 	cpu_detect_tlb(&boot_cpu_data);
 	setup_cr_pinning();
 
+	x86_virt_init();
 	tsx_init();
 	tdx_init();
 	lkgs_init();
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fc6e3b620866..abd4830f71d8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -580,7 +580,6 @@ noinline void invept_error(unsigned long ext, u64 eptp)
 	vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx\n", ext, eptp);
 }
 
-static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
  * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
@@ -2934,6 +2933,9 @@ static bool __kvm_is_vmx_supported(void)
 		return false;
 	}
 
+	if (!per_cpu(root_vmcs, cpu))
+		return false;
+
 	return true;
 }
 
@@ -3008,7 +3010,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
 int vmx_enable_virtualization_cpu(void)
 {
 	int cpu = raw_smp_processor_id();
-	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+	u64 phys_addr = __pa(per_cpu(root_vmcs, cpu));
 	int r;
 
 	if (cr4_read_shadow() & X86_CR4_VMXE)
@@ -3129,47 +3131,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	return -ENOMEM;
 }
 
-static void free_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		free_vmcs(per_cpu(vmxarea, cpu));
-		per_cpu(vmxarea, cpu) = NULL;
-	}
-}
-
-static __init int alloc_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct vmcs *vmcs;
-
-		vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
-		if (!vmcs) {
-			free_kvm_area();
-			return -ENOMEM;
-		}
-
-		/*
-		 * When eVMCS is enabled, alloc_vmcs_cpu() sets
-		 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
-		 * revision_id reported by MSR_IA32_VMX_BASIC.
-		 *
-		 * However, even though not explicitly documented by
-		 * TLFS, VMXArea passed as VMXON argument should
-		 * still be marked with revision_id reported by
-		 * physical CPU.
-		 */
-		if (kvm_is_using_evmcs())
-			vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic);
-
-		per_cpu(vmxarea, cpu) = vmcs;
-	}
-	return 0;
-}
-
 static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
 		struct kvm_segment *save)
 {
@@ -8566,8 +8527,6 @@ void vmx_hardware_unsetup(void)
 
 	if (nested)
 		nested_vmx_hardware_unsetup();
-
-	free_kvm_area();
 }
 
 void vmx_vm_destroy(struct kvm *kvm)
@@ -8870,10 +8829,6 @@ __init int vmx_hardware_setup(void)
 			return r;
 	}
 
-	r = alloc_kvm_area();
-	if (r)
-		goto err_kvm_area;
-
 	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
 
 	/*
@@ -8900,11 +8855,6 @@ __init int vmx_hardware_setup(void)
 	kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
 
 	return 0;
-
-err_kvm_area:
-	if (nested)
-		nested_vmx_hardware_unsetup();
-	return r;
 }
 
 void vmx_exit(void)
diff --git a/arch/x86/virt/hw.c b/arch/x86/virt/hw.c
index df3dc18d19b4..56972f594d90 100644
--- a/arch/x86/virt/hw.c
+++ b/arch/x86/virt/hw.c
@@ -1,7 +1,78 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
 #include <linux/kvm_types.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
 
+#include <asm/perf_event.h>
+#include <asm/processor.h>
 #include <asm/virt.h>
+#include <asm/vmx.h>
 
 __visible bool virt_rebooting;
 EXPORT_SYMBOL_FOR_KVM(virt_rebooting);
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_PER_CPU(struct vmcs *, root_vmcs);
+EXPORT_PER_CPU_SYMBOL(root_vmcs);
+
+static __init void x86_vmx_exit(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		free_page((unsigned long)per_cpu(root_vmcs, cpu));
+		per_cpu(root_vmcs, cpu) = NULL;
+	}
+}
+
+static __init int x86_vmx_init(void)
+{
+	u64 basic_msr;
+	u32 rev_id;
+	int cpu;
+
+	if (!cpu_feature_enabled(X86_FEATURE_VMX))
+		return -EOPNOTSUPP;
+
+	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
+
+	/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
+	if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE))
+		return -EIO;
+
+	/*
+	 * Even if eVMCS is enabled (or will be enabled?), and even though not
+	 * explicitly documented by TLFS, the root VMCS passed to VMXON should
+	 * still be marked with the revision_id reported by the physical CPU.
+	 */
+	rev_id = vmx_basic_vmcs_revision_id(basic_msr);
+
+	for_each_possible_cpu(cpu) {
+		int node = cpu_to_node(cpu);
+		struct page *page;
+		struct vmcs *vmcs;
+
+		page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+		if (!page) {
+			x86_vmx_exit();
+			return -ENOMEM;
+		}
+
+		vmcs = page_address(page);
+		vmcs->hdr.revision_id = rev_id;
+		per_cpu(root_vmcs, cpu) = vmcs;
+	}
+
+	return 0;
+}
+#else
+static __init int x86_vmx_init(void) { return -EOPNOTSUPP; }
+#endif
+
+void __init x86_virt_init(void)
+{
+	x86_vmx_init();
+}
-- 
2.53.0.310.g728cabbaf7-goog


  parent reply	other threads:[~2026-02-14  1:27 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-14  1:26 [PATCH v3 00/16] KVM: x86/tdx: Have TDX handle VMXON during bringup Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 01/16] KVM: x86: Move kvm_rebooting to x86 Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 02/16] KVM: VMX: Move architectural "vmcs" and "vmcs_hdr" structures to public vmx.h Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 03/16] KVM: x86: Move "kvm_rebooting" to kernel as "virt_rebooting" Sean Christopherson
2026-02-14  1:26 ` Sean Christopherson [this message]
2026-02-14  1:26 ` [PATCH v3 05/16] x86/virt: Force-clear X86_FEATURE_VMX if configuring root VMCS fails Sean Christopherson
2026-02-16 20:53   ` dan.j.williams
2026-02-17 16:31     ` Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 06/16] KVM: VMX: Move core VMXON enablement to kernel Sean Christopherson
2026-02-26 22:32   ` Dave Hansen
2026-02-14  1:26 ` [PATCH v3 07/16] KVM: SVM: Move core EFER.SVME " Sean Christopherson
2026-02-26  7:40   ` Chao Gao
2026-02-26 23:43     ` Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 08/16] KVM: x86: Move bulk of emergency virtualizaton logic to virt subsystem Sean Christopherson
2026-02-26  8:55   ` Chao Gao
2026-02-14  1:26 ` [PATCH v3 09/16] x86/virt: Add refcounting of VMX/SVM usage to support multiple in-kernel users Sean Christopherson
2026-02-27 11:26   ` Chao Gao
2026-02-14  1:26 ` [PATCH v3 10/16] x86/virt/tdx: Drop the outdated requirement that TDX be enabled in IRQ context Sean Christopherson
2026-02-17 11:29   ` Huang, Kai
2026-02-17 15:25     ` Sean Christopherson
2026-02-17 20:30       ` Huang, Kai
2026-02-14  1:26 ` [PATCH v3 11/16] KVM: x86/tdx: Do VMXON and TDX-Module initialization during subsys init Sean Christopherson
2026-02-26 22:35   ` Dave Hansen
2026-02-27 11:28   ` Chao Gao
2026-02-14  1:26 ` [PATCH v3 12/16] x86/virt/tdx: Tag a pile of functions as __init, and globals as __ro_after_init Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 13/16] x86/virt/tdx: KVM: Consolidate TDX CPU hotplug handling Sean Christopherson
2026-02-14  1:27 ` [PATCH v3 14/16] x86/virt/tdx: Use ida_is_empty() to detect if any TDs may be running Sean Christopherson
2026-02-14  1:27 ` [PATCH v3 15/16] KVM: Bury kvm_{en,dis}able_virtualization() in kvm_main.c once more Sean Christopherson
2026-02-14  1:27 ` [PATCH v3 16/16] KVM: TDX: Fold tdx_bringup() into tdx_hardware_setup() Sean Christopherson
2026-02-16 23:00 ` [PATCH v3 00/16] KVM: x86/tdx: Have TDX handle VMXON during bringup dan.j.williams
2026-02-25 14:38 ` Chao Gao
2026-03-03 21:39 ` Sagi Shahar
2026-03-04  0:06   ` Sagi Shahar
2026-03-05 17:08 ` Sean Christopherson
2026-03-05 18:50   ` dan.j.williams
2026-03-05 18:54     ` Dave Hansen
2026-03-05 19:07       ` Sean Christopherson
2026-03-05 19:08     ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260214012702.2368778-5-seanjc@google.com \
    --to=seanjc@google.com \
    --cc=acme@kernel.org \
    --cc=bp@alien8.de \
    --cc=chao.gao@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=kas@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@kernel.org \
    --cc=x86@kernel.org \
    --cc=yilun.xu@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox