public inbox for linux-perf-users@vger.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Thomas Gleixner <tglx@kernel.org>, Ingo Molnar <mingo@redhat.com>,
	Borislav Petkov <bp@alien8.de>,
	 Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org,  Kiryl Shutsemau <kas@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	 Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	 Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Cc: linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev,
	 kvm@vger.kernel.org, linux-perf-users@vger.kernel.org,
	 Chao Gao <chao.gao@intel.com>,
	Xu Yilun <yilun.xu@linux.intel.com>,
	 Dan Williams <dan.j.williams@intel.com>
Subject: [PATCH v3 09/16] x86/virt: Add refcounting of VMX/SVM usage to support multiple in-kernel users
Date: Fri, 13 Feb 2026 17:26:55 -0800	[thread overview]
Message-ID: <20260214012702.2368778-10-seanjc@google.com> (raw)
In-Reply-To: <20260214012702.2368778-1-seanjc@google.com>

Implement a per-CPU refcounting scheme so that "users" of hardware
virtualization, e.g. KVM and the future TDX code, can co-exist without
pulling the rug out from under each other.  E.g. if KVM were to disable
VMX on module unload or when the last KVM VM was destroyed, SEAMCALLs from
the TDX subsystem would #UD and panic the kernel.

Disable preemption in the get/put APIs to ensure virtualization is fully
enabled/disabled before returning to the caller.  E.g. if the task were
preempted after a 0=>1 transition, the new task would see a 1=>2 and thus
return without enabling virtualization.  Explicitly disable preemption
instead of requiring the caller to do so, because the need to disable
preemption is an artifact of the implementation.  E.g. from KVM's
perspective there is no _need_ to disable preemption as KVM guarantees the
pCPU on which it is running is stable (but preemption is enabled).

Opportunistically abstract away SVM vs. VMX in the public APIs by using
X86_FEATURE_{SVM,VMX} to communicate what technology the caller wants to
enable and use.

Cc: Xu Yilun <yilun.xu@linux.intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/virt.h | 11 ++-----
 arch/x86/kvm/svm/svm.c      |  4 +--
 arch/x86/kvm/vmx/vmx.c      |  4 +--
 arch/x86/virt/hw.c          | 64 +++++++++++++++++++++++++++----------
 4 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/virt.h b/arch/x86/include/asm/virt.h
index 2c35534437e0..1558a0673d06 100644
--- a/arch/x86/include/asm/virt.h
+++ b/arch/x86/include/asm/virt.h
@@ -11,15 +11,8 @@ extern bool virt_rebooting;
 
 void __init x86_virt_init(void);
 
-#if IS_ENABLED(CONFIG_KVM_INTEL)
-int x86_vmx_enable_virtualization_cpu(void);
-int x86_vmx_disable_virtualization_cpu(void);
-#endif
-
-#if IS_ENABLED(CONFIG_KVM_AMD)
-int x86_svm_enable_virtualization_cpu(void);
-int x86_svm_disable_virtualization_cpu(void);
-#endif
+int x86_virt_get_ref(int feat);
+void x86_virt_put_ref(int feat);
 
 int x86_virt_emergency_disable_virtualization_cpu(void);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5f033bf3ba83..539fb4306dce 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -489,7 +489,7 @@ static void svm_disable_virtualization_cpu(void)
 	if (tsc_scaling)
 		__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
 
-	x86_svm_disable_virtualization_cpu();
+	x86_virt_put_ref(X86_FEATURE_SVM);
 
 	amd_pmu_disable_virt();
 }
@@ -501,7 +501,7 @@ static int svm_enable_virtualization_cpu(void)
 	int me = raw_smp_processor_id();
 	int r;
 
-	r = x86_svm_enable_virtualization_cpu();
+	r = x86_virt_get_ref(X86_FEATURE_SVM);
 	if (r)
 		return r;
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c02fd7e91809..6200cf4dbd26 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2963,7 +2963,7 @@ int vmx_enable_virtualization_cpu(void)
 	if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
 		return -EFAULT;
 
-	return x86_vmx_enable_virtualization_cpu();
+	return x86_virt_get_ref(X86_FEATURE_VMX);
 }
 
 static void vmclear_local_loaded_vmcss(void)
@@ -2980,7 +2980,7 @@ void vmx_disable_virtualization_cpu(void)
 {
 	vmclear_local_loaded_vmcss();
 
-	x86_vmx_disable_virtualization_cpu();
+	x86_virt_put_ref(X86_FEATURE_VMX);
 
 	hv_reset_evmcs();
 }
diff --git a/arch/x86/virt/hw.c b/arch/x86/virt/hw.c
index 73c8309ba3fb..c898f16fe612 100644
--- a/arch/x86/virt/hw.c
+++ b/arch/x86/virt/hw.c
@@ -13,6 +13,8 @@
 
 struct x86_virt_ops {
 	int feature;
+	int (*enable_virtualization_cpu)(void);
+	int (*disable_virtualization_cpu)(void);
 	void (*emergency_disable_virtualization_cpu)(void);
 };
 static struct x86_virt_ops virt_ops __ro_after_init;
@@ -20,6 +22,8 @@ static struct x86_virt_ops virt_ops __ro_after_init;
 __visible bool virt_rebooting;
 EXPORT_SYMBOL_FOR_KVM(virt_rebooting);
 
+static DEFINE_PER_CPU(int, virtualization_nr_users);
+
 static cpu_emergency_virt_cb __rcu *kvm_emergency_callback;
 
 void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback)
@@ -74,13 +78,10 @@ static int x86_virt_cpu_vmxon(void)
 	return -EFAULT;
 }
 
-int x86_vmx_enable_virtualization_cpu(void)
+static int x86_vmx_enable_virtualization_cpu(void)
 {
 	int r;
 
-	if (virt_ops.feature != X86_FEATURE_VMX)
-		return -EOPNOTSUPP;
-
 	if (cr4_read_shadow() & X86_CR4_VMXE)
 		return -EBUSY;
 
@@ -94,7 +95,6 @@ int x86_vmx_enable_virtualization_cpu(void)
 
 	return 0;
 }
-EXPORT_SYMBOL_FOR_KVM(x86_vmx_enable_virtualization_cpu);
 
 /*
  * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
@@ -105,7 +105,7 @@ EXPORT_SYMBOL_FOR_KVM(x86_vmx_enable_virtualization_cpu);
  * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
  * magically in RM, VM86, compat mode, or at CPL>0.
  */
-int x86_vmx_disable_virtualization_cpu(void)
+static int x86_vmx_disable_virtualization_cpu(void)
 {
 	int r = -EIO;
 
@@ -119,7 +119,6 @@ int x86_vmx_disable_virtualization_cpu(void)
 	intel_pt_handle_vmx(0);
 	return r;
 }
-EXPORT_SYMBOL_FOR_KVM(x86_vmx_disable_virtualization_cpu);
 
 static void x86_vmx_emergency_disable_virtualization_cpu(void)
 {
@@ -154,6 +153,8 @@ static __init int __x86_vmx_init(void)
 {
 	const struct x86_virt_ops vmx_ops = {
 		.feature = X86_FEATURE_VMX,
+		.enable_virtualization_cpu = x86_vmx_enable_virtualization_cpu,
+		.disable_virtualization_cpu = x86_vmx_disable_virtualization_cpu,
 		.emergency_disable_virtualization_cpu = x86_vmx_emergency_disable_virtualization_cpu,
 	};
 
@@ -212,13 +213,10 @@ static __init void x86_vmx_exit(void) { }
 #endif
 
 #if IS_ENABLED(CONFIG_KVM_AMD)
-int x86_svm_enable_virtualization_cpu(void)
+static int x86_svm_enable_virtualization_cpu(void)
 {
 	u64 efer;
 
-	if (virt_ops.feature != X86_FEATURE_SVM)
-		return -EOPNOTSUPP;
-
 	rdmsrq(MSR_EFER, efer);
 	if (efer & EFER_SVME)
 		return -EBUSY;
@@ -226,9 +224,8 @@ int x86_svm_enable_virtualization_cpu(void)
 	wrmsrq(MSR_EFER, efer | EFER_SVME);
 	return 0;
 }
-EXPORT_SYMBOL_FOR_KVM(x86_svm_enable_virtualization_cpu);
 
-int x86_svm_disable_virtualization_cpu(void)
+static int x86_svm_disable_virtualization_cpu(void)
 {
 	int r = -EIO;
 	u64 efer;
@@ -247,7 +244,6 @@ int x86_svm_disable_virtualization_cpu(void)
 	wrmsrq(MSR_EFER, efer & ~EFER_SVME);
 	return r;
 }
-EXPORT_SYMBOL_FOR_KVM(x86_svm_disable_virtualization_cpu);
 
 static void x86_svm_emergency_disable_virtualization_cpu(void)
 {
@@ -268,6 +264,8 @@ static __init int x86_svm_init(void)
 {
 	const struct x86_virt_ops svm_ops = {
 		.feature = X86_FEATURE_SVM,
+		.enable_virtualization_cpu = x86_svm_enable_virtualization_cpu,
+		.disable_virtualization_cpu = x86_svm_disable_virtualization_cpu,
 		.emergency_disable_virtualization_cpu = x86_svm_emergency_disable_virtualization_cpu,
 	};
 
@@ -281,6 +279,41 @@ static __init int x86_svm_init(void)
 static __init int x86_svm_init(void) { return -EOPNOTSUPP; }
 #endif
 
+int x86_virt_get_ref(int feat)
+{
+	int r;
+
+	/* Ensure the !feature check can't get false positives. */
+	BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX);
+
+	if (!virt_ops.feature || virt_ops.feature != feat)
+		return -EOPNOTSUPP;
+
+	guard(preempt)();
+
+	if (this_cpu_inc_return(virtualization_nr_users) > 1)
+		return 0;
+
+	r = virt_ops.enable_virtualization_cpu();
+	if (r)
+		WARN_ON_ONCE(this_cpu_dec_return(virtualization_nr_users));
+
+	return r;
+}
+EXPORT_SYMBOL_FOR_KVM(x86_virt_get_ref);
+
+void x86_virt_put_ref(int feat)
+{
+	guard(preempt)();
+
+	if (WARN_ON_ONCE(!this_cpu_read(virtualization_nr_users)) ||
+	    this_cpu_dec_return(virtualization_nr_users))
+		return;
+
+	BUG_ON(virt_ops.disable_virtualization_cpu() && !virt_rebooting);
+}
+EXPORT_SYMBOL_FOR_KVM(x86_virt_put_ref);
+
 /*
  * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
  * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
@@ -288,9 +321,6 @@ static __init int x86_svm_init(void) { return -EOPNOTSUPP; }
  */
 int x86_virt_emergency_disable_virtualization_cpu(void)
 {
-	/* Ensure the !feature check can't get false positives. */
-	BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX);
-
 	if (!virt_ops.feature)
 		return -EOPNOTSUPP;
 
-- 
2.53.0.310.g728cabbaf7-goog


  parent reply	other threads:[~2026-02-14  1:27 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-14  1:26 [PATCH v3 00/16] KVM: x86/tdx: Have TDX handle VMXON during bringup Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 01/16] KVM: x86: Move kvm_rebooting to x86 Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 02/16] KVM: VMX: Move architectural "vmcs" and "vmcs_hdr" structures to public vmx.h Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 03/16] KVM: x86: Move "kvm_rebooting" to kernel as "virt_rebooting" Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 04/16] KVM: VMX: Unconditionally allocate root VMCSes during boot CPU bringup Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 05/16] x86/virt: Force-clear X86_FEATURE_VMX if configuring root VMCS fails Sean Christopherson
2026-02-16 20:53   ` dan.j.williams
2026-02-17 16:31     ` Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 06/16] KVM: VMX: Move core VMXON enablement to kernel Sean Christopherson
2026-02-26 22:32   ` Dave Hansen
2026-02-14  1:26 ` [PATCH v3 07/16] KVM: SVM: Move core EFER.SVME " Sean Christopherson
2026-02-26  7:40   ` Chao Gao
2026-02-26 23:43     ` Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 08/16] KVM: x86: Move bulk of emergency virtualization logic to virt subsystem Sean Christopherson
2026-02-26  8:55   ` Chao Gao
2026-02-14  1:26 ` Sean Christopherson [this message]
2026-02-27 11:26   ` [PATCH v3 09/16] x86/virt: Add refcounting of VMX/SVM usage to support multiple in-kernel users Chao Gao
2026-02-14  1:26 ` [PATCH v3 10/16] x86/virt/tdx: Drop the outdated requirement that TDX be enabled in IRQ context Sean Christopherson
2026-02-17 11:29   ` Huang, Kai
2026-02-17 15:25     ` Sean Christopherson
2026-02-17 20:30       ` Huang, Kai
2026-02-14  1:26 ` [PATCH v3 11/16] KVM: x86/tdx: Do VMXON and TDX-Module initialization during subsys init Sean Christopherson
2026-02-26 22:35   ` Dave Hansen
2026-02-27 11:28   ` Chao Gao
2026-02-14  1:26 ` [PATCH v3 12/16] x86/virt/tdx: Tag a pile of functions as __init, and globals as __ro_after_init Sean Christopherson
2026-02-14  1:26 ` [PATCH v3 13/16] x86/virt/tdx: KVM: Consolidate TDX CPU hotplug handling Sean Christopherson
2026-02-14  1:27 ` [PATCH v3 14/16] x86/virt/tdx: Use ida_is_empty() to detect if any TDs may be running Sean Christopherson
2026-02-14  1:27 ` [PATCH v3 15/16] KVM: Bury kvm_{en,dis}able_virtualization() in kvm_main.c once more Sean Christopherson
2026-02-14  1:27 ` [PATCH v3 16/16] KVM: TDX: Fold tdx_bringup() into tdx_hardware_setup() Sean Christopherson
2026-02-16 23:00 ` [PATCH v3 00/16] KVM: x86/tdx: Have TDX handle VMXON during bringup dan.j.williams
2026-02-25 14:38 ` Chao Gao
2026-03-03 21:39 ` Sagi Shahar
2026-03-04  0:06   ` Sagi Shahar
2026-03-05 17:08 ` Sean Christopherson
2026-03-05 18:50   ` dan.j.williams
2026-03-05 18:54     ` Dave Hansen
2026-03-05 19:07       ` Sean Christopherson
2026-03-05 19:08     ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260214012702.2368778-10-seanjc@google.com \
    --to=seanjc@google.com \
    --cc=acme@kernel.org \
    --cc=bp@alien8.de \
    --cc=chao.gao@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=kas@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@kernel.org \
    --cc=x86@kernel.org \
    --cc=yilun.xu@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox