public inbox for linux-pm@vger.kernel.org
 help / color / mirror / Atom feed
From: Xin Li <xin@zytor.com>
To: Sean Christopherson <seanjc@google.com>,
	Arjan van de Ven <arjan@linux.intel.com>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	linux-pm@vger.kernel.org, pbonzini@redhat.com,
	tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
	rafael@kernel.org, pavel@kernel.org, brgerst@gmail.com,
	david.kaplan@amd.com, peterz@infradead.org,
	andrew.cooper3@citrix.com, kprateek.nayak@amd.com,
	chao.gao@intel.com, rick.p.edgecombe@intel.com,
	dan.j.williams@intel.com,
	"adrian.hunter@intel.com" <adrian.hunter@intel.com>
Subject: Re: [RFC PATCH v1 0/5] x86/boot, KVM: Move VMXON/VMXOFF handling from KVM to CPU lifecycle
Date: Wed, 17 Sep 2025 10:30:31 -0700	[thread overview]
Message-ID: <f533d3a4-183e-4b3d-9b3a-95defb1876e0@zytor.com> (raw)
In-Reply-To: <aMmkZlWl4TiS2qm8@google.com>

On 9/16/2025 10:54 AM, Sean Christopherson wrote:
> On Thu, Sep 11, 2025, Arjan van de Ven wrote:
>> Hi,
>>> I also want to keep the code as a module, both to avoid doing VMXON unconditionally,
>>
>> can you expand on what the problem is with having VMXON unconditionally enabled?
> 
> Unlike say EFER.SVME, VMXON fundamentally changes CPU behavior.  E.g. blocks INIT,
> activates VMCS caches (which aren't cleared by VMXOFF on pre-SPR CPUs, and AFAIK
> Intel hasn't even publicly committed to that behavior for SPR+), restricts allowed
> CR0 and CR4 values, raises questions about ucode patch updates, triggers unique
> flows in SMI/RSM, prevents Intel PT from tracing on certain CPUs, and probably a
> few other things I'm forgetting.


Regarding Intel PT, if VMXON/VMXOFF are moved to CPU startup/shutdown, then,
as Intel PT is initialized during the arch_initcall() stage, entering and
leaving VMX operation no longer happen while Intel PT is _active_, thus
intel_pt_handle_vmx() no longer needs to "handle" VMX state transitions.

Thus, the function's purpose is simplified to signaling Intel PT not to
write to IA32_RTIT_CTL during VMX operation if the processor supports Intel
PT but disallows its use in VMX operation, as indicated by IA32_VMX_MISC[14]
being cleared.  Otherwise, it does nothing and leaves pt_ctx.vmx_on as 0.

If the following patch is correct, it's more of a simplification then :)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index e8cf29d2b10c..8325a824700a 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -225,17 +225,6 @@ static int __init pt_pmu_hw_init(void)
  		break;
  	}

-	if (boot_cpu_has(X86_FEATURE_VMX)) {
-		/*
-		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
-		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
-		 * post-VMXON.
-		 */
-		rdmsrq(MSR_IA32_VMX_MISC, reg);
-		if (reg & BIT(14))
-			pt_pmu.vmx = true;
-	}
-
  	for (i = 0; i < PT_CPUID_LEAVES; i++) {
  		cpuid_count(20, i,
  			    &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
@@ -1556,41 +1545,39 @@ void intel_pt_interrupt(void)
  	}
  }

-void intel_pt_handle_vmx(int on)
+/*
+ * VMXON is done in the CPU startup phase, thus pt is initialized later.
+ *
+ * Signal pt to not write IA32_RTIT_CTL while in VMX operation if the
+ * processor supports Intel PT but does not allow it to be used in VMX
+ * operation, i.e. IA32_VMX_MISC[bit 14] is cleared.
+ *
+ * Note: If IA32_VMX_MISC[bit 14] is set, vmx_on in pt_ctx remains 0.
+ */
+void intel_pt_set_vmx(int on)
  {
  	struct pt *pt = this_cpu_ptr(&pt_ctx);
-	struct perf_event *event;
-	unsigned long flags;
+	int cpu = raw_smp_processor_id();
+
+	if (!cpu && cpu_feature_enabled(X86_FEATURE_VMX)) {
+		u64 misc;
+
+		/*
+		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
+		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+		 * post-VMXON.
+		 */
+		rdmsrq(MSR_IA32_VMX_MISC, misc);
+		if (misc & BIT(14))
+			pt_pmu.vmx = true;
+	}

  	/* PT plays nice with VMX, do nothing */
  	if (pt_pmu.vmx)
  		return;

-	/*
-	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
-	 * sure to not try to set it while VMX is on. Disable
-	 * interrupts to avoid racing with pmu callbacks;
-	 * concurrent PMI should be handled fine.
-	 */
-	local_irq_save(flags);
  	WRITE_ONCE(pt->vmx_on, on);
-
-	/*
-	 * If an AUX transaction is in progress, it will contain
-	 * gap(s), so flag it PARTIAL to inform the user.
-	 */
-	event = pt->handle.event;
-	if (event)
-		perf_aux_output_flag(&pt->handle,
-		                     PERF_AUX_FLAG_PARTIAL);
-
-	/* Turn PTs back on */
-	if (!on && event)
-		wrmsrq(MSR_IA32_RTIT_CTL, event->hw.aux_config);
-
-	local_irq_restore(flags);
  }
-EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);

  /*
   * PMU callbacks
diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 70d1d94aca7e..9140796e6268 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -659,12 +659,9 @@ static inline void x86_perf_get_lbr(struct x86_pmu_lbr 
*lbr)
  #endif

  #ifdef CONFIG_CPU_SUP_INTEL
- extern void intel_pt_handle_vmx(int on);
+extern void intel_pt_set_vmx(int on);
  #else
-static inline void intel_pt_handle_vmx(int on)
-{
-
-}
+static inline void intel_pt_set_vmx(int on) { }
  #endif

  #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 03b28fa2e91e..9dad23c86152 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2009,7 +2009,7 @@ void cpu_enable_virtualization(void)
  	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
  	this_cpu_ptr(&vmxon_vmcs)->hdr.revision_id = 
vmx_basic_vmcs_revision_id(basic_msr);

-	intel_pt_handle_vmx(1);
+	intel_pt_set_vmx(1);

  	cr4_set_bits(X86_CR4_VMXE);

@@ -2023,7 +2023,7 @@ void cpu_enable_virtualization(void)
  fault:
  	pr_err("VMXON faulted on CPU%d\n", cpu);
  	cr4_clear_bits(X86_CR4_VMXE);
-	intel_pt_handle_vmx(0);
+	intel_pt_set_vmx(0);
  }

  /*
@@ -2055,7 +2055,7 @@ void cpu_disable_virtualization(void)

  exit:
  	cr4_clear_bits(X86_CR4_VMXE);
-	intel_pt_handle_vmx(0);
+	intel_pt_set_vmx(0);
  	return;

  fault:

  parent reply	other threads:[~2025-09-17 17:31 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-09 18:28 [RFC PATCH v1 0/5] x86/boot, KVM: Move VMXON/VMXOFF handling from KVM to CPU lifecycle Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 1/5] x86/boot: Shift VMXON from KVM init to CPU startup phase Xin Li (Intel)
2025-09-10  5:37   ` Adrian Hunter
2025-09-10  7:25   ` Chao Gao
2025-09-11  6:57     ` Xin Li
2025-09-10  8:02   ` Huang, Kai
2025-09-10 11:10     ` Chao Gao
2025-09-10 11:35       ` Huang, Kai
2025-09-10 13:13         ` Arjan van de Ven
2025-09-10 20:52           ` Huang, Kai
2025-09-09 18:28 ` [RFC PATCH v1 2/5] x86/boot: Move VMXOFF from KVM teardown to CPU shutdown phase Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 3/5] x86/shutdown, KVM: VMX: Move VMCLEAR of VMCSs to cpu_disable_virtualization() Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 4/5] x86/reboot: Remove emergency_reboot_disable_virtualization() Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 5/5] KVM: Remove kvm_rebooting and its references Xin Li (Intel)
2025-09-16 17:56   ` Sean Christopherson
2025-09-17 16:51     ` Xin Li
2025-09-17 23:02       ` Sean Christopherson
2025-09-11 14:20 ` [RFC PATCH v1 0/5] x86/boot, KVM: Move VMXON/VMXOFF handling from KVM to CPU lifecycle Sean Christopherson
2025-09-11 15:20   ` Dave Hansen
2025-09-16 17:29     ` Sean Christopherson
2025-09-11 17:04   ` Arjan van de Ven
2025-09-16 17:54     ` Sean Christopherson
2025-09-16 18:25       ` Jim Mattson
2025-09-17 13:48       ` Arjan van de Ven
2025-09-17 17:30       ` Xin Li [this message]
2025-09-17 22:40         ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f533d3a4-183e-4b3d-9b3a-95defb1876e0@zytor.com \
    --to=xin@zytor.com \
    --cc=adrian.hunter@intel.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=arjan@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=brgerst@gmail.com \
    --cc=chao.gao@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david.kaplan@amd.com \
    --cc=hpa@zytor.com \
    --cc=kprateek.nayak@amd.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pavel@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rafael@kernel.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox