From: "Mi, Dapeng" <dapeng1.mi@linux.intel.com>
To: Sean Christopherson <seanjc@google.com>,
Marc Zyngier <maz@kernel.org>, Oliver Upton <oupton@kernel.org>,
Tianrui Zhao <zhaotianrui@loongson.cn>,
Bibo Mao <maobibo@loongson.cn>,
Huacai Chen <chenhuacai@kernel.org>,
Anup Patel <anup@brainfault.org>, Paul Walmsley <pjw@kernel.org>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>, Xin Li <xin@zytor.com>,
"H. Peter Anvin" <hpa@zytor.com>,
Andy Lutomirski <luto@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Paolo Bonzini <pbonzini@redhat.com>
Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
kvm@vger.kernel.org, loongarch@lists.linux.dev,
kvm-riscv@lists.infradead.org, linux-riscv@lists.infradead.org,
linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Mingwei Zhang <mizhang@google.com>,
Xudong Hao <xudong.hao@intel.com>,
Sandipan Das <sandipan.das@amd.com>,
Xiong Zhang <xiong.y.zhang@linux.intel.com>,
Manali Shukla <manali.shukla@amd.com>,
Jim Mattson <jmattson@google.com>
Subject: Re: [PATCH v6 44/44] KVM: VMX: Add mediated PMU support for CPUs without "save perf global ctrl"
Date: Mon, 8 Dec 2025 17:39:18 +0800
Message-ID: <ee30ed9c-6377-4d14-b930-f0e2c809df7c@linux.intel.com>
In-Reply-To: <20251206001720.468579-45-seanjc@google.com>
On 12/6/2025 8:17 AM, Sean Christopherson wrote:
> Extend mediated PMU support for Intel CPUs without support for saving
> PERF_GLOBAL_CONTROL into the guest VMCS field on VM-Exit, e.g. for Skylake
> and its derivatives, as well as Icelake. While supporting CPUs without
> VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL isn't completely trivial, it's not that
> complex either. And not supporting such CPUs would mean not supporting 7+
> years of Intel CPUs released in the past 10 years.
>
> On VM-Exit, immediately propagate the saved PERF_GLOBAL_CTRL to the VMCS
> as well as KVM's software cache so that KVM doesn't need to add full EXREG
> tracking of PERF_GLOBAL_CTRL. In practice, the vast majority of VM-Exits
> won't trigger software writes to guest PERF_GLOBAL_CTRL, so deferring the
> VMWRITE to the next VM-Enter would only delay the inevitable without
> batching/avoiding VMWRITEs.
>
> Note! Take care to refresh VM_EXIT_MSR_STORE_COUNT on nested VM-Exit, as
> it's unfortunately possible that KVM could recalculate MSR intercepts
> while L2 is active, e.g. if userspace loads nested state and _then_ sets
> PERF_CAPABILITIES. Eating the VMWRITE on every nested VM-Exit is
> unfortunate, but that's a pre-existing problem and can/should be solved
> separately, e.g. modifying the number of auto-load entries while L2 is
> active is also uncommon on modern CPUs.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/vmx/nested.c | 6 ++++-
> arch/x86/kvm/vmx/pmu_intel.c | 7 -----
> arch/x86/kvm/vmx/vmx.c | 52 ++++++++++++++++++++++++++++++++----
> 3 files changed, 52 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 614b789ecf16..1ee1edc8419d 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -5142,7 +5142,11 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
>
> kvm_nested_vmexit_handle_ibrs(vcpu);
>
> - /* Update any VMCS fields that might have changed while L2 ran */
> + /*
> + * Update any VMCS fields that might have changed while vmcs02 was the
> + * active VMCS. The tracking is per-vCPU, not per-VMCS.
> + */
> + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.nr);
> vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
> vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
> vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 55249fa4db95..27eb76e6b6a0 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -777,13 +777,6 @@ static bool intel_pmu_is_mediated_pmu_supported(struct x86_pmu_capability *host_
> if (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl()))
> return false;
>
> - /*
> - * KVM doesn't yet support mediated PMU on CPUs without support for
> - * saving PERF_GLOBAL_CTRL via a dedicated VMCS field.
> - */
> - if (!cpu_has_save_perf_global_ctrl())
> - return false;
> -
> return true;
> }
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 6a17cb90eaf4..ba1262c3e3ff 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1204,6 +1204,17 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
> return true;
> }
>
> +static void vmx_add_autostore_msr(struct vcpu_vmx *vmx, u32 msr)
> +{
> + vmx_add_auto_msr(&vmx->msr_autostore, msr, 0, VM_EXIT_MSR_STORE_COUNT,
> + vmx->vcpu.kvm);
> +}
> +
> +static void vmx_remove_autostore_msr(struct vcpu_vmx *vmx, u32 msr)
> +{
> + vmx_remove_auto_msr(&vmx->msr_autostore, msr, VM_EXIT_MSR_STORE_COUNT);
> +}
> +
> #ifdef CONFIG_X86_32
> /*
> * On 32-bit kernels, VM exits still load the FS and GS bases from the
> @@ -4225,6 +4236,8 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
>
> static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu)
> {
> + u64 vm_exit_controls_bits = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
> + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL;
> bool has_mediated_pmu = kvm_vcpu_has_mediated_pmu(vcpu);
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -4234,12 +4247,19 @@ static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu)
> if (!enable_mediated_pmu)
> return;
>
> + if (!cpu_has_save_perf_global_ctrl()) {
> + vm_exit_controls_bits &= ~VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL;
> +
> + if (has_mediated_pmu)
> + vmx_add_autostore_msr(vmx, MSR_CORE_PERF_GLOBAL_CTRL);
> + else
> + vmx_remove_autostore_msr(vmx, MSR_CORE_PERF_GLOBAL_CTRL);
> + }
> +
> vm_entry_controls_changebit(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
> has_mediated_pmu);
>
> - vm_exit_controls_changebit(vmx, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
> - VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL,
> - has_mediated_pmu);
> + vm_exit_controls_changebit(vmx, vm_exit_controls_bits, has_mediated_pmu);
>
> for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
> vmx_set_intercept_for_msr(vcpu, MSR_IA32_PERFCTR0 + i,
> @@ -7346,6 +7366,29 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
> msrs[i].host);
> }
>
> +static void vmx_refresh_guest_perf_global_control(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + if (msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL))
> + return;
> +
> + if (!cpu_has_save_perf_global_ctrl()) {
> + int slot = vmx_find_loadstore_msr_slot(&vmx->msr_autostore,
> + MSR_CORE_PERF_GLOBAL_CTRL);
> +
> + if (WARN_ON_ONCE(slot < 0))
> + return;
> +
> + pmu->global_ctrl = vmx->msr_autostore.val[slot].value;
> + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, pmu->global_ctrl);
> + return;
> + }
> +
> + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);
> +}
> +
> static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -7631,8 +7674,7 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
>
> vmx->loaded_vmcs->launched = 1;
>
> - if (!msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL))
> - vcpu_to_pmu(vcpu)->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);
> + vmx_refresh_guest_perf_global_control(vcpu);
>
> vmx_recover_nmi_blocking(vmx);
> vmx_complete_interrupts(vmx);
The change looks good to me, but I currently don't have the bandwidth to test the code on Ice Lake. I'll come back to this once I finish the tests. Thanks.