From: "Mi, Dapeng" <dapeng1.mi@linux.intel.com>
To: Sandipan Das <sandipan.das@amd.com>,
kvm@vger.kernel.org, linux-perf-users@vger.kernel.org
Cc: Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Kan Liang <kan.liang@linux.intel.com>,
Mingwei Zhang <mizhang@google.com>,
"Nikunj A . Dadhania" <nikunj@amd.com>,
Manali Shukla <manali.shukla@amd.com>,
Ravi Bangoria <ravi.bangoria@amd.com>,
Ananth Narayan <ananth.narayan@amd.com>
Subject: Re: [RFC PATCH 4/7] KVM: x86/pmu: Add support for hardware virtualized PMUs
Date: Fri, 14 Nov 2025 14:28:55 +0800 [thread overview]
Message-ID: <9919b426-4ce6-4651-b1f2-367c4f79474d@linux.intel.com> (raw)
In-Reply-To: <b83d93a3e677fecdbdd2c159e85de4bca2165b79.1762960531.git.sandipan.das@amd.com>

On 11/13/2025 2:18 PM, Sandipan Das wrote:
> Extend the Mediated PMU framework to support hardware virtualized PMUs.
> The key differences with Mediated PMU are listed below.
> * Hardware saves and restores the guest PMU state on world switches.
> * The guest PMU state is saved in vendor-specific structures (such as
> VMCB or VMCS) instead of struct kvm_pmu.
> * Hardware relies on interrupt virtualization (such as VNMI or AVIC)
> to notify guests about counter overflows instead of receiving
> interrupts in host context after switching the delivery mode in
> LVTPC and then injecting them back into the guest (KVM_REQ_PMI).
>
> Parts of the original PMU load and put functionality are reused as the
> active host events still need to be scheduled in and out in preparation
> for world switches.
>
> Event filtering and instruction emulation require the ability to change
> the guest PMU state in software. Since struct kvm_pmu no longer has the
> correct state, make use of host-initiated MSR accesses for accessing
> MSR states directly from vendor-specific structures.
>
> RDPMC is intercepted for legacy guests which do not have access to all
> counters. Host-initiated MSR accesses are also used in such cases to
> read the latest counter value from vendor-specific structures.
>
> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
> ---
> arch/x86/kvm/pmu.c | 94 +++++++++++++++++++++++++++++-------
> arch/x86/kvm/pmu.h | 6 +++
> arch/x86/kvm/svm/pmu.c | 1 +
> arch/x86/kvm/vmx/pmu_intel.c | 1 +
> arch/x86/kvm/x86.c | 4 ++
> arch/x86/kvm/x86.h | 1 +
> 6 files changed, 89 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 0e5048ae86fa..1453fb3a60a2 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -168,6 +168,43 @@ void kvm_handle_guest_mediated_pmi(void)
> kvm_make_request(KVM_REQ_PMI, vcpu);
> }
>
> +static __always_inline u32 fixed_counter_msr(u32 idx)
> +{
> + return kvm_pmu_ops.FIXED_COUNTER_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
> +}
> +
> +static __always_inline u32 gp_counter_msr(u32 idx)
> +{
> + return kvm_pmu_ops.GP_COUNTER_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
> +}
> +
> +static __always_inline u32 gp_eventsel_msr(u32 idx)
> +{
> + return kvm_pmu_ops.GP_EVENTSEL_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
> +}
> +
> +static void kvm_pmu_get_msr_state(struct kvm_vcpu *vcpu, u32 index, u64 *data)
> +{
> + struct msr_data msr_info;
> +
> + msr_info.index = index;
> + msr_info.host_initiated = true;
> +
> + KVM_BUG_ON(kvm_pmu_call(get_msr)(vcpu, &msr_info), vcpu->kvm);
> + *data = msr_info.data;
> +}
> +
> +static void kvm_pmu_set_msr_state(struct kvm_vcpu *vcpu, u32 index, u64 data)
> +{
> + struct msr_data msr_info;
> +
> + msr_info.data = data;
> + msr_info.index = index;
> + msr_info.host_initiated = true;
> +
> + KVM_BUG_ON(kvm_pmu_call(set_msr)(vcpu, &msr_info), vcpu->kvm);
> +}

With these two new helpers, I suppose the existing
write_global_ctrl()/read_global_ctrl() helpers could be retired.

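A rough, untested sketch of what the kvm_pmu_refresh() call site could
look like, assuming the vendor set_msr() path (host_initiated = true)
ends up updating the same hardware-backed state that write_global_ctrl()
touches today:

	/* Covers both the mediated and hardware-virtualized cases. */
	if (kvm_vcpu_has_mediated_pmu(vcpu) ||
	    kvm_vcpu_has_virtualized_pmu(vcpu))
		kvm_pmu_set_msr_state(vcpu, kvm_pmu_ops.PERF_GLOBAL_CTRL,
				      pmu->global_ctrl);
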
> +
> static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
> {
> struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> @@ -520,19 +557,22 @@ static bool pmc_is_event_allowed(struct kvm_pmc *pmc)
>
> static void kvm_mediated_pmu_refresh_event_filter(struct kvm_pmc *pmc)
> {
> - bool allowed = pmc_is_event_allowed(pmc);
> struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> + struct kvm_vcpu *vcpu = pmc->vcpu;
>
> if (pmc_is_gp(pmc)) {
> pmc->eventsel_hw &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
> - if (allowed)
> + if (pmc_is_event_allowed(pmc))
> pmc->eventsel_hw |= pmc->eventsel &
> ARCH_PERFMON_EVENTSEL_ENABLE;
> +
> + if (kvm_vcpu_has_virtualized_pmu(vcpu))
> + kvm_pmu_set_msr_state(vcpu, gp_eventsel_msr(pmc->idx), pmc->eventsel_hw);
> } else {
> u64 mask = intel_fixed_bits_by_idx(pmc->idx - KVM_FIXED_PMC_BASE_IDX, 0xf);
>
> pmu->fixed_ctr_ctrl_hw &= ~mask;
> - if (allowed)
> + if (pmc_is_event_allowed(pmc))
> pmu->fixed_ctr_ctrl_hw |= pmu->fixed_ctr_ctrl & mask;
> }
> }
> @@ -740,6 +780,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
> kvm_is_cr0_bit_set(vcpu, X86_CR0_PE))
> return 1;
>
> + if (kvm_vcpu_has_virtualized_pmu(pmc->vcpu))
> + kvm_pmu_get_msr_state(pmc->vcpu, gp_counter_msr(pmc->idx), &pmc->counter);
> +
> *data = pmc_read_counter(pmc) & mask;
> return 0;
> }
> @@ -974,6 +1017,9 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
> (kvm_pmu_has_perf_global_ctrl(pmu) || kvm_vcpu_has_mediated_pmu(vcpu)))
> pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
>
> + if (kvm_vcpu_has_virtualized_pmu(vcpu))
> + kvm_pmu_set_msr_state(vcpu, kvm_pmu_ops.PERF_GLOBAL_CTRL, pmu->global_ctrl);
> +
> if (kvm_vcpu_has_mediated_pmu(vcpu))
> kvm_pmu_call(write_global_ctrl)(pmu->global_ctrl);
>
> @@ -1099,6 +1145,11 @@ static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu,
> if (bitmap_empty(event_pmcs, X86_PMC_IDX_MAX))
> return;
>
> + if (kvm_vcpu_has_virtualized_pmu(vcpu)) {
> + kvm_pmu_get_msr_state(vcpu, kvm_pmu_ops.PERF_GLOBAL_CTRL, &pmu->global_ctrl);
> + kvm_pmu_get_msr_state(vcpu, kvm_pmu_ops.PERF_GLOBAL_STATUS, &pmu->global_status);
> + }
> +
> if (!kvm_pmu_has_perf_global_ctrl(pmu))
> bitmap_copy(bitmap, event_pmcs, X86_PMC_IDX_MAX);
> else if (!bitmap_and(bitmap, event_pmcs,
> @@ -1107,11 +1158,21 @@ static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu,
>
> idx = srcu_read_lock(&vcpu->kvm->srcu);
> kvm_for_each_pmc(pmu, pmc, i, bitmap) {
> + if (kvm_vcpu_has_virtualized_pmu(vcpu))
> + kvm_pmu_get_msr_state(vcpu, gp_counter_msr(pmc->idx), &pmc->counter);
> +
> if (!pmc_is_event_allowed(pmc) || !cpl_is_matched(pmc))
> continue;
>
> kvm_pmu_incr_counter(pmc);
> +
> + if (kvm_vcpu_has_virtualized_pmu(vcpu))
> + kvm_pmu_set_msr_state(vcpu, gp_counter_msr(pmc->idx), pmc->counter);
> }
> +
> + if (kvm_vcpu_has_virtualized_pmu(vcpu))
> + kvm_pmu_set_msr_state(vcpu, kvm_pmu_ops.PERF_GLOBAL_STATUS, pmu->global_status);
> +
> srcu_read_unlock(&vcpu->kvm->srcu, idx);
> }
>
> @@ -1270,21 +1331,6 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
> return r;
> }
>
> -static __always_inline u32 fixed_counter_msr(u32 idx)
> -{
> - return kvm_pmu_ops.FIXED_COUNTER_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
> -}
> -
> -static __always_inline u32 gp_counter_msr(u32 idx)
> -{
> - return kvm_pmu_ops.GP_COUNTER_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
> -}
> -
> -static __always_inline u32 gp_eventsel_msr(u32 idx)
> -{
> - return kvm_pmu_ops.GP_EVENTSEL_BASE + idx * kvm_pmu_ops.MSR_STRIDE;
> -}
> -
> static void kvm_pmu_load_guest_pmcs(struct kvm_vcpu *vcpu)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> @@ -1319,6 +1365,12 @@ void kvm_mediated_pmu_load(struct kvm_vcpu *vcpu)
>
> lockdep_assert_irqs_disabled();
>
> + /* Guest PMU state is restored by hardware at VM-Entry */
> + if (kvm_vcpu_has_virtualized_pmu(vcpu)) {
> + perf_load_guest_context(0);
> + return;
> + }
> +
> perf_load_guest_context(kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVTPC));
>
> /*
> @@ -1372,6 +1424,12 @@ void kvm_mediated_pmu_put(struct kvm_vcpu *vcpu)
>
> lockdep_assert_irqs_disabled();
>
> + /* Guest PMU state is saved by hardware at VM-Exit */
> + if (kvm_vcpu_has_virtualized_pmu(vcpu)) {
> + perf_put_guest_context();
> + return;
> + }
> +
> /*
> * Defer handling of PERF_GLOBAL_CTRL to vendor code. On Intel, it's
> * atomically cleared on VM-Exit, i.e. doesn't need to be clear here.
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index a0cd42cbea9d..55f0679b522d 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -47,6 +47,7 @@ struct kvm_pmu_ops {
> const int MIN_NR_GP_COUNTERS;
>
> const u32 PERF_GLOBAL_CTRL;
> + const u32 PERF_GLOBAL_STATUS;
> const u32 GP_EVENTSEL_BASE;
> const u32 GP_COUNTER_BASE;
> const u32 FIXED_COUNTER_BASE;
> @@ -76,6 +77,11 @@ static inline bool kvm_vcpu_has_mediated_pmu(struct kvm_vcpu *vcpu)
> return enable_mediated_pmu && vcpu_to_pmu(vcpu)->version;
> }
>
> +static inline bool kvm_vcpu_has_virtualized_pmu(struct kvm_vcpu *vcpu)
> +{
> + return enable_virtualized_pmu && vcpu_to_pmu(vcpu)->version;
> +}
> +
> /*
> * KVM tracks all counters in 64-bit bitmaps, with general purpose counters
> * mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index c03720b30785..8a32e1a9c07d 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -278,6 +278,7 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
> .MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
>
> .PERF_GLOBAL_CTRL = MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
> + .PERF_GLOBAL_STATUS = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
> .GP_EVENTSEL_BASE = MSR_F15H_PERF_CTL0,
> .GP_COUNTER_BASE = MSR_F15H_PERF_CTR0,
> .FIXED_COUNTER_BASE = 0,
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 41a845de789e..9685af27c15c 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -845,6 +845,7 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
> .MIN_NR_GP_COUNTERS = 1,
>
> .PERF_GLOBAL_CTRL = MSR_CORE_PERF_GLOBAL_CTRL,
> + .PERF_GLOBAL_STATUS = MSR_CORE_PERF_GLOBAL_STATUS,
> .GP_EVENTSEL_BASE = MSR_P6_EVNTSEL0,
> .GP_COUNTER_BASE = MSR_IA32_PMC0,
> .FIXED_COUNTER_BASE = MSR_CORE_PERF_FIXED_CTR0,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6bdf7ef0b535..750535a53a30 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -191,6 +191,10 @@ module_param(enable_pmu, bool, 0444);
> bool __read_mostly enable_mediated_pmu;
> EXPORT_SYMBOL_GPL(enable_mediated_pmu);
>
> +/* Enable/disable hardware PMU virtualization. */
> +bool __read_mostly enable_virtualized_pmu;
> +EXPORT_SYMBOL_GPL(enable_virtualized_pmu);
> +
> bool __read_mostly eager_page_split = true;
> module_param(eager_page_split, bool, 0644);
>
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index bd1149768acc..8cca48d1eed7 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -446,6 +446,7 @@ extern struct kvm_host_values kvm_host;
>
> extern bool enable_pmu;
> extern bool enable_mediated_pmu;
> +extern bool enable_virtualized_pmu;
>
> /*
> * Get a filtered version of KVM's supported XCR0 that strips out dynamic