* [PATCH V2 1/4] KVM: x86/pmu: Do not map fixed counters >= 3 to generic perf events
2026-04-23 17:46 [PATCH V2 0/4] KVM: x86/pmu: Add hardware Topdown metrics support Zide Chen
@ 2026-04-23 17:46 ` Zide Chen
2026-04-30 1:55 ` Mi, Dapeng
2026-04-23 17:46 ` [PATCH V2 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU Zide Chen
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Zide Chen @ 2026-04-23 17:46 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Zide Chen,
Das Sandipan, Shukla Manali, Dapeng Mi, Falcon Thomas, Xudong Hao
Only fixed counters 0..2 have matching generic cross-platform
hardware perf events (INSTRUCTIONS, CPU_CYCLES, REF_CPU_CYCLES).
Therefore, perf_get_hw_event_config() is only applicable to these
counters.
KVM does not intend to emulate fixed counters >= 3 on legacy
(non-mediated) vPMU, while for mediated vPMU, KVM does not care what
the fixed counter event mappings are. Therefore, return 0 for their
eventsel.
Also remove __always_inline as BUILD_BUG_ON() is no longer needed.
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
V2:
- Replace 3 in "if (index < 3)" with ARRAY_SIZE(fixed_pmc_perf_ids).
---
arch/x86/kvm/vmx/pmu_intel.c | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 27eb76e6b6a0..05a59f4acfdd 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -454,28 +454,30 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* different perf_event is already utilizing the requested counter, but the end
* result is the same (ignoring the fact that using a general purpose counter
* will likely exacerbate counter contention).
- *
- * Forcibly inlined to allow asserting on @index at build time, and there should
- * never be more than one user.
*/
-static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
+static u64 intel_get_fixed_pmc_eventsel(unsigned int index)
{
const enum perf_hw_id fixed_pmc_perf_ids[] = {
[0] = PERF_COUNT_HW_INSTRUCTIONS,
[1] = PERF_COUNT_HW_CPU_CYCLES,
[2] = PERF_COUNT_HW_REF_CPU_CYCLES,
};
- u64 eventsel;
-
- BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUNTERS);
- BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUNTERS);
+ u64 eventsel = 0;
/*
- * Yell if perf reports support for a fixed counter but perf doesn't
- * have a known encoding for the associated general purpose event.
+ * Fixed counters 3 and above don't have corresponding generic hardware
+ * perf event, and KVM does not intend to emulate them on non-mediated
+ * vPMU.
*/
- eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
- WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
+ if (index < ARRAY_SIZE(fixed_pmc_perf_ids)) {
+ /*
+ * Yell if perf reports support for a fixed counter but perf
+ * doesn't have a known encoding for the associated general
+ * purpose event.
+ */
+ eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
+ WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
+ }
return eventsel;
}
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH V2 1/4] KVM: x86/pmu: Do not map fixed counters >= 3 to generic perf events
2026-04-23 17:46 ` [PATCH V2 1/4] KVM: x86/pmu: Do not map fixed counters >= 3 to generic perf events Zide Chen
@ 2026-04-30 1:55 ` Mi, Dapeng
0 siblings, 0 replies; 12+ messages in thread
From: Mi, Dapeng @ 2026-04-30 1:55 UTC (permalink / raw)
To: Zide Chen, Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Das Sandipan,
Shukla Manali, Falcon Thomas, Xudong Hao
On 4/24/2026 1:46 AM, Zide Chen wrote:
> Only fixed counters 0..2 have matching generic cross-platform
> hardware perf events (INSTRUCTIONS, CPU_CYCLES, REF_CPU_CYCLES).
> Therefore, perf_get_hw_event_config() is only applicable to these
> counters.
>
> KVM does not intend to emulate fixed counters >= 3 on legacy
> (non-mediated) vPMU, while for mediated vPMU, KVM does not care what
> the fixed counter event mappings are. Therefore, return 0 for their
> eventsel.
>
> Also remove __always_inline as BUILD_BUG_ON() is no longer needed.
>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
> V2:
> - Replace 3 in "if (index < 3)" with ARRAY_SIZE(fixed_pmc_perf_ids).
> ---
> arch/x86/kvm/vmx/pmu_intel.c | 26 ++++++++++++++------------
> 1 file changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 27eb76e6b6a0..05a59f4acfdd 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -454,28 +454,30 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> * different perf_event is already utilizing the requested counter, but the end
> * result is the same (ignoring the fact that using a general purpose counter
> * will likely exacerbate counter contention).
> - *
> - * Forcibly inlined to allow asserting on @index at build time, and there should
> - * never be more than one user.
> */
> -static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
> +static u64 intel_get_fixed_pmc_eventsel(unsigned int index)
> {
> const enum perf_hw_id fixed_pmc_perf_ids[] = {
> [0] = PERF_COUNT_HW_INSTRUCTIONS,
> [1] = PERF_COUNT_HW_CPU_CYCLES,
> [2] = PERF_COUNT_HW_REF_CPU_CYCLES,
> };
> - u64 eventsel;
> -
> - BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUNTERS);
> - BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUNTERS);
> + u64 eventsel = 0;
>
> /*
> - * Yell if perf reports support for a fixed counter but perf doesn't
> - * have a known encoding for the associated general purpose event.
> + * Fixed counters 3 and above don't have corresponding generic hardware
> + * perf event, and KVM does not intend to emulate them on non-mediated
> + * vPMU.
The above comments are ambiguous. Fixed counter 3 and above indeed have
dedicated perf events, but they are not supported by perf_hw_id{} yet. So KVM
has no way to get their specific event code now.
Thanks.
> */
> - eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
> - WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
> + if (index < ARRAY_SIZE(fixed_pmc_perf_ids)) {
> + /*
> + * Yell if perf reports support for a fixed counter but perf
> + * doesn't have a known encoding for the associated general
> + * purpose event.
> + */
> + eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
> + WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
> + }
> return eventsel;
> }
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V2 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU
2026-04-23 17:46 [PATCH V2 0/4] KVM: x86/pmu: Add hardware Topdown metrics support Zide Chen
2026-04-23 17:46 ` [PATCH V2 1/4] KVM: x86/pmu: Do not map fixed counters >= 3 to generic perf events Zide Chen
@ 2026-04-23 17:46 ` Zide Chen
2026-04-30 2:19 ` Mi, Dapeng
2026-04-23 17:46 ` [PATCH V2 3/4] KVM: x86/pmu: Support PERF_METRICS MSR in " Zide Chen
2026-04-23 17:46 ` [PATCH V2 4/4] KVM: selftests: Add perf_metrics and fixed counter 3 tests Zide Chen
3 siblings, 1 reply; 12+ messages in thread
From: Zide Chen @ 2026-04-23 17:46 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Zide Chen,
Das Sandipan, Shukla Manali, Dapeng Mi, Falcon Thomas, Xudong Hao
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
Starting with Ice Lake, Intel introduces fixed counter 3, which counts
TOPDOWN.SLOTS - the number of available slots for an unhalted logical
processor. It serves as the denominator for top-level metrics in the
Top-down Microarchitecture Analysis method.
Emulating this counter on legacy vPMU would require introducing a new
generic perf encoding for the Intel-specific TOPDOWN.SLOTS event in
order to call perf_get_hw_event_config(). This is undesirable as it
would pollute the generic perf event encoding.
Moreover, KVM does not intend to emulate IA32_PERF_METRICS in the
legacy vPMU model, and without IA32_PERF_METRICS, emulating this
counter has little practical value. Therefore, expose fixed counter
3 to guests only when mediated vPMU is enabled.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Co-developed-by: Zide Chen <zide.chen@intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
V2:
- Don't advertise fixed counter 3 to userspace if the host doesn't
support it.
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/kvm/cpuid.c | 9 +++++++--
arch/x86/kvm/pmu.c | 4 ++++
arch/x86/kvm/x86.c | 4 ++--
4 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c470e40a00aa..cb736a4c72ea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -556,7 +556,7 @@ struct kvm_pmc {
#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
KVM_MAX_NR_AMD_GP_COUNTERS)
-#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
+#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 4
#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
KVM_MAX_NR_AMD_FIXED_COUNTERS)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e69156b54cff..d87a26f740e5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1505,7 +1505,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
case 0xa: { /* Architectural Performance Monitoring */
union cpuid10_eax eax = { };
- union cpuid10_edx edx = { };
+ union cpuid10_edx edx = { }, host_edx;
if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
@@ -1516,9 +1516,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
eax.split.num_counters = kvm_pmu_cap.num_counters_gp;
eax.split.bit_width = kvm_pmu_cap.bit_width_gp;
eax.split.mask_length = kvm_pmu_cap.events_mask_len;
- edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed;
edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed;
+ /* Guest does not support non-contiguous fixed counters. */
+ host_edx = (union cpuid10_edx)entry->edx;
+ edx.split.num_counters_fixed =
+ min_t(int, kvm_pmu_cap.num_counters_fixed,
+ host_edx.split.num_counters_fixed);
+
if (kvm_pmu_cap.version)
edx.split.anythread_deprecated = 1;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index e218352e3423..9ff4a6a9cd0b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -148,12 +148,16 @@ void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
}
memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
+
kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
pmu_ops->MAX_NR_GP_COUNTERS);
kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
KVM_MAX_NR_FIXED_COUNTERS);
+ if (!enable_mediated_pmu && kvm_pmu_cap.num_counters_fixed > 3)
+ kvm_pmu_cap.num_counters_fixed = 3;
+
kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a..604072d9354f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -360,7 +360,7 @@ static const u32 msrs_to_save_base[] = {
static const u32 msrs_to_save_pmu[] = {
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
- MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
+ MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
MSR_CORE_PERF_GLOBAL_CTRL,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
@@ -7756,7 +7756,7 @@ static void kvm_init_msr_lists(void)
{
unsigned i;
- BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 3,
+ BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 4,
"Please update the fixed PMCs in msrs_to_save_pmu[]");
num_msrs_to_save = 0;
--
2.54.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH V2 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU
2026-04-23 17:46 ` [PATCH V2 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU Zide Chen
@ 2026-04-30 2:19 ` Mi, Dapeng
2026-04-30 17:54 ` Chen, Zide
0 siblings, 1 reply; 12+ messages in thread
From: Mi, Dapeng @ 2026-04-30 2:19 UTC (permalink / raw)
To: Zide Chen, Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Das Sandipan,
Shukla Manali, Falcon Thomas, Xudong Hao
On 4/24/2026 1:46 AM, Zide Chen wrote:
> From: Dapeng Mi <dapeng1.mi@linux.intel.com>
>
> Starting with Ice Lake, Intel introduces fixed counter 3, which counts
> TOPDOWN.SLOTS - the number of available slots for an unhalted logical
> processor. It serves as the denominator for top-level metrics in the
> Top-down Microarchitecture Analysis method.
>
> Emulating this counter on legacy vPMU would require introducing a new
> generic perf encoding for the Intel-specific TOPDOWN.SLOTS event in
> order to call perf_get_hw_event_config(). This is undesirable as it
> would pollute the generic perf event encoding.
>
> Moreover, KVM does not intend to emulate IA32_PERF_METRICS in the
> legacy vPMU model, and without IA32_PERF_METRICS, emulating this
> counter has little practical value. Therefore, expose fixed counter
> 3 to guests only when mediated vPMU is enabled.
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Co-developed-by: Zide Chen <zide.chen@intel.com>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
> V2:
> - Don't advertise fixed counter 3 to userspace if the host doesn't
> support it.
> ---
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/kvm/cpuid.c | 9 +++++++--
> arch/x86/kvm/pmu.c | 4 ++++
> arch/x86/kvm/x86.c | 4 ++--
> 4 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c470e40a00aa..cb736a4c72ea 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -556,7 +556,7 @@ struct kvm_pmc {
> #define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
> KVM_MAX_NR_AMD_GP_COUNTERS)
>
> -#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
> +#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 4
> #define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
> #define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
> KVM_MAX_NR_AMD_FIXED_COUNTERS)
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index e69156b54cff..d87a26f740e5 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -1505,7 +1505,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> break;
> case 0xa: { /* Architectural Performance Monitoring */
> union cpuid10_eax eax = { };
> - union cpuid10_edx edx = { };
> + union cpuid10_edx edx = { }, host_edx;
>
> if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> @@ -1516,9 +1516,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> eax.split.num_counters = kvm_pmu_cap.num_counters_gp;
> eax.split.bit_width = kvm_pmu_cap.bit_width_gp;
> eax.split.mask_length = kvm_pmu_cap.events_mask_len;
> - edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed;
> edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed;
>
> + /* Guest does not support non-contiguous fixed counters. */
> + host_edx = (union cpuid10_edx)entry->edx;
> + edx.split.num_counters_fixed =
> + min_t(int, kvm_pmu_cap.num_counters_fixed,
> + host_edx.split.num_counters_fixed);
kvm_pmu_cap are derived from kvm_pmu_host which already represents host
fixed counters number, why host fixed counters number is checked again here?
Besides, we can't only depend on the fixed counters number to check if
fixed counter 3 is supported on host, e.g., CWF supports fixed counter 4, 5
and 6 but doesn't support fixed counter 3. Before adding PerfmonExt (0x23)
CPUID leaves support in KVM, we need to check the CPUID.0xa.ecx to get the
real fixed counter bitmap and then check if fixed counter 3 is supported.
Thanks.
> +
> if (kvm_pmu_cap.version)
> edx.split.anythread_deprecated = 1;
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index e218352e3423..9ff4a6a9cd0b 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -148,12 +148,16 @@ void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
> }
>
> memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
> +
> kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
> kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
> pmu_ops->MAX_NR_GP_COUNTERS);
> kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
> KVM_MAX_NR_FIXED_COUNTERS);
>
> + if (!enable_mediated_pmu && kvm_pmu_cap.num_counters_fixed > 3)
> + kvm_pmu_cap.num_counters_fixed = 3;
> +
> kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
> perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
> kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0a1b63c63d1a..604072d9354f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -360,7 +360,7 @@ static const u32 msrs_to_save_base[] = {
>
> static const u32 msrs_to_save_pmu[] = {
> MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
> - MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
> + MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
> MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
> MSR_CORE_PERF_GLOBAL_CTRL,
> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
> @@ -7756,7 +7756,7 @@ static void kvm_init_msr_lists(void)
> {
> unsigned i;
>
> - BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 3,
> + BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 4,
> "Please update the fixed PMCs in msrs_to_save_pmu[]");
>
> num_msrs_to_save = 0;
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V2 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU
2026-04-30 2:19 ` Mi, Dapeng
@ 2026-04-30 17:54 ` Chen, Zide
2026-05-06 1:36 ` Mi, Dapeng
0 siblings, 1 reply; 12+ messages in thread
From: Chen, Zide @ 2026-04-30 17:54 UTC (permalink / raw)
To: Mi, Dapeng, Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Das Sandipan,
Shukla Manali, Falcon Thomas, Xudong Hao
On 4/29/2026 7:19 PM, Mi, Dapeng wrote:
>
> On 4/24/2026 1:46 AM, Zide Chen wrote:
>> From: Dapeng Mi <dapeng1.mi@linux.intel.com>
>>
>> Starting with Ice Lake, Intel introduces fixed counter 3, which counts
>> TOPDOWN.SLOTS - the number of available slots for an unhalted logical
>> processor. It serves as the denominator for top-level metrics in the
>> Top-down Microarchitecture Analysis method.
>>
>> Emulating this counter on legacy vPMU would require introducing a new
>> generic perf encoding for the Intel-specific TOPDOWN.SLOTS event in
>> order to call perf_get_hw_event_config(). This is undesirable as it
>> would pollute the generic perf event encoding.
>>
>> Moreover, KVM does not intend to emulate IA32_PERF_METRICS in the
>> legacy vPMU model, and without IA32_PERF_METRICS, emulating this
>> counter has little practical value. Therefore, expose fixed counter
>> 3 to guests only when mediated vPMU is enabled.
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> Co-developed-by: Zide Chen <zide.chen@intel.com>
>> Signed-off-by: Zide Chen <zide.chen@intel.com>
>> ---
>> V2:
>> - Don't advertise fixed counter 3 to userspace if the host doesn't
>> support it.
>> ---
>> arch/x86/include/asm/kvm_host.h | 2 +-
>> arch/x86/kvm/cpuid.c | 9 +++++++--
>> arch/x86/kvm/pmu.c | 4 ++++
>> arch/x86/kvm/x86.c | 4 ++--
>> 4 files changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index c470e40a00aa..cb736a4c72ea 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -556,7 +556,7 @@ struct kvm_pmc {
>> #define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
>> KVM_MAX_NR_AMD_GP_COUNTERS)
>>
>> -#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
>> +#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 4
>> #define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
>> #define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
>> KVM_MAX_NR_AMD_FIXED_COUNTERS)
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index e69156b54cff..d87a26f740e5 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -1505,7 +1505,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>> break;
>> case 0xa: { /* Architectural Performance Monitoring */
>> union cpuid10_eax eax = { };
>> - union cpuid10_edx edx = { };
>> + union cpuid10_edx edx = { }, host_edx;
>>
>> if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
>> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>> @@ -1516,9 +1516,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>> eax.split.num_counters = kvm_pmu_cap.num_counters_gp;
>> eax.split.bit_width = kvm_pmu_cap.bit_width_gp;
>> eax.split.mask_length = kvm_pmu_cap.events_mask_len;
>> - edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed;
>> edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed;
>>
>> + /* Guest does not support non-contiguous fixed counters. */
>> + host_edx = (union cpuid10_edx)entry->edx;
>> + edx.split.num_counters_fixed =
>> + min_t(int, kvm_pmu_cap.num_counters_fixed,
>> + host_edx.split.num_counters_fixed);
>
> kvm_pmu_cap are derived from kvm_pmu_host which already represents host
> fixed counters number, why host fixed counters number is checked again here?
This stems from KVM not supporting non-contiguous fixed counters on the
guest.
On CWF, the fixed counter mask is 0x77 and the number of contiguous
fixed counters is 3. kvm_host_pmu.num_counters_fixed is 6 from the host,
and in kvm_pmu_cap it's capped to KVM_MAX_NR_INTEL_FIXED_COUNTERS
without accounting for non-contiguity:
memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
KVM_MAX_NR_FIXED_COUNTERS);
It would be more natural to check against the host's contiguous fixed
counter count in kvm_init_pmu_capability(), but I placed it in cpuid.c
to leverage do_host_cpuid().
A more complete fix would be to pull in some PerfmonExt patches to add
fixed/GP counter mask support in kvm_host_pmu, and filter out
non-contiguous counters in kvm_init_pmu_capability(). But in this way,
it could have too much "temporary" code to translate between
nr_of_xxx_counters and xxx_counter_mask.
> Besides, we can't only depend on the fixed counters number to check if
> fixed counter 3 is supported on host, e.g., CWF supports fixed counter 4, 5
> and 6 but doesn't support fixed counter 3. Before adding PerfmonExt (0x23)
> CPUID leaves support in KVM, we need to check the CPUID.0xa.ecx to get the
> real fixed countera bitmap and then check if fixed counter 3 is supported.
This is a theoretical concern even without fixed counter 3 support.
Before this patch, KVM supports up to 3 fixed counters and assumes they
are contiguous, which holds true in practice.
CPUID.0xa.ecx is only meaningful starting from PMU v4, so it can't be
used unconditionally. However, CPUID.0xa.edx[4:0] always represents the
number of contiguous fixed counters, so checking against it is
sufficient to filter out non-contiguous ones.
> Thanks.
>
>
>> +
>> if (kvm_pmu_cap.version)
>> edx.split.anythread_deprecated = 1;
>>
>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>> index e218352e3423..9ff4a6a9cd0b 100644
>> --- a/arch/x86/kvm/pmu.c
>> +++ b/arch/x86/kvm/pmu.c
>> @@ -148,12 +148,16 @@ void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
>> }
>>
>> memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
>> +
>> kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
>> kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
>> pmu_ops->MAX_NR_GP_COUNTERS);
>> kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
>> KVM_MAX_NR_FIXED_COUNTERS);
>>
>> + if (!enable_mediated_pmu && kvm_pmu_cap.num_counters_fixed > 3)
>> + kvm_pmu_cap.num_counters_fixed = 3;
>> +
>> kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
>> perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
>> kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 0a1b63c63d1a..604072d9354f 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -360,7 +360,7 @@ static const u32 msrs_to_save_base[] = {
>>
>> static const u32 msrs_to_save_pmu[] = {
>> MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
>> - MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
>> + MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
>> MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
>> MSR_CORE_PERF_GLOBAL_CTRL,
>> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>> @@ -7756,7 +7756,7 @@ static void kvm_init_msr_lists(void)
>> {
>> unsigned i;
>>
>> - BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 3,
>> + BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 4,
>> "Please update the fixed PMCs in msrs_to_save_pmu[]");
>>
>> num_msrs_to_save = 0;
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V2 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU
2026-04-30 17:54 ` Chen, Zide
@ 2026-05-06 1:36 ` Mi, Dapeng
0 siblings, 0 replies; 12+ messages in thread
From: Mi, Dapeng @ 2026-05-06 1:36 UTC (permalink / raw)
To: Chen, Zide, Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Das Sandipan,
Shukla Manali, Falcon Thomas, Xudong Hao
On 5/1/2026 1:54 AM, Chen, Zide wrote:
>
> On 4/29/2026 7:19 PM, Mi, Dapeng wrote:
>> On 4/24/2026 1:46 AM, Zide Chen wrote:
>>> From: Dapeng Mi <dapeng1.mi@linux.intel.com>
>>>
>>> Starting with Ice Lake, Intel introduces fixed counter 3, which counts
>>> TOPDOWN.SLOTS - the number of available slots for an unhalted logical
>>> processor. It serves as the denominator for top-level metrics in the
>>> Top-down Microarchitecture Analysis method.
>>>
>>> Emulating this counter on legacy vPMU would require introducing a new
>>> generic perf encoding for the Intel-specific TOPDOWN.SLOTS event in
>>> order to call perf_get_hw_event_config(). This is undesirable as it
>>> would pollute the generic perf event encoding.
>>>
>>> Moreover, KVM does not intend to emulate IA32_PERF_METRICS in the
>>> legacy vPMU model, and without IA32_PERF_METRICS, emulating this
>>> counter has little practical value. Therefore, expose fixed counter
>>> 3 to guests only when mediated vPMU is enabled.
>>>
>>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>>> Co-developed-by: Zide Chen <zide.chen@intel.com>
>>> Signed-off-by: Zide Chen <zide.chen@intel.com>
>>> ---
>>> V2:
>>> - Don't advertise fixed counter 3 to userspace if the host doesn't
>>> support it.
>>> ---
>>> arch/x86/include/asm/kvm_host.h | 2 +-
>>> arch/x86/kvm/cpuid.c | 9 +++++++--
>>> arch/x86/kvm/pmu.c | 4 ++++
>>> arch/x86/kvm/x86.c | 4 ++--
>>> 4 files changed, 14 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index c470e40a00aa..cb736a4c72ea 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -556,7 +556,7 @@ struct kvm_pmc {
>>> #define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
>>> KVM_MAX_NR_AMD_GP_COUNTERS)
>>>
>>> -#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
>>> +#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 4
>>> #define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
>>> #define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
>>> KVM_MAX_NR_AMD_FIXED_COUNTERS)
>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>> index e69156b54cff..d87a26f740e5 100644
>>> --- a/arch/x86/kvm/cpuid.c
>>> +++ b/arch/x86/kvm/cpuid.c
>>> @@ -1505,7 +1505,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>> break;
>>> case 0xa: { /* Architectural Performance Monitoring */
>>> union cpuid10_eax eax = { };
>>> - union cpuid10_edx edx = { };
>>> + union cpuid10_edx edx = { }, host_edx;
>>>
>>> if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
>>> entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>> @@ -1516,9 +1516,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>> eax.split.num_counters = kvm_pmu_cap.num_counters_gp;
>>> eax.split.bit_width = kvm_pmu_cap.bit_width_gp;
>>> eax.split.mask_length = kvm_pmu_cap.events_mask_len;
>>> - edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed;
>>> edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed;
>>>
>>> + /* Guest does not support non-contiguous fixed counters. */
>>> + host_edx = (union cpuid10_edx)entry->edx;
>>> + edx.split.num_counters_fixed =
>>> + min_t(int, kvm_pmu_cap.num_counters_fixed,
>>> + host_edx.split.num_counters_fixed);
>> kvm_pmu_cap are derived from kvm_pmu_host which already represents host
>> fixed counters number, why host fixed counters number is checked again here?
> This stems from KVM not supporting non-contiguous fixed counters on the
> guest.
>
> On CWF, the fixed counter mask is 0x77 and the number of contiguous
> fixed counters is 3. kvm_host_pmu.num_counters_fixed is 6 from the host,
> and in kvm_pmu_cap it's capped to KVM_MAX_NR_INTEL_FIXED_COUNTERS
> without accounting for non-contiguity:
>
> memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
> kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
> KVM_MAX_NR_FIXED_COUNTERS);
>
> It would be more natural to check against the host's contiguous fixed
> counter count in kvm_init_pmu_capability(), but I placed it in cpuid.c
> to leverage do_host_cpuid().
>
> A more complete fix would be to pull in some PerfmonExt patches to add
> fixed/GP counter mask support in kvm_host_pmu, and filter out
> non-contiguous counters in kvm_init_pmu_capability(). But in this way,
> it could have too much "temporary" code to translate between
> nr_of_xxx_counters and xxx_counter_mask.
I see. It may not be a good choice to pull in the PerfmonExt patches in
this patchset considering their large patch size. We'd better move this part
of the code into kvm_init_pmu_capability(), which is a better place for it, and
we need some comments to explain it. Thanks.
>
>
>> Besides, we can't only depend on the fixed counters number to check if
>> fixed counter 3 is supported on host, e.g., CWF supports fixed counter 4, 5
>> and 6 but doesn't support fixed counter 3. Before adding PerfmonExt (0x23)
>> CPUID leaves support in KVM, we need to check the CPUID.0xa.ecx to get the
>> real fixed countera bitmap and then check if fixed counter 3 is supported.
> This is a theoretical concern even without fixed counter 3 support.
> Before this patch, KVM supports up to 3 fixed counters and assumes they
> are contiguous, which holds true in practice.
>
> CPUID.0xa.ecx is only meaningful starting from PMU v4, so it can't be
> used unconditionally. However, CPUID.0xa.edx[4:0] always represents the
> number of contiguous fixed counters, so checking against it is
> sufficient to filter out non-contiguous ones.
>
>> Thanks.
>>
>>
>>> +
>>> if (kvm_pmu_cap.version)
>>> edx.split.anythread_deprecated = 1;
>>>
>>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>>> index e218352e3423..9ff4a6a9cd0b 100644
>>> --- a/arch/x86/kvm/pmu.c
>>> +++ b/arch/x86/kvm/pmu.c
>>> @@ -148,12 +148,16 @@ void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
>>> }
>>>
>>> memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu));
>>> +
>>> kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
>>> kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp,
>>> pmu_ops->MAX_NR_GP_COUNTERS);
>>> kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
>>> KVM_MAX_NR_FIXED_COUNTERS);
>>>
>>> + if (!enable_mediated_pmu && kvm_pmu_cap.num_counters_fixed > 3)
>>> + kvm_pmu_cap.num_counters_fixed = 3;
>>> +
>>> kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
>>> perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
>>> kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 0a1b63c63d1a..604072d9354f 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -360,7 +360,7 @@ static const u32 msrs_to_save_base[] = {
>>>
>>> static const u32 msrs_to_save_pmu[] = {
>>> MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
>>> - MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
>>> + MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
>>> MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
>>> MSR_CORE_PERF_GLOBAL_CTRL,
>>> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>>> @@ -7756,7 +7756,7 @@ static void kvm_init_msr_lists(void)
>>> {
>>> unsigned i;
>>>
>>> - BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 3,
>>> + BUILD_BUG_ON_MSG(KVM_MAX_NR_FIXED_COUNTERS != 4,
>>> "Please update the fixed PMCs in msrs_to_save_pmu[]");
>>>
>>> num_msrs_to_save = 0;
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V2 3/4] KVM: x86/pmu: Support PERF_METRICS MSR in mediated vPMU
2026-04-23 17:46 [PATCH V2 0/4] KVM: x86/pmu: Add hardware Topdown metrics support Zide Chen
2026-04-23 17:46 ` [PATCH V2 1/4] KVM: x86/pmu: Do not map fixed counters >= 3 to generic perf events Zide Chen
2026-04-23 17:46 ` [PATCH V2 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU Zide Chen
@ 2026-04-23 17:46 ` Zide Chen
2026-04-30 2:22 ` Mi, Dapeng
2026-04-23 17:46 ` [PATCH V2 4/4] KVM: selftests: Add perf_metrics and fixed counter 3 tests Zide Chen
3 siblings, 1 reply; 12+ messages in thread
From: Zide Chen @ 2026-04-23 17:46 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Zide Chen,
Das Sandipan, Shukla Manali, Dapeng Mi, Falcon Thomas, Xudong Hao
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
Bit 15 in IA32_PERF_CAPABILITIES indicates that the CPU provides
built-in support for Topdown Microarchitecture Analysis (TMA) L1
metrics via the IA32_PERF_METRICS MSR.
Expose this capability only when mediated vPMU is enabled, as emulating
IA32_PERF_METRICS in the legacy vPMU model is impractical.
Pass IA32_PERF_METRICS through to the guest only when mediated vPMU is
enabled and bit 15 is set in the guest's IA32_PERF_CAPABILITIES. Allow
kvm_pmu_{get,set}_msr() to handle this MSR for host accesses.
Save and restore this MSR on host/guest PMU context switches so that
host PMU activity does not clobber the guest value, and guest state
is not leaked into the host.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/include/asm/perf_event.h | 1 +
arch/x86/kvm/vmx/pmu_intel.c | 31 +++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/pmu_intel.h | 5 +++++
arch/x86/kvm/vmx/vmx.c | 6 ++++++
arch/x86/kvm/x86.c | 6 +++++-
7 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cb736a4c72ea..69a02e58eba2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -576,6 +576,7 @@ struct kvm_pmu {
u64 global_status_rsvd;
u64 reserved_bits;
u64 raw_event_mask;
+ u64 perf_metrics;
struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a14a0f43e04a..b60fdf79816a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -331,6 +331,7 @@
#define PERF_CAP_PEBS_FORMAT 0xf00
#define PERF_CAP_FW_WRITES BIT_ULL(13)
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PERF_METRICS BIT_ULL(15)
#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 752cb319d5ea..cbfe6a55dea8 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -445,6 +445,7 @@ static inline bool is_topdown_idx(int idx)
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
+#define GLOBAL_STATUS_PERF_METRICS_OVF BIT_ULL(GLOBAL_STATUS_PERF_METRICS_OVF_BIT)
#define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48)
/*
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 05a59f4acfdd..a7d862a6f1be 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -180,6 +180,8 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
return kvm_pmu_has_perf_global_ctrl(pmu);
+ case MSR_PERF_METRICS:
+ return vcpu_has_perf_metrics(vcpu);
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
break;
@@ -335,6 +337,10 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CORE_PERF_FIXED_CTR_CTRL:
msr_info->data = pmu->fixed_ctr_ctrl;
break;
+ case MSR_PERF_METRICS:
+ WARN_ON(!msr_info->host_initiated);
+ msr_info->data = pmu->perf_metrics;
+ break;
case MSR_IA32_PEBS_ENABLE:
msr_info->data = pmu->pebs_enable;
break;
@@ -384,6 +390,10 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (pmu->fixed_ctr_ctrl != data)
reprogram_fixed_counters(pmu, data);
break;
+ case MSR_PERF_METRICS:
+ WARN_ON(!msr_info->host_initiated);
+ pmu->perf_metrics = data;
+ break;
case MSR_IA32_PEBS_ENABLE:
if (data & pmu->pebs_enable_rsvd)
return 1;
@@ -579,6 +589,11 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_status_rsvd &=
~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
+ if (perf_capabilities & PERF_CAP_PERF_METRICS) {
+ pmu->global_ctrl_rsvd &= ~GLOBAL_CTRL_EN_PERF_METRICS;
+ pmu->global_status_rsvd &= ~GLOBAL_STATUS_PERF_METRICS_OVF;
+ }
+
if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
pmu->pebs_enable_rsvd = counter_rsvd;
@@ -622,6 +637,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+ pmu->perf_metrics = 0;
intel_pmu_release_guest_lbr_event(vcpu);
}
@@ -793,6 +811,13 @@ static void intel_mediated_pmu_load(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
u64 global_status, toggle;
+ /*
+ * PERF_METRICS MSR must be restored closely after fixed counter 3
+ * (kvm_pmu_load_guest_pmcs()).
+ */
+ if (vcpu_has_perf_metrics(vcpu))
+ wrmsrq(MSR_PERF_METRICS, pmu->perf_metrics);
+
rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, global_status);
toggle = pmu->global_status ^ global_status;
if (global_status & toggle)
@@ -821,6 +846,12 @@ static void intel_mediated_pmu_put(struct kvm_vcpu *vcpu)
*/
if (pmu->fixed_ctr_ctrl_hw)
wrmsrq(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
+
+ if (vcpu_has_perf_metrics(vcpu)) {
+ pmu->perf_metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS);
+ if (pmu->perf_metrics)
+ wrmsrq(MSR_PERF_METRICS, 0);
+ }
}
struct kvm_pmu_ops intel_pmu_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/pmu_intel.h b/arch/x86/kvm/vmx/pmu_intel.h
index 5d9357640aa1..2ec547223b09 100644
--- a/arch/x86/kvm/vmx/pmu_intel.h
+++ b/arch/x86/kvm/vmx/pmu_intel.h
@@ -40,4 +40,9 @@ struct lbr_desc {
extern struct x86_pmu_lbr vmx_lbr_caps;
+static inline bool vcpu_has_perf_metrics(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PERF_METRICS);
+}
+
#endif /* __KVM_X86_VMX_PMU_INTEL_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a29896a9ef14..d10d86abbce7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4229,6 +4229,9 @@ static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu)
MSR_TYPE_RW, intercept);
vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
MSR_TYPE_RW, intercept);
+
+ vmx_set_intercept_for_msr(vcpu, MSR_PERF_METRICS, MSR_TYPE_RW,
+ !vcpu_has_perf_metrics(vcpu));
}
static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
@@ -8074,6 +8077,9 @@ static __init u64 vmx_get_perf_capabilities(void)
perf_cap &= ~PERF_CAP_PEBS_BASELINE;
}
+ if (enable_mediated_pmu)
+ perf_cap |= host_perf_cap & PERF_CAP_PERF_METRICS;
+
return perf_cap;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 604072d9354f..d497ffc5d90e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -362,7 +362,7 @@ static const u32 msrs_to_save_pmu[] = {
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
- MSR_CORE_PERF_GLOBAL_CTRL,
+ MSR_CORE_PERF_GLOBAL_CTRL, MSR_PERF_METRICS,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
/* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */
@@ -7693,6 +7693,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2))
return;
break;
+ case MSR_PERF_METRICS:
+ if (!(kvm_caps.supported_perf_cap & PERF_CAP_PERF_METRICS))
+ return;
+ break;
case MSR_ARCH_PERFMON_PERFCTR0 ...
MSR_ARCH_PERFMON_PERFCTR0 + KVM_MAX_NR_GP_COUNTERS - 1:
if (msr_index - MSR_ARCH_PERFMON_PERFCTR0 >=
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH V2 3/4] KVM: x86/pmu: Support PERF_METRICS MSR in mediated vPMU
2026-04-23 17:46 ` [PATCH V2 3/4] KVM: x86/pmu: Support PERF_METRICS MSR in " Zide Chen
@ 2026-04-30 2:22 ` Mi, Dapeng
0 siblings, 0 replies; 12+ messages in thread
From: Mi, Dapeng @ 2026-04-30 2:22 UTC (permalink / raw)
To: Zide Chen, Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Das Sandipan,
Shukla Manali, Falcon Thomas, Xudong Hao
On 4/24/2026 1:46 AM, Zide Chen wrote:
> From: Dapeng Mi <dapeng1.mi@linux.intel.com>
>
> Bit 15 in IA32_PERF_CAPABILITIES indicates that the CPU provides
> built-in support for Topdown Microarchitecture Analysis (TMA) L1
> metrics via the IA32_PERF_METRICS MSR.
>
> Expose this capability only when mediated vPMU is enabled, as emulating
> IA32_PERF_METRICS in the legacy vPMU model is impractical.
>
> Pass IA32_PERF_METRICS through to the guest only when mediated vPMU is
> enabled and bit 15 is set in guest IA32_PERF_CAPABILITIES is. Allow
> kvm_pmu_{get,set}_msr() to handle this MSR for host accesses.
>
> Save and restore this MSR on host/guest PMU context switches so that
> host PMU activity does not clobber the guest value, and guest state
> is not leaked into the host.
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/include/asm/msr-index.h | 1 +
> arch/x86/include/asm/perf_event.h | 1 +
> arch/x86/kvm/vmx/pmu_intel.c | 31 +++++++++++++++++++++++++++++++
> arch/x86/kvm/vmx/pmu_intel.h | 5 +++++
> arch/x86/kvm/vmx/vmx.c | 6 ++++++
> arch/x86/kvm/x86.c | 6 +++++-
> 7 files changed, 50 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index cb736a4c72ea..69a02e58eba2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -576,6 +576,7 @@ struct kvm_pmu {
> u64 global_status_rsvd;
> u64 reserved_bits;
> u64 raw_event_mask;
> + u64 perf_metrics;
> struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
> struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index a14a0f43e04a..b60fdf79816a 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -331,6 +331,7 @@
> #define PERF_CAP_PEBS_FORMAT 0xf00
> #define PERF_CAP_FW_WRITES BIT_ULL(13)
> #define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
> +#define PERF_CAP_PERF_METRICS BIT_ULL(15)
> #define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
> #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
> PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 752cb319d5ea..cbfe6a55dea8 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -445,6 +445,7 @@ static inline bool is_topdown_idx(int idx)
> #define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54
> #define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
> #define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
> +#define GLOBAL_STATUS_PERF_METRICS_OVF BIT_ULL(GLOBAL_STATUS_PERF_METRICS_OVF_BIT)
>
> #define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48)
> /*
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 05a59f4acfdd..a7d862a6f1be 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -180,6 +180,8 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
> switch (msr) {
> case MSR_CORE_PERF_FIXED_CTR_CTRL:
> return kvm_pmu_has_perf_global_ctrl(pmu);
> + case MSR_PERF_METRICS:
> + return vcpu_has_perf_metrics(vcpu);
> case MSR_IA32_PEBS_ENABLE:
> ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
> break;
> @@ -335,6 +337,10 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case MSR_CORE_PERF_FIXED_CTR_CTRL:
> msr_info->data = pmu->fixed_ctr_ctrl;
> break;
> + case MSR_PERF_METRICS:
> + WARN_ON(!msr_info->host_initiated);
WARN_ON() -> WARN_ON_ONCE().
> + msr_info->data = pmu->perf_metrics;
> + break;
> case MSR_IA32_PEBS_ENABLE:
> msr_info->data = pmu->pebs_enable;
> break;
> @@ -384,6 +390,10 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> if (pmu->fixed_ctr_ctrl != data)
> reprogram_fixed_counters(pmu, data);
> break;
> + case MSR_PERF_METRICS:
> + WARN_ON(!msr_info->host_initiated);
Ditto.
Thanks.
> + pmu->perf_metrics = data;
> + break;
> case MSR_IA32_PEBS_ENABLE:
> if (data & pmu->pebs_enable_rsvd)
> return 1;
> @@ -579,6 +589,11 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> pmu->global_status_rsvd &=
> ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
>
> + if (perf_capabilities & PERF_CAP_PERF_METRICS) {
> + pmu->global_ctrl_rsvd &= ~GLOBAL_CTRL_EN_PERF_METRICS;
> + pmu->global_status_rsvd &= ~GLOBAL_STATUS_PERF_METRICS_OVF;
> + }
> +
> if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
> if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
> pmu->pebs_enable_rsvd = counter_rsvd;
> @@ -622,6 +637,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
>
> static void intel_pmu_reset(struct kvm_vcpu *vcpu)
> {
> + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> + pmu->perf_metrics = 0;
> intel_pmu_release_guest_lbr_event(vcpu);
> }
>
> @@ -793,6 +811,13 @@ static void intel_mediated_pmu_load(struct kvm_vcpu *vcpu)
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> u64 global_status, toggle;
>
> + /*
> + * PERF_METRICS MSR must be restored closely after fixed counter 3
> + * (kvm_pmu_load_guest_pmcs()).
> + */
> + if (vcpu_has_perf_metrics(vcpu))
> + wrmsrq(MSR_PERF_METRICS, pmu->perf_metrics);
> +
> rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, global_status);
> toggle = pmu->global_status ^ global_status;
> if (global_status & toggle)
> @@ -821,6 +846,12 @@ static void intel_mediated_pmu_put(struct kvm_vcpu *vcpu)
> */
> if (pmu->fixed_ctr_ctrl_hw)
> wrmsrq(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
> +
> + if (vcpu_has_perf_metrics(vcpu)) {
> + pmu->perf_metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS);
> + if (pmu->perf_metrics)
> + wrmsrq(MSR_PERF_METRICS, 0);
> + }
> }
>
> struct kvm_pmu_ops intel_pmu_ops __initdata = {
> diff --git a/arch/x86/kvm/vmx/pmu_intel.h b/arch/x86/kvm/vmx/pmu_intel.h
> index 5d9357640aa1..2ec547223b09 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.h
> +++ b/arch/x86/kvm/vmx/pmu_intel.h
> @@ -40,4 +40,9 @@ struct lbr_desc {
>
> extern struct x86_pmu_lbr vmx_lbr_caps;
>
> +static inline bool vcpu_has_perf_metrics(struct kvm_vcpu *vcpu)
> +{
> + return !!(vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PERF_METRICS);
> +}
> +
> #endif /* __KVM_X86_VMX_PMU_INTEL_H */
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index a29896a9ef14..d10d86abbce7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4229,6 +4229,9 @@ static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu)
> MSR_TYPE_RW, intercept);
> vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
> MSR_TYPE_RW, intercept);
> +
> + vmx_set_intercept_for_msr(vcpu, MSR_PERF_METRICS, MSR_TYPE_RW,
> + !vcpu_has_perf_metrics(vcpu));
> }
>
> static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
> @@ -8074,6 +8077,9 @@ static __init u64 vmx_get_perf_capabilities(void)
> perf_cap &= ~PERF_CAP_PEBS_BASELINE;
> }
>
> + if (enable_mediated_pmu)
> + perf_cap |= host_perf_cap & PERF_CAP_PERF_METRICS;
> +
> return perf_cap;
> }
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 604072d9354f..d497ffc5d90e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -362,7 +362,7 @@ static const u32 msrs_to_save_pmu[] = {
> MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
> MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
> MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
> - MSR_CORE_PERF_GLOBAL_CTRL,
> + MSR_CORE_PERF_GLOBAL_CTRL, MSR_PERF_METRICS,
> MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>
> /* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */
> @@ -7693,6 +7693,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
> intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2))
> return;
> break;
> + case MSR_PERF_METRICS:
> + if (!(kvm_caps.supported_perf_cap & PERF_CAP_PERF_METRICS))
> + return;
> + break;
> case MSR_ARCH_PERFMON_PERFCTR0 ...
> MSR_ARCH_PERFMON_PERFCTR0 + KVM_MAX_NR_GP_COUNTERS - 1:
> if (msr_index - MSR_ARCH_PERFMON_PERFCTR0 >=
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V2 4/4] KVM: selftests: Add perf_metrics and fixed counter 3 tests
2026-04-23 17:46 [PATCH V2 0/4] KVM: x86/pmu: Add hardware Topdown metrics support Zide Chen
` (2 preceding siblings ...)
2026-04-23 17:46 ` [PATCH V2 3/4] KVM: x86/pmu: Support PERF_METRICS MSR in " Zide Chen
@ 2026-04-23 17:46 ` Zide Chen
2026-04-30 2:26 ` Mi, Dapeng
3 siblings, 1 reply; 12+ messages in thread
From: Zide Chen @ 2026-04-23 17:46 UTC (permalink / raw)
To: Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Zide Chen,
Das Sandipan, Shukla Manali, Dapeng Mi, Falcon Thomas, Xudong Hao
Add a selftest to exercise IA32_PERF_METRICS, i.e. architectural
support for Topdown (TMA) Level 1 metrics, enumerated by
IA32_PERF_CAPABILITIES[15].
Only check for non-zero metrics, as they are derived and depend on
the workload, CPU model, and host scheduling, making precise
expectations fragile.
Extend the PMU selftest to cover Intel fixed counter 3 by bumping
MAX_NR_FIXED_COUNTERS to 4 and validating basic functionality.
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
V2: New patch.
---
tools/arch/x86/include/asm/msr-index.h | 1 +
tools/testing/selftests/kvm/include/x86/pmu.h | 3 +
.../selftests/kvm/x86/pmu_counters_test.c | 71 +++++++++++++++++--
3 files changed, 70 insertions(+), 5 deletions(-)
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 6673601246b3..31ecbea42459 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -331,6 +331,7 @@
#define PERF_CAP_PEBS_FORMAT 0xf00
#define PERF_CAP_FW_WRITES BIT_ULL(13)
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PERF_METRICS BIT_ULL(15)
#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
index 72575eadb63a..c68d6435422c 100644
--- a/tools/testing/selftests/kvm/include/x86/pmu.h
+++ b/tools/testing/selftests/kvm/include/x86/pmu.h
@@ -46,6 +46,9 @@
/* Fixed PMC controls, Intel only. */
#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
+/* PERF_METRICS enable, Intel only. */
+#define PERF_METRICS_GLOBAL_CTRL_ENABLE BIT_ULL(48)
+
#define FIXED_PMC_KERNEL BIT_ULL(0)
#define FIXED_PMC_USER BIT_ULL(1)
#define FIXED_PMC_ANYTHREAD BIT_ULL(2)
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 3eaa216b96c0..240bcf9184c2 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -6,6 +6,7 @@
#include "pmu.h"
#include "processor.h"
+#include <linux/bitfield.h>
/* Number of iterations of the loop for the guest measurement payload. */
#define NUM_LOOPS 10
@@ -241,17 +242,20 @@ do { \
); \
} while (0)
-#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+#define GUEST_RUN_PAYLOAD(_ctrl_msr, _value, FEP) \
do { \
- wrmsr(_pmc_msr, 0); \
- \
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
- \
+} while (0)
+
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do { \
+ wrmsr(_pmc_msr, 0); \
+ GUEST_RUN_PAYLOAD(_ctrl_msr, _value, FEP); \
guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)
@@ -318,6 +322,55 @@ static void guest_test_arch_event(uint8_t idx)
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
}
+static void guest_test_perf_metrics(void)
+{
+ int retiring, bad_spec, fe_bound, be_bound;
+ uint64_t global_ctrl, metrics;
+
+ if ((guest_get_pmu_version() < 2) || /* Does guest has GLOBAL_CTRL? */
+ !this_cpu_has(X86_FEATURE_PDCM) ||
+ !(rdmsr(MSR_IA32_PERF_CAPABILITIES) & PERF_CAP_PERF_METRICS))
+ return;
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsr(MSR_PERF_METRICS, 0);
+
+ /* Enable fixed ctr3 (TOPDOWN.SLOTS) and PERF_METRICS . */
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(3, FIXED_PMC_KERNEL));
+ global_ctrl = FIXED_PMC_GLOBAL_CTRL_ENABLE(3) |
+ PERF_METRICS_GLOBAL_CTRL_ENABLE;
+
+ GUEST_RUN_PAYLOAD(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl, "");
+
+ /* Check test results. */
+ metrics = rdmsr(MSR_PERF_METRICS);
+ retiring = FIELD_GET(GENMASK_ULL(7, 0), metrics);
+ bad_spec = FIELD_GET(GENMASK_ULL(15, 8), metrics);
+ fe_bound = FIELD_GET(GENMASK_ULL(23, 16), metrics);
+ be_bound = FIELD_GET(GENMASK_ULL(31, 24), metrics);
+
+ /*
+ * Be conservative: the measured payload definitely retires work, so
+ * Retiring should be non-zero.
+ */
+ GUEST_ASSERT_NE(metrics, 0ULL);
+ GUEST_ASSERT_NE(retiring, 0ULL);
+
+ /*
+ * The derived percentage of the metrics should be close to 100%.
+ * 3 is chosen as a loose sanity check.
+ */
+ GUEST_ASSERT(abs(retiring + bad_spec + fe_bound + be_bound - 0xff) < 3);
+
+ /* Sanity check after PERF_METRICS disabled. */
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
+ GUEST_ASSERT_EQ(rdmsr(MSR_PERF_METRICS), metrics);
+ wrmsr(MSR_PERF_METRICS, 0xdeaddead);
+
+ GUEST_ASSERT_EQ(rdmsr(MSR_PERF_METRICS), 0xdeaddead);
+}
+
static void guest_test_arch_events(void)
{
uint8_t i;
@@ -325,6 +378,8 @@ static void guest_test_arch_events(void)
for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
guest_test_arch_event(i);
+ guest_test_perf_metrics();
+
GUEST_DONE();
}
@@ -361,7 +416,7 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
* other than PMCs in the future.
*/
#define MAX_NR_GP_COUNTERS 8
-#define MAX_NR_FIXED_COUNTERS 3
+#define MAX_NR_FIXED_COUNTERS 4
#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
@@ -586,6 +641,7 @@ static void test_intel_counters(void)
uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ uint64_t advertised_perf_caps = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
unsigned int i;
uint8_t v, j;
uint32_t k;
@@ -593,6 +649,7 @@ static void test_intel_counters(void)
const uint64_t perf_caps[] = {
0,
PMU_CAP_FW_WRITES,
+ PERF_CAP_PERF_METRICS,
};
/*
@@ -650,6 +707,10 @@ static void test_intel_counters(void)
if (!kvm_has_perf_caps && perf_caps[i])
continue;
+ /* Ignore unsupported features. */
+ if (perf_caps[i] & ~advertised_perf_caps)
+ continue;
+
pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
v, perf_caps[i]);
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH V2 4/4] KVM: selftests: Add perf_metrics and fixed counter 3 tests
2026-04-23 17:46 ` [PATCH V2 4/4] KVM: selftests: Add perf_metrics and fixed counter 3 tests Zide Chen
@ 2026-04-30 2:26 ` Mi, Dapeng
2026-04-30 18:13 ` Chen, Zide
0 siblings, 1 reply; 12+ messages in thread
From: Mi, Dapeng @ 2026-04-30 2:26 UTC (permalink / raw)
To: Zide Chen, Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Das Sandipan,
Shukla Manali, Falcon Thomas, Xudong Hao
On 4/24/2026 1:46 AM, Zide Chen wrote:
> Add a selftest to exercise IA32_PERF_METRICS, i.e. architectural
> support for Topdown (TMA) Level 1 metrics, enumerated by
> IA32_PERF_CAPABILITIES[15].
>
> Only check for non-zero metrics, as they are derived and depend on
> the workload, CPU model, and host scheduling, making precise
> expectations fragile.
>
> Extend the PMU selftest to cover Intel fixed counter 3 by bumping
> MAX_NR_FIXED_COUNTERS to 4 and validating basic functionality.
>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
> V2: New patch.
> ---
> tools/arch/x86/include/asm/msr-index.h | 1 +
> tools/testing/selftests/kvm/include/x86/pmu.h | 3 +
> .../selftests/kvm/x86/pmu_counters_test.c | 71 +++++++++++++++++--
> 3 files changed, 70 insertions(+), 5 deletions(-)
>
> diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
> index 6673601246b3..31ecbea42459 100644
> --- a/tools/arch/x86/include/asm/msr-index.h
> +++ b/tools/arch/x86/include/asm/msr-index.h
> @@ -331,6 +331,7 @@
> #define PERF_CAP_PEBS_FORMAT 0xf00
> #define PERF_CAP_FW_WRITES BIT_ULL(13)
> #define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
> +#define PERF_CAP_PERF_METRICS BIT_ULL(15)
> #define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
> #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
> PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
> diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
> index 72575eadb63a..c68d6435422c 100644
> --- a/tools/testing/selftests/kvm/include/x86/pmu.h
> +++ b/tools/testing/selftests/kvm/include/x86/pmu.h
> @@ -46,6 +46,9 @@
> /* Fixed PMC controls, Intel only. */
> #define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
>
> +/* PERF_METRICS enable, Intel only. */
> +#define PERF_METRICS_GLOBAL_CTRL_ENABLE BIT_ULL(48)
> +
> #define FIXED_PMC_KERNEL BIT_ULL(0)
> #define FIXED_PMC_USER BIT_ULL(1)
> #define FIXED_PMC_ANYTHREAD BIT_ULL(2)
> diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
> index 3eaa216b96c0..240bcf9184c2 100644
> --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
> +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
> @@ -6,6 +6,7 @@
>
> #include "pmu.h"
> #include "processor.h"
> +#include <linux/bitfield.h>
>
> /* Number of iterations of the loop for the guest measurement payload. */
> #define NUM_LOOPS 10
> @@ -241,17 +242,20 @@ do { \
> ); \
> } while (0)
>
> -#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
> +#define GUEST_RUN_PAYLOAD(_ctrl_msr, _value, FEP) \
> do { \
> - wrmsr(_pmc_msr, 0); \
> - \
> if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
> GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
> else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
> GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
> else \
> GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
> - \
> +} while (0)
> +
> +#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
> +do { \
> + wrmsr(_pmc_msr, 0); \
> + GUEST_RUN_PAYLOAD(_ctrl_msr, _value, FEP); \
> guest_assert_event_count(_idx, _pmc, _pmc_msr); \
> } while (0)
>
> @@ -318,6 +322,55 @@ static void guest_test_arch_event(uint8_t idx)
> FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
> }
>
> +static void guest_test_perf_metrics(void)
> +{
> + int retiring, bad_spec, fe_bound, be_bound;
> + uint64_t global_ctrl, metrics;
> +
> + if ((guest_get_pmu_version() < 2) || /* Does guest has GLOBAL_CTRL? */
> + !this_cpu_has(X86_FEATURE_PDCM) ||
> + !(rdmsr(MSR_IA32_PERF_CAPABILITIES) & PERF_CAP_PERF_METRICS))
> + return;
> +
> + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> + wrmsr(MSR_CORE_PERF_FIXED_CTR3, 0);
> + wrmsr(MSR_PERF_METRICS, 0);
> +
> + /* Enable fixed ctr3 (TOPDOWN.SLOTS) and PERF_METRICS . */
> + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(3, FIXED_PMC_KERNEL));
> + global_ctrl = FIXED_PMC_GLOBAL_CTRL_ENABLE(3) |
> + PERF_METRICS_GLOBAL_CTRL_ENABLE;
> +
> + GUEST_RUN_PAYLOAD(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl, "");
> +
> + /* Check test results. */
> + metrics = rdmsr(MSR_PERF_METRICS);
> + retiring = FIELD_GET(GENMASK_ULL(7, 0), metrics);
> + bad_spec = FIELD_GET(GENMASK_ULL(15, 8), metrics);
> + fe_bound = FIELD_GET(GENMASK_ULL(23, 16), metrics);
> + be_bound = FIELD_GET(GENMASK_ULL(31, 24), metrics);
> +
> + /*
> + * Be conservative: the measured payload definitely retires work, so
> + * Retiring should be non-zero.
> + */
> + GUEST_ASSERT_NE(metrics, 0ULL);
> + GUEST_ASSERT_NE(retiring, 0ULL);
> +
> + /*
> + * The derived percentage of the metrics should be close to 100%.
Better say "The sum of the 4 level-1 topdown metrics should be close to
100%", which is more precise.
Thanks.
> + * 3 is chosen as a loose sanity check.
> + */
> + GUEST_ASSERT(abs(retiring + bad_spec + fe_bound + be_bound - 0xff) < 3);
> +
> + /* Sanity check after PERF_METRICS disabled. */
> + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
> + GUEST_ASSERT_EQ(rdmsr(MSR_PERF_METRICS), metrics);
> + wrmsr(MSR_PERF_METRICS, 0xdeaddead);
> +
> + GUEST_ASSERT_EQ(rdmsr(MSR_PERF_METRICS), 0xdeaddead);
> +}
> +
> static void guest_test_arch_events(void)
> {
> uint8_t i;
> @@ -325,6 +378,8 @@ static void guest_test_arch_events(void)
> for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
> guest_test_arch_event(i);
>
> + guest_test_perf_metrics();
> +
> GUEST_DONE();
> }
>
> @@ -361,7 +416,7 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
> * other than PMCs in the future.
> */
> #define MAX_NR_GP_COUNTERS 8
> -#define MAX_NR_FIXED_COUNTERS 3
> +#define MAX_NR_FIXED_COUNTERS 4
>
> #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
> __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
> @@ -586,6 +641,7 @@ static void test_intel_counters(void)
> uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
> uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
> uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
> + uint64_t advertised_perf_caps = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
> unsigned int i;
> uint8_t v, j;
> uint32_t k;
> @@ -593,6 +649,7 @@ static void test_intel_counters(void)
> const uint64_t perf_caps[] = {
> 0,
> PMU_CAP_FW_WRITES,
> + PERF_CAP_PERF_METRICS,
> };
>
> /*
> @@ -650,6 +707,10 @@ static void test_intel_counters(void)
> if (!kvm_has_perf_caps && perf_caps[i])
> continue;
>
> + /* Ignore unsupported features. */
> + if (perf_caps[i] & ~advertised_perf_caps)
> + continue;
> +
> pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
> v, perf_caps[i]);
>
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH V2 4/4] KVM: selftests: Add perf_metrics and fixed counter 3 tests
2026-04-30 2:26 ` Mi, Dapeng
@ 2026-04-30 18:13 ` Chen, Zide
0 siblings, 0 replies; 12+ messages in thread
From: Chen, Zide @ 2026-04-30 18:13 UTC (permalink / raw)
To: Mi, Dapeng, Sean Christopherson, Paolo Bonzini
Cc: kvm, linux-kernel, Jim Mattson, Mingwei Zhang, Das Sandipan,
Shukla Manali, Falcon Thomas, Xudong Hao
On 4/29/2026 7:26 PM, Mi, Dapeng wrote:
>
> On 4/24/2026 1:46 AM, Zide Chen wrote:
>> Add a selftest to exercise IA32_PERF_METRICS, i.e. architectural
>> support for Topdown (TMA) Level 1 metrics, enumerated by
>> IA32_PERF_CAPABILITIES[15].
>>
>> Only check for non-zero metrics, as they are derived and depend on
>> the workload, CPU model, and host scheduling, making precise
>> expectations fragile.
>>
>> Extend the PMU selftest to cover Intel fixed counter 3 by bumping
>> MAX_NR_FIXED_COUNTERS to 4 and validating basic functionality.
>>
>> Signed-off-by: Zide Chen <zide.chen@intel.com>
>> ---
>> V2: New patch.
>> ---
>> tools/arch/x86/include/asm/msr-index.h | 1 +
>> tools/testing/selftests/kvm/include/x86/pmu.h | 3 +
>> .../selftests/kvm/x86/pmu_counters_test.c | 71 +++++++++++++++++--
>> 3 files changed, 70 insertions(+), 5 deletions(-)
>>
>> diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
>> index 6673601246b3..31ecbea42459 100644
>> --- a/tools/arch/x86/include/asm/msr-index.h
>> +++ b/tools/arch/x86/include/asm/msr-index.h
>> @@ -331,6 +331,7 @@
>> #define PERF_CAP_PEBS_FORMAT 0xf00
>> #define PERF_CAP_FW_WRITES BIT_ULL(13)
>> #define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
>> +#define PERF_CAP_PERF_METRICS BIT_ULL(15)
>> #define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
>> #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
>> PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
>> diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
>> index 72575eadb63a..c68d6435422c 100644
>> --- a/tools/testing/selftests/kvm/include/x86/pmu.h
>> +++ b/tools/testing/selftests/kvm/include/x86/pmu.h
>> @@ -46,6 +46,9 @@
>> /* Fixed PMC controls, Intel only. */
>> #define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
>>
>> +/* PERF_METRICS enable, Intel only. */
>> +#define PERF_METRICS_GLOBAL_CTRL_ENABLE BIT_ULL(48)
>> +
>> #define FIXED_PMC_KERNEL BIT_ULL(0)
>> #define FIXED_PMC_USER BIT_ULL(1)
>> #define FIXED_PMC_ANYTHREAD BIT_ULL(2)
>> diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
>> index 3eaa216b96c0..240bcf9184c2 100644
>> --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
>> +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
>> @@ -6,6 +6,7 @@
>>
>> #include "pmu.h"
>> #include "processor.h"
>> +#include <linux/bitfield.h>
>>
>> /* Number of iterations of the loop for the guest measurement payload. */
>> #define NUM_LOOPS 10
>> @@ -241,17 +242,20 @@ do { \
>> ); \
>> } while (0)
>>
>> -#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
>> +#define GUEST_RUN_PAYLOAD(_ctrl_msr, _value, FEP) \
>> do { \
>> - wrmsr(_pmc_msr, 0); \
>> - \
>> if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
>> GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
>> else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
>> GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
>> else \
>> GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
>> - \
>> +} while (0)
>> +
>> +#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
>> +do { \
>> + wrmsr(_pmc_msr, 0); \
>> + GUEST_RUN_PAYLOAD(_ctrl_msr, _value, FEP); \
>> guest_assert_event_count(_idx, _pmc, _pmc_msr); \
>> } while (0)
>>
>> @@ -318,6 +322,55 @@ static void guest_test_arch_event(uint8_t idx)
>> FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
>> }
>>
>> +static void guest_test_perf_metrics(void)
>> +{
>> + int retiring, bad_spec, fe_bound, be_bound;
>> + uint64_t global_ctrl, metrics;
>> +
>> + if ((guest_get_pmu_version() < 2) || /* Does the guest have GLOBAL_CTRL? */
>> + !this_cpu_has(X86_FEATURE_PDCM) ||
>> + !(rdmsr(MSR_IA32_PERF_CAPABILITIES) & PERF_CAP_PERF_METRICS))
>> + return;
>> +
>> + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
>> + wrmsr(MSR_CORE_PERF_FIXED_CTR3, 0);
>> + wrmsr(MSR_PERF_METRICS, 0);
>> +
>> + /* Enable fixed ctr3 (TOPDOWN.SLOTS) and PERF_METRICS. */
>> + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(3, FIXED_PMC_KERNEL));
>> + global_ctrl = FIXED_PMC_GLOBAL_CTRL_ENABLE(3) |
>> + PERF_METRICS_GLOBAL_CTRL_ENABLE;
>> +
>> + GUEST_RUN_PAYLOAD(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl, "");
>> +
>> + /* Check test results. */
>> + metrics = rdmsr(MSR_PERF_METRICS);
>> + retiring = FIELD_GET(GENMASK_ULL(7, 0), metrics);
>> + bad_spec = FIELD_GET(GENMASK_ULL(15, 8), metrics);
>> + fe_bound = FIELD_GET(GENMASK_ULL(23, 16), metrics);
>> + be_bound = FIELD_GET(GENMASK_ULL(31, 24), metrics);
>> +
>> + /*
>> + * Be conservative: the measured payload definitely retires work, so
>> + * Retiring should be non-zero.
>> + */
>> + GUEST_ASSERT_NE(metrics, 0ULL);
>> + GUEST_ASSERT_NE(retiring, 0ULL);
>> +
>> + /*
>> + * The derived percentage of the metrics should be close to 100%.
>
> Better say "The sum of the 4 level-1 topdown metrics should be close to
> 100%", which is more precise.
Yes, thanks.
>
> Thanks.
>
>
>> + * 3 is chosen as a loose sanity check.
>> + */
>> + GUEST_ASSERT(abs(retiring + bad_spec + fe_bound + be_bound - 0xff) < 3);
>> +
>> + /* Sanity check after PERF_METRICS disabled. */
>> + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
>> + GUEST_ASSERT_EQ(rdmsr(MSR_PERF_METRICS), metrics);
>> + wrmsr(MSR_PERF_METRICS, 0xdeaddead);
>> +
>> + GUEST_ASSERT_EQ(rdmsr(MSR_PERF_METRICS), 0xdeaddead);
>> +}
>> +
>> static void guest_test_arch_events(void)
>> {
>> uint8_t i;
>> @@ -325,6 +378,8 @@ static void guest_test_arch_events(void)
>> for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
>> guest_test_arch_event(i);
>>
>> + guest_test_perf_metrics();
>> +
>> GUEST_DONE();
>> }
>>
>> @@ -361,7 +416,7 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
>> * other than PMCs in the future.
>> */
>> #define MAX_NR_GP_COUNTERS 8
>> -#define MAX_NR_FIXED_COUNTERS 3
>> +#define MAX_NR_FIXED_COUNTERS 4
>>
>> #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
>> __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
>> @@ -586,6 +641,7 @@ static void test_intel_counters(void)
>> uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
>> uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
>> uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
>> + uint64_t advertised_perf_caps = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
>> unsigned int i;
>> uint8_t v, j;
>> uint32_t k;
>> @@ -593,6 +649,7 @@ static void test_intel_counters(void)
>> const uint64_t perf_caps[] = {
>> 0,
>> PMU_CAP_FW_WRITES,
>> + PERF_CAP_PERF_METRICS,
>> };
>>
>> /*
>> @@ -650,6 +707,10 @@ static void test_intel_counters(void)
>> if (!kvm_has_perf_caps && perf_caps[i])
>> continue;
>>
>> + /* Ignore unsupported features. */
>> + if (perf_caps[i] & ~advertised_perf_caps)
>> + continue;
>> +
>> pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
>> v, perf_caps[i]);
>>
^ permalink raw reply [flat|nested] 12+ messages in thread