From: Zide Chen <zide.chen@intel.com>
To: Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
Jim Mattson <jmattson@google.com>,
Mingwei Zhang <mizhang@google.com>,
Zide Chen <zide.chen@intel.com>,
Das Sandipan <Sandipan.Das@amd.com>,
Shukla Manali <Manali.Shukla@amd.com>,
Dapeng Mi <dapeng1.mi@linux.intel.com>,
Falcon Thomas <thomas.falcon@intel.com>,
Xudong Hao <xudong.hao@intel.com>
Subject: [PATCH V3 3/4] KVM: x86/pmu: Support PERF_METRICS MSR in mediated vPMU
Date: Mon, 15 Jun 2026 16:01:17 -0700 [thread overview]
Message-ID: <20260615230118.50718-4-zide.chen@intel.com> (raw)
In-Reply-To: <20260615230118.50718-1-zide.chen@intel.com>
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
Bit 15 in IA32_PERF_CAPABILITIES indicates that the CPU provides
built-in support for Topdown Microarchitecture Analysis (TMA) L1
metrics via the IA32_PERF_METRICS MSR.
Expose this capability only when mediated vPMU is enabled, as emulating
IA32_PERF_METRICS in the legacy vPMU model is impractical.
Pass IA32_PERF_METRICS through to the guest only when mediated vPMU is
enabled and bit 15 is set in guest IA32_PERF_CAPABILITIES. Allow
kvm_pmu_{get,set}_msr() to handle this MSR for host accesses.
Save and restore this MSR on host/guest PMU context switches so that
host PMU activity does not clobber the guest value, and guest state
is not leaked into the host.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
v3:
- Replace WARN_ON() with WARN_ON_ONCE(). (Dapeng)
- Add comments to explain why writes to PERF_METRICS are not validated.
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/include/asm/perf_event.h | 1 +
arch/x86/kvm/vmx/pmu_intel.c | 36 +++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/pmu_intel.h | 5 +++++
arch/x86/kvm/vmx/vmx.c | 6 ++++++
arch/x86/kvm/x86.c | 6 +++++-
7 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 754103e7ab4d..9b03475843f3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -597,6 +597,7 @@ struct kvm_pmu {
u64 global_status_rsvd;
u64 reserved_bits;
u64 raw_event_mask;
+ u64 perf_metrics;
struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 18c4be75e927..fdcaeb6c8352 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -331,6 +331,7 @@
#define PERF_CAP_PEBS_FORMAT 0xf00
#define PERF_CAP_FW_WRITES BIT_ULL(13)
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PERF_METRICS BIT_ULL(15)
#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1eb13673e889..bc2e1cbcd9b9 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -447,6 +447,7 @@ static inline bool is_topdown_idx(int idx)
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
+#define GLOBAL_STATUS_PERF_METRICS_OVF BIT_ULL(GLOBAL_STATUS_PERF_METRICS_OVF_BIT)
#define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48)
/*
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 59b7a90c79e1..19daee29b731 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -188,6 +188,8 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
return kvm_pmu_has_perf_global_ctrl(pmu);
+ case MSR_PERF_METRICS:
+ return vcpu_has_perf_metrics(vcpu);
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
break;
@@ -345,6 +347,10 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CORE_PERF_FIXED_CTR_CTRL:
msr_info->data = pmu->fixed_ctr_ctrl;
break;
+ case MSR_PERF_METRICS:
+ WARN_ON_ONCE(!msr_info->host_initiated);
+ msr_info->data = pmu->perf_metrics;
+ break;
case MSR_IA32_PEBS_ENABLE:
msr_info->data = pmu->pebs_enable;
break;
@@ -394,6 +400,15 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (pmu->fixed_ctr_ctrl != data)
reprogram_fixed_counters(pmu, data);
break;
+ case MSR_PERF_METRICS:
+ WARN_ON_ONCE(!msr_info->host_initiated);
+
+ /*
+ * If TMA level 2 is not supported, bits [63:32] are reserved
+ * and ignored on write, so no validation is needed here.
+ */
+ pmu->perf_metrics = data;
+ break;
case MSR_IA32_PEBS_ENABLE:
if (data & pmu->pebs_enable_rsvd)
return 1;
@@ -589,6 +604,11 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_status_rsvd &=
~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
+ if (perf_capabilities & PERF_CAP_PERF_METRICS) {
+ pmu->global_ctrl_rsvd &= ~GLOBAL_CTRL_EN_PERF_METRICS;
+ pmu->global_status_rsvd &= ~GLOBAL_STATUS_PERF_METRICS_OVF;
+ }
+
if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
pmu->pebs_enable_rsvd = counter_rsvd;
@@ -632,6 +652,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+ pmu->perf_metrics = 0;
intel_pmu_release_guest_lbr_event(vcpu);
}
@@ -803,6 +826,13 @@ static void intel_mediated_pmu_load(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
u64 global_status, toggle;
+ /*
+ * PERF_METRICS MSR must be restored closely after fixed counter 3
+ * (kvm_pmu_load_guest_pmcs()).
+ */
+ if (vcpu_has_perf_metrics(vcpu))
+ wrmsrq(MSR_PERF_METRICS, pmu->perf_metrics);
+
rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, global_status);
toggle = pmu->global_status ^ global_status;
if (global_status & toggle)
@@ -831,6 +861,12 @@ static void intel_mediated_pmu_put(struct kvm_vcpu *vcpu)
*/
if (pmu->fixed_ctr_ctrl_hw)
wrmsrq(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
+
+ if (vcpu_has_perf_metrics(vcpu)) {
+ pmu->perf_metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS);
+ if (pmu->perf_metrics)
+ wrmsrq(MSR_PERF_METRICS, 0);
+ }
}
struct kvm_pmu_ops intel_pmu_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/pmu_intel.h b/arch/x86/kvm/vmx/pmu_intel.h
index 5d9357640aa1..2ec547223b09 100644
--- a/arch/x86/kvm/vmx/pmu_intel.h
+++ b/arch/x86/kvm/vmx/pmu_intel.h
@@ -40,4 +40,9 @@ struct lbr_desc {
extern struct x86_pmu_lbr vmx_lbr_caps;
+static inline bool vcpu_has_perf_metrics(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PERF_METRICS);
+}
+
#endif /* __KVM_X86_VMX_PMU_INTEL_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c548f22375ad..7cffe1619a1f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4264,6 +4264,9 @@ static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu)
MSR_TYPE_RW, intercept);
vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
MSR_TYPE_RW, intercept);
+
+ vmx_set_intercept_for_msr(vcpu, MSR_PERF_METRICS, MSR_TYPE_RW,
+ !vcpu_has_perf_metrics(vcpu));
}
static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
@@ -8088,6 +8091,9 @@ static __init u64 vmx_get_perf_capabilities(void)
perf_cap &= ~PERF_CAP_PEBS_BASELINE;
}
+ if (enable_mediated_pmu)
+ perf_cap |= host_perf_cap & PERF_CAP_PERF_METRICS;
+
return perf_cap;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b9cca855bc10..2ac297ba3598 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -352,7 +352,7 @@ static const u32 msrs_to_save_pmu[] = {
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
MSR_ARCH_PERFMON_FIXED_CTR2, MSR_ARCH_PERFMON_FIXED_CTR3,
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
- MSR_CORE_PERF_GLOBAL_CTRL,
+ MSR_CORE_PERF_GLOBAL_CTRL, MSR_PERF_METRICS,
MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
/* This part of MSRs should match KVM_MAX_NR_INTEL_GP_COUNTERS. */
@@ -7679,6 +7679,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2))
return;
break;
+ case MSR_PERF_METRICS:
+ if (!(kvm_caps.supported_perf_cap & PERF_CAP_PERF_METRICS))
+ return;
+ break;
case MSR_ARCH_PERFMON_PERFCTR0 ...
MSR_ARCH_PERFMON_PERFCTR0 + KVM_MAX_NR_GP_COUNTERS - 1:
if (msr_index - MSR_ARCH_PERFMON_PERFCTR0 >=
--
2.54.0
next prev parent reply other threads:[~2026-06-15 23:10 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-15 23:01 [PATCH V3 0/4] KVM: x86/pmu: Add hardware Topdown metrics support Zide Chen
2026-06-15 23:01 ` [PATCH V3 1/4] KVM: x86/pmu: Do not map fixed counters >= 3 to generic perf events Zide Chen
2026-06-15 23:01 ` [PATCH V3 2/4] KVM: x86/pmu: Support Intel fixed counter 3 on mediated vPMU Zide Chen
2026-06-15 23:01 ` Zide Chen [this message]
2026-06-15 23:26 ` [PATCH V3 3/4] KVM: x86/pmu: Support PERF_METRICS MSR in " sashiko-bot
2026-06-16 16:29 ` Chen, Zide
2026-06-18 2:21 ` Mi, Dapeng
2026-06-15 23:01 ` [PATCH V3 4/4] KVM: selftests: Add perf_metrics and fixed counter 3 tests Zide Chen
2026-06-15 23:26 ` sashiko-bot
2026-06-16 16:32 ` Chen, Zide
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260615230118.50718-4-zide.chen@intel.com \
--to=zide.chen@intel.com \
--cc=Manali.Shukla@amd.com \
--cc=Sandipan.Das@amd.com \
--cc=dapeng1.mi@linux.intel.com \
--cc=jmattson@google.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mizhang@google.com \
--cc=pbonzini@redhat.com \
--cc=seanjc@google.com \
--cc=thomas.falcon@intel.com \
--cc=xudong.hao@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.