* [PATCH v4 1/6] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs
2026-06-04 2:55 [PATCH v4 0/6] target/i386: Misc PMU fixes and enabling Zide Chen
@ 2026-06-04 2:55 ` Zide Chen
2026-06-05 14:18 ` Fabiano Rosas
2026-06-05 14:47 ` Sandipan Das
2026-06-04 2:55 ` [PATCH v4 2/6] target/i386: Gate enable_pmu on kvm_enabled() Zide Chen
` (4 subsequent siblings)
5 siblings, 2 replies; 10+ messages in thread
From: Zide Chen @ 2026-06-04 2:55 UTC (permalink / raw)
To: qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu, Fabiano Rosas,
Sandipan Das
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi, Zide Chen
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
MSR_CORE_PERF_GLOBAL_OVF_CTRL and MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR
are write-only MSRs and reads always return zero.
Saving and restoring these MSRs is therefore unnecessary. Replace
VMSTATE_UINT64 with VMSTATE_UNUSED in the VMStateDescription to ignore
env.msr_global_ovf_ctrl during migration. This avoids the need to bump
version_id and does not introduce any migration incompatibility.
Cc: Dongli Zhang <dongli.zhang@oracle.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Fixes: e587632c228e ("target/i386/kvm: support perfmon-v2 for reset")
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Co-developed-by: Zide Chen <zide.chen@intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
V3:
- Remove MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR as well.
---
target/i386/cpu.h | 3 ---
target/i386/kvm/kvm.c | 10 ----------
target/i386/machine.c | 4 ++--
3 files changed, 2 insertions(+), 15 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 6b500737c3be..ff44487d0b6d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -507,11 +507,9 @@ typedef enum X86Seg {
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f
-#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
-#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
#define MSR_K7_EVNTSEL0 0xc0010000
#define MSR_K7_PERFCTR0 0xc0010004
@@ -2104,7 +2102,6 @@ typedef struct CPUArchState {
uint64_t msr_fixed_ctr_ctrl;
uint64_t msr_global_ctrl;
uint64_t msr_global_status;
- uint64_t msr_global_ovf_ctrl;
uint64_t msr_fixed_counters[MAX_FIXED_COUNTERS];
uint64_t msr_gp_counters[MAX_GP_COUNTERS];
uint64_t msr_gp_evtsel[MAX_GP_COUNTERS];
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a29f757c168a..1ac1803e8a2e 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4290,8 +4290,6 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
if (pmu_version > 1) {
kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS,
env->msr_global_status);
- kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
- env->msr_global_ovf_ctrl);
/* Now start the PMU. */
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL,
@@ -4335,8 +4333,6 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
if (pmu_version > 1) {
kvm_msr_entry_add(cpu, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
env->msr_global_status);
- kvm_msr_entry_add(cpu, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
- env->msr_global_ovf_ctrl);
kvm_msr_entry_add(cpu, MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
env->msr_global_ctrl);
}
@@ -4852,7 +4848,6 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0);
- kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0);
}
for (i = 0; i < num_pmu_fixed_counters; i++) {
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
@@ -4895,7 +4890,6 @@ static int kvm_get_msrs(X86CPU *cpu)
if (pmu_version > 1) {
kvm_msr_entry_add(cpu, MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
kvm_msr_entry_add(cpu, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, 0);
- kvm_msr_entry_add(cpu, MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, 0);
}
}
@@ -5218,10 +5212,6 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
env->msr_global_status = msrs[i].data;
break;
- case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
- env->msr_global_ovf_ctrl = msrs[i].data;
- break;
case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data;
break;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 48a2a4b31907..e0a5a5da6f5a 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -666,7 +666,7 @@ static bool pmu_enable_needed(void *opaque)
int i;
if (env->msr_fixed_ctr_ctrl || env->msr_global_ctrl ||
- env->msr_global_status || env->msr_global_ovf_ctrl) {
+ env->msr_global_status) {
return true;
}
for (i = 0; i < MAX_FIXED_COUNTERS; i++) {
@@ -692,7 +692,7 @@ static const VMStateDescription vmstate_msr_architectural_pmu = {
VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU),
VMSTATE_UINT64(env.msr_global_ctrl, X86CPU),
VMSTATE_UINT64(env.msr_global_status, X86CPU),
- VMSTATE_UINT64(env.msr_global_ovf_ctrl, X86CPU),
+ VMSTATE_UNUSED(sizeof(uint64_t)),
VMSTATE_UINT64_ARRAY(env.msr_fixed_counters, X86CPU, MAX_FIXED_COUNTERS),
VMSTATE_UINT64_ARRAY(env.msr_gp_counters, X86CPU, MAX_GP_COUNTERS),
VMSTATE_UINT64_ARRAY(env.msr_gp_evtsel, X86CPU, MAX_GP_COUNTERS),
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v4 1/6] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs
2026-06-04 2:55 ` [PATCH v4 1/6] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs Zide Chen
@ 2026-06-05 14:18 ` Fabiano Rosas
2026-06-05 14:47 ` Sandipan Das
1 sibling, 0 replies; 10+ messages in thread
From: Fabiano Rosas @ 2026-06-05 14:18 UTC (permalink / raw)
To: Zide Chen, qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu,
Sandipan Das
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi, Zide Chen
Zide Chen <zide.chen@intel.com> writes:
> From: Dapeng Mi <dapeng1.mi@linux.intel.com>
>
> MSR_CORE_PERF_GLOBAL_OVF_CTRL and MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR
> are write-only MSRs and reads always return zero.
>
> Saving and restoring these MSRs is therefore unnecessary. Replace
> VMSTATE_UINT64 with VMSTATE_UNUSED in the VMStateDescription to ignore
> env.msr_global_ovf_ctrl during migration. This avoids the need to bump
> version_id and does not introduce any migration incompatibility.
>
> Cc: Dongli Zhang <dongli.zhang@oracle.com>
> Cc: Sandipan Das <sandipan.das@amd.com>
> Fixes: e587632c228e ("target/i386/kvm: support perfmon-v2 for reset")
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Co-developed-by: Zide Chen <zide.chen@intel.com>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
> V3:
> - Remove MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR as well.
> ---
> target/i386/cpu.h | 3 ---
> target/i386/kvm/kvm.c | 10 ----------
> target/i386/machine.c | 4 ++--
> 3 files changed, 2 insertions(+), 15 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 6b500737c3be..ff44487d0b6d 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -507,11 +507,9 @@ typedef enum X86Seg {
> #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d
> #define MSR_CORE_PERF_GLOBAL_STATUS 0x38e
> #define MSR_CORE_PERF_GLOBAL_CTRL 0x38f
> -#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390
>
> #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
> #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
> -#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
>
> #define MSR_K7_EVNTSEL0 0xc0010000
> #define MSR_K7_PERFCTR0 0xc0010004
> @@ -2104,7 +2102,6 @@ typedef struct CPUArchState {
> uint64_t msr_fixed_ctr_ctrl;
> uint64_t msr_global_ctrl;
> uint64_t msr_global_status;
> - uint64_t msr_global_ovf_ctrl;
> uint64_t msr_fixed_counters[MAX_FIXED_COUNTERS];
> uint64_t msr_gp_counters[MAX_GP_COUNTERS];
> uint64_t msr_gp_evtsel[MAX_GP_COUNTERS];
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index a29f757c168a..1ac1803e8a2e 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -4290,8 +4290,6 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
> if (pmu_version > 1) {
> kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS,
> env->msr_global_status);
> - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
> - env->msr_global_ovf_ctrl);
Assuming there is no weird nested virtualization scenario, or combination
of migration, resets and context-switching between L1/L2, that implicitly
relies on this (effective) clearing of the MSR to work properly:
Reviewed-by: Fabiano Rosas <farosas@suse.de>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4 1/6] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs
2026-06-04 2:55 ` [PATCH v4 1/6] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs Zide Chen
2026-06-05 14:18 ` Fabiano Rosas
@ 2026-06-05 14:47 ` Sandipan Das
1 sibling, 0 replies; 10+ messages in thread
From: Sandipan Das @ 2026-06-05 14:47 UTC (permalink / raw)
To: Zide Chen, qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu,
Fabiano Rosas
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi
On 04-06-2026 08:25, Zide Chen wrote:
> From: Dapeng Mi <dapeng1.mi@linux.intel.com>
>
> MSR_CORE_PERF_GLOBAL_OVF_CTRL and MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR
> are write-only MSRs and reads always return zero.
>
> Saving and restoring these MSRs is therefore unnecessary. Replace
> VMSTATE_UINT64 with VMSTATE_UNUSED in the VMStateDescription to ignore
> env.msr_global_ovf_ctrl during migration. This avoids the need to bump
> version_id and does not introduce any migration incompatibility.
>
> Cc: Dongli Zhang <dongli.zhang@oracle.com>
> Cc: Sandipan Das <sandipan.das@amd.com>
> Fixes: e587632c228e ("target/i386/kvm: support perfmon-v2 for reset")
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Co-developed-by: Zide Chen <zide.chen@intel.com>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
Reviewed-by: Sandipan Das <sandipan.das@amd.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v4 2/6] target/i386: Gate enable_pmu on kvm_enabled()
2026-06-04 2:55 [PATCH v4 0/6] target/i386: Misc PMU fixes and enabling Zide Chen
2026-06-04 2:55 ` [PATCH v4 1/6] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs Zide Chen
@ 2026-06-04 2:55 ` Zide Chen
2026-06-05 14:48 ` Sandipan Das
2026-06-04 2:55 ` [PATCH v4 3/6] target/i386: Adjust maximum number of PMU counters Zide Chen
` (3 subsequent siblings)
5 siblings, 1 reply; 10+ messages in thread
From: Zide Chen @ 2026-06-04 2:55 UTC (permalink / raw)
To: qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu, Fabiano Rosas,
Sandipan Das
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi, Zide Chen
Guest PMU support requires KVM. Clear cpu->enable_pmu when KVM is not
enabled, so PMU-related code can rely solely on cpu->enable_pmu.
This reduces duplication and avoids bugs where one of the checks is
missed. For example, cpu_x86_cpuid() enables CPUID.0AH when
cpu->enable_pmu is set but does not check kvm_enabled(). This is
implicitly fixed by this patch:
if (cpu->enable_pmu) {
x86_cpu_get_supported_cpuid(0xA, count, eax, ebx, ecx, edx);
}
Also fix two places that check kvm_enabled() but not cpu->enable_pmu.
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
V2:
- Replace a tab with spaces.
---
target/i386/cpu.c | 10 +++++++---
target/i386/kvm/kvm.c | 4 ++--
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b5e483e8cd25..c978e957df6a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -8790,7 +8790,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx = 0;
*edx = 0;
if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) ||
- !kvm_enabled()) {
+ !cpu->enable_pmu) {
break;
}
@@ -9137,7 +9137,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
case 0x80000022:
*eax = *ebx = *ecx = *edx = 0;
/* AMD Extended Performance Monitoring and Debug */
- if (kvm_enabled() && cpu->enable_pmu &&
+ if (cpu->enable_pmu &&
(env->features[FEAT_8000_0022_EAX] & CPUID_8000_0022_EAX_PERFMON_V2)) {
*eax |= CPUID_8000_0022_EAX_PERFMON_V2;
*ebx |= kvm_arch_get_supported_cpuid(cs->kvm_state, index, count,
@@ -9753,7 +9753,7 @@ static bool x86_cpu_filter_features(X86CPU *cpu, bool verbose)
* are advertised by cpu_x86_cpuid(). Keep these two in sync.
*/
if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) &&
- kvm_enabled()) {
+ cpu->enable_pmu) {
x86_cpu_get_supported_cpuid(0x14, 0,
&eax_0, &ebx_0, &ecx_0, &edx_0);
x86_cpu_get_supported_cpuid(0x14, 1,
@@ -9901,6 +9901,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
Error *local_err = NULL;
unsigned requested_lbr_fmt;
+ if (!kvm_enabled()) {
+ cpu->enable_pmu = false;
+ }
+
#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY)
/* Use pc-relative instructions in system-mode */
tcg_cflags_set(cs, CF_PCREL);
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 1ac1803e8a2e..5c953a0f3a60 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4483,7 +4483,7 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
env->msr_xfd_err);
}
- if (kvm_enabled() && cpu->enable_pmu &&
+ if (cpu->enable_pmu &&
(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
uint64_t depth;
int ret;
@@ -4995,7 +4995,7 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
}
- if (kvm_enabled() && cpu->enable_pmu &&
+ if (cpu->enable_pmu &&
(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
uint64_t depth;
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v4 2/6] target/i386: Gate enable_pmu on kvm_enabled()
2026-06-04 2:55 ` [PATCH v4 2/6] target/i386: Gate enable_pmu on kvm_enabled() Zide Chen
@ 2026-06-05 14:48 ` Sandipan Das
0 siblings, 0 replies; 10+ messages in thread
From: Sandipan Das @ 2026-06-05 14:48 UTC (permalink / raw)
To: Zide Chen, qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu,
Fabiano Rosas
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi
On 04-06-2026 08:25, Zide Chen wrote:
> Guest PMU support requires KVM. Clear cpu->enable_pmu when KVM is not
> enabled, so PMU-related code can rely solely on cpu->enable_pmu.
>
> This reduces duplication and avoids bugs where one of the checks is
> missed. For example, cpu_x86_cpuid() enables CPUID.0AH when
> cpu->enable_pmu is set but does not check kvm_enabled(). This is
> implicitly fixed by this patch:
>
> if (cpu->enable_pmu) {
> x86_cpu_get_supported_cpuid(0xA, count, eax, ebx, ecx, edx);
> }
>
> Also fix two places that check kvm_enabled() but not cpu->enable_pmu.
>
> Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
Reviewed-by: Sandipan Das <sandipan.das@amd.com>
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v4 3/6] target/i386: Adjust maximum number of PMU counters
2026-06-04 2:55 [PATCH v4 0/6] target/i386: Misc PMU fixes and enabling Zide Chen
2026-06-04 2:55 ` [PATCH v4 1/6] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs Zide Chen
2026-06-04 2:55 ` [PATCH v4 2/6] target/i386: Gate enable_pmu on kvm_enabled() Zide Chen
@ 2026-06-04 2:55 ` Zide Chen
2026-06-04 2:55 ` [PATCH v4 4/6] target/i386: Support full-width writes for perf counters Zide Chen
` (2 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Zide Chen @ 2026-06-04 2:55 UTC (permalink / raw)
To: qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu, Fabiano Rosas,
Sandipan Das
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi, Zide Chen
Changing either MAX_GP_COUNTERS or MAX_FIXED_COUNTERS affects the
VMState layout and therefore requires bumping the migration version
IDs. Adjust both limits together to avoid repeated VMState version
bumps in follow-up patches.
To support full-width writes, QEMU needs to handle the alias MSRs
starting at 0x4c1. With the current limits, the alias range can
extend into MSR_MCG_EXT_CTL (0x4d0). Reducing MAX_GP_COUNTERS from 18
to 15 avoids the overlap while still leaving room for future expansion
beyond current hardware (which supports at most 10 GP counters).
Increase MAX_FIXED_COUNTERS to 7 to support additional fixed counters
(e.g. Topdown metric events).
With these changes, bump version_id to prevent migration to older
QEMU, and bump minimum_version_id to prevent migration from older
QEMU, which could otherwise result in VMState overflows.
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
target/i386/cpu.h | 8 ++------
target/i386/machine.c | 4 ++--
2 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ff44487d0b6d..5288c92fe52f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1751,12 +1751,8 @@ typedef struct {
#define CPU_NB_REGS CPU_NB_REGS32
#endif
-#define MAX_FIXED_COUNTERS 3
-/*
- * This formula is based on Intel's MSR. The current size also meets AMD's
- * needs.
- */
-#define MAX_GP_COUNTERS (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0)
+#define MAX_FIXED_COUNTERS 7
+#define MAX_GP_COUNTERS 15
#define NB_OPMASK_REGS 8
diff --git a/target/i386/machine.c b/target/i386/machine.c
index e0a5a5da6f5a..05aa38a8a43d 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -685,8 +685,8 @@ static bool pmu_enable_needed(void *opaque)
static const VMStateDescription vmstate_msr_architectural_pmu = {
.name = "cpu/msr_architectural_pmu",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
.needed = pmu_enable_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU),
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 4/6] target/i386: Support full-width writes for perf counters
2026-06-04 2:55 [PATCH v4 0/6] target/i386: Misc PMU fixes and enabling Zide Chen
` (2 preceding siblings ...)
2026-06-04 2:55 ` [PATCH v4 3/6] target/i386: Adjust maximum number of PMU counters Zide Chen
@ 2026-06-04 2:55 ` Zide Chen
2026-06-04 2:55 ` [PATCH v4 5/6] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls Zide Chen
2026-06-04 2:55 ` [PATCH v4 6/6] target/i386: Add Topdown metrics feature support Zide Chen
5 siblings, 0 replies; 10+ messages in thread
From: Zide Chen @ 2026-06-04 2:55 UTC (permalink / raw)
To: qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu, Fabiano Rosas,
Sandipan Das
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi, Zide Chen
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
If IA32_PERF_CAPABILITIES.FW_WRITE (bit 13) is set, each general-
purpose counter IA32_PMCi (starting at 0xc1) is accompanied by a
corresponding 64-bit alias MSR starting at 0x4c1 (IA32_A_PMC0).
The legacy IA32_PMCi MSRs are not full-width and their effective width
is determined by CPUID.0AH:EAX[23:16].
Since these MSRs are architectural aliases, when IA32_A_PMCi is
supported it is safe to use it for save/restore instead of the legacy
IA32_PMCi MSRs.
Full-width write is a user-visible feature and can be disabled
individually.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
V3:
- Move the MAX_GP_COUNTERS change and the migration version ID changes to
[patch v3 4/13] to avoid bumping version IDs twice in one patch
series.
V2:
- Slightly improve the commit message wording.
- Update the comment for MSR_IA32_PMC0 definition.
---
target/i386/cpu.h | 3 +++
target/i386/kvm/kvm.c | 18 ++++++++++++++++--
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 5288c92fe52f..8cc3c2f139e7 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -421,6 +421,7 @@ typedef enum X86Seg {
#define MSR_IA32_PERF_CAPABILITIES 0x345
#define PERF_CAP_LBR_FMT 0x3f
+#define PERF_CAP_FULL_WRITE (1U << 13)
#define MSR_IA32_TSX_CTRL 0x122
#define MSR_IA32_TSCDEADLINE 0x6e0
@@ -448,6 +449,8 @@ typedef enum X86Seg {
#define MSR_IA32_SGXLEPUBKEYHASH3 0x8f
#define MSR_P6_PERFCTR0 0xc1
+/* Alias MSR range for full-width general-purpose performance counters */
+#define MSR_IA32_PMC0 0x4c1
#define MSR_IA32_SMBASE 0x9e
#define MSR_SMI_COUNT 0x34
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5c953a0f3a60..a99c1dba83f2 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4270,6 +4270,12 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
}
if ((IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env)) && pmu_version > 0) {
+ uint32_t perf_cntr_base = MSR_P6_PERFCTR0;
+
+ if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_FULL_WRITE) {
+ perf_cntr_base = MSR_IA32_PMC0;
+ }
+
if (pmu_version > 1) {
/* Stop the counter. */
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
@@ -4282,7 +4288,7 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
env->msr_fixed_counters[i]);
}
for (i = 0; i < num_pmu_gp_counters; i++) {
- kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i,
+ kvm_msr_entry_add(cpu, perf_cntr_base + i,
env->msr_gp_counters[i]);
kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i,
env->msr_gp_evtsel[i]);
@@ -4844,6 +4850,11 @@ static int kvm_get_msrs(X86CPU *cpu)
}
if ((IS_INTEL_CPU(env) || IS_ZHAOXIN_CPU(env)) && pmu_version > 0) {
+ uint32_t perf_cntr_base = MSR_P6_PERFCTR0;
+
+ if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_FULL_WRITE) {
+ perf_cntr_base = MSR_IA32_PMC0;
+ }
if (pmu_version > 1) {
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -4853,7 +4864,7 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
}
for (i = 0; i < num_pmu_gp_counters; i++) {
- kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0);
+ kvm_msr_entry_add(cpu, perf_cntr_base + i, 0);
kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0);
}
}
@@ -5218,6 +5229,9 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
break;
+ case MSR_IA32_PMC0 ... MSR_IA32_PMC0 + MAX_GP_COUNTERS - 1:
+ env->msr_gp_counters[index - MSR_IA32_PMC0] = msrs[i].data;
+ break;
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
break;
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 5/6] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls
2026-06-04 2:55 [PATCH v4 0/6] target/i386: Misc PMU fixes and enabling Zide Chen
` (3 preceding siblings ...)
2026-06-04 2:55 ` [PATCH v4 4/6] target/i386: Support full-width writes for perf counters Zide Chen
@ 2026-06-04 2:55 ` Zide Chen
2026-06-04 2:55 ` [PATCH v4 6/6] target/i386: Add Topdown metrics feature support Zide Chen
5 siblings, 0 replies; 10+ messages in thread
From: Zide Chen @ 2026-06-04 2:55 UTC (permalink / raw)
To: qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu, Fabiano Rosas,
Sandipan Das
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi, Zide Chen
Newer Intel server CPUs support a large number of PMU MSRs. Currently,
QEMU allocates cpu->kvm_msr_buf as a single-page buffer, which is not
sufficient to hold all possible MSRs.
Increase MSR_BUF_SIZE to 8192 bytes, providing space for up to 511 MSRs.
This is sufficient even for the theoretical worst case, such as
architectural LBR with a depth of 64.
KVM_[GET/SET]_MSRS is limited to 255 MSRs per call. Raising this limit
to 511 would require changes in KVM and would introduce backward
compatibility issues. Instead, split requests into multiple
KVM_[GET/SET]_MSRS calls when the number of MSRs exceeds the API limit.
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
v4:
- Add an assert in kvm_init_msrs() to catch the case where KVM_SET_MSRS
returns a negative value.
v3:
- Address Dapeng's comments.
---
target/i386/kvm/kvm.c | 110 +++++++++++++++++++++++++++++++++++-------
1 file changed, 92 insertions(+), 18 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a99c1dba83f2..9b6407794e43 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -97,9 +97,12 @@
#define KVM_APIC_BUS_CYCLE_NS 1
#define KVM_APIC_BUS_FREQUENCY (1000000000ULL / KVM_APIC_BUS_CYCLE_NS)
-/* A 4096-byte buffer can hold the 8-byte kvm_msrs header, plus
- * 255 kvm_msr_entry structs */
-#define MSR_BUF_SIZE 4096
+/* An 8192-byte buffer can hold the 8-byte kvm_msrs header, plus
+ * 511 kvm_msr_entry structs */
+#define MSR_BUF_SIZE 8192
+
+/* Maximum number of MSRs in one single KVM_[GET/SET]_MSRS call. */
+#define KVM_MAX_IO_MSRS 255
typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
@@ -4099,21 +4102,99 @@ static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f)
}
}
-static int kvm_buf_set_msrs(X86CPU *cpu)
+static int __kvm_buf_set_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
{
- int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);
+ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, msrs);
if (ret < 0) {
return ret;
}
- if (ret < cpu->kvm_msr_buf->nmsrs) {
- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
+ if (ret < msrs->nmsrs) {
+ struct kvm_msr_entry *e = &msrs->entries[ret];
error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64,
(uint32_t)e->index, (uint64_t)e->data);
}
- assert(ret == cpu->kvm_msr_buf->nmsrs);
- return 0;
+ assert(ret == msrs->nmsrs);
+ return ret;
+}
+
+static int __kvm_buf_get_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
+{
+ int ret;
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, msrs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret < msrs->nmsrs) {
+ struct kvm_msr_entry *e = &msrs->entries[ret];
+ error_report("error: failed to get MSR 0x%" PRIx32,
+ (uint32_t)e->index);
+ }
+
+ assert(ret == msrs->nmsrs);
+ return ret;
+}
+
+static int kvm_buf_set_or_get_msrs(X86CPU *cpu, bool is_write)
+{
+ struct kvm_msr_entry *entries = cpu->kvm_msr_buf->entries;
+ struct kvm_msrs *buf = NULL;
+ int current, remaining, ret = 0;
+ size_t buf_size;
+
+ buf_size = KVM_MAX_IO_MSRS * sizeof(struct kvm_msr_entry) +
+ sizeof(struct kvm_msrs);
+ buf = g_malloc(buf_size);
+
+ remaining = cpu->kvm_msr_buf->nmsrs;
+ current = 0;
+ while (remaining) {
+ size_t size;
+
+ memset(buf, 0, buf_size);
+
+ if (remaining > KVM_MAX_IO_MSRS) {
+ buf->nmsrs = KVM_MAX_IO_MSRS;
+ } else {
+ buf->nmsrs = remaining;
+ }
+
+ size = buf->nmsrs * sizeof(entries[0]);
+ memcpy(buf->entries, &entries[current], size);
+
+ if (is_write) {
+ ret = __kvm_buf_set_msrs(cpu, buf);
+ } else {
+ ret = __kvm_buf_get_msrs(cpu, buf);
+ }
+
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (!is_write)
+ memcpy(&entries[current], buf->entries, size);
+
+ current += buf->nmsrs;
+ remaining -= buf->nmsrs;
+ }
+
+out:
+ g_free(buf);
+ return ret < 0 ? ret : cpu->kvm_msr_buf->nmsrs;
+}
+
+static inline int kvm_buf_set_msrs(X86CPU *cpu)
+{
+ return kvm_buf_set_or_get_msrs(cpu, true);
+}
+
+static inline int kvm_buf_get_msrs(X86CPU *cpu)
+{
+ return kvm_buf_set_or_get_msrs(cpu, false);
}
static void kvm_init_msrs(X86CPU *cpu)
@@ -4149,7 +4230,7 @@ static void kvm_init_msrs(X86CPU *cpu)
if (has_msr_ucode_rev) {
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
}
- assert(kvm_buf_set_msrs(cpu) == 0);
+ assert(kvm_buf_set_msrs(cpu) == cpu->kvm_msr_buf->nmsrs);
}
static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
@@ -5042,18 +5123,11 @@ static int kvm_get_msrs(X86CPU *cpu)
}
}
- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
+ ret = kvm_buf_get_msrs(cpu);
if (ret < 0) {
return ret;
}
- if (ret < cpu->kvm_msr_buf->nmsrs) {
- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
- error_report("error: failed to get MSR 0x%" PRIx32,
- (uint32_t)e->index);
- }
-
- assert(ret == cpu->kvm_msr_buf->nmsrs);
/*
* MTRR masks: Each mask consists of 5 parts
* a 10..0: must be zero
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v4 6/6] target/i386: Add Topdown metrics feature support
2026-06-04 2:55 [PATCH v4 0/6] target/i386: Misc PMU fixes and enabling Zide Chen
` (4 preceding siblings ...)
2026-06-04 2:55 ` [PATCH v4 5/6] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls Zide Chen
@ 2026-06-04 2:55 ` Zide Chen
5 siblings, 0 replies; 10+ messages in thread
From: Zide Chen @ 2026-06-04 2:55 UTC (permalink / raw)
To: qemu-devel, kvm, Paolo Bonzini, Zhao Liu, Peter Xu, Fabiano Rosas,
Sandipan Das
Cc: Xiaoyao Li, Dongli Zhang, Dapeng Mi, Zide Chen
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
IA32_PERF_CAPABILITIES.PERF_METRICS_AVAILABLE (bit 15) indicates that
the CPU provides built-in support for TMA L1 metrics through
the PERF_METRICS MSR. Expose it as a user-visible CPU feature
("perf-metrics"), allowing it to be explicitly enabled or disabled and
used with migratable guests.
Handle IA32_PERF_METRICS in the KVM MSR get/put paths to save and
restore it. Migrate IA32_PERF_METRICS MSR using a new subsection of
vmstate_msr_architectural_pmu.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Co-developed-by: Zide Chen <zide.chen@intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
V3: New patch.
---
target/i386/cpu.c | 2 +-
target/i386/cpu.h | 3 +++
target/i386/kvm/kvm.c | 10 ++++++++++
target/i386/machine.c | 25 ++++++++++++++++++++++++-
4 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index c978e957df6a..c4de8639bd48 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1620,7 +1620,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
- NULL, "full-width-write", NULL, NULL,
+ NULL, "full-width-write", NULL, "perf-metrics",
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8cc3c2f139e7..ed5069bb5fad 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -422,6 +422,7 @@ typedef enum X86Seg {
#define MSR_IA32_PERF_CAPABILITIES 0x345
#define PERF_CAP_LBR_FMT 0x3f
#define PERF_CAP_FULL_WRITE (1U << 13)
+#define PERF_CAP_TOPDOWN (1U << 15)
#define MSR_IA32_TSX_CTRL 0x122
#define MSR_IA32_TSCDEADLINE 0x6e0
@@ -507,6 +508,7 @@ typedef enum X86Seg {
#define MSR_CORE_PERF_FIXED_CTR0 0x309
#define MSR_CORE_PERF_FIXED_CTR1 0x30a
#define MSR_CORE_PERF_FIXED_CTR2 0x30b
+#define MSR_PERF_METRICS 0x329
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f
@@ -2101,6 +2103,7 @@ typedef struct CPUArchState {
uint64_t msr_fixed_ctr_ctrl;
uint64_t msr_global_ctrl;
uint64_t msr_global_status;
+ uint64_t msr_perf_metrics;
uint64_t msr_fixed_counters[MAX_FIXED_COUNTERS];
uint64_t msr_gp_counters[MAX_GP_COUNTERS];
uint64_t msr_gp_evtsel[MAX_GP_COUNTERS];
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 9b6407794e43..777510e52843 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4368,6 +4368,10 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i,
env->msr_fixed_counters[i]);
}
+ /* SDM: Write IA32_PERF_METRICS after fixed counter 3. */
+ if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_TOPDOWN) {
+ kvm_msr_entry_add(cpu, MSR_PERF_METRICS, env->msr_perf_metrics);
+ }
for (i = 0; i < num_pmu_gp_counters; i++) {
kvm_msr_entry_add(cpu, perf_cntr_base + i,
env->msr_gp_counters[i]);
@@ -4941,6 +4945,9 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0);
}
+ if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_TOPDOWN) {
+ kvm_msr_entry_add(cpu, MSR_PERF_METRICS, 0);
+ }
for (i = 0; i < num_pmu_fixed_counters; i++) {
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
}
@@ -5297,6 +5304,9 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
env->msr_global_status = msrs[i].data;
break;
+ case MSR_PERF_METRICS:
+ env->msr_perf_metrics = msrs[i].data;
+ break;
case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data;
break;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 05aa38a8a43d..2ec6ca1428cf 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -659,6 +659,25 @@ static const VMStateDescription vmstate_msr_ia32_feature_control = {
}
};
+static bool perf_metrics_enabled(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ return !!env->msr_perf_metrics;
+}
+
+static const VMStateDescription vmstate_msr_perf_metrics = {
+ .name = "cpu/msr_architectural_pmu/msr_perf_metrics",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = perf_metrics_enabled,
+ .fields = (const VMStateField[]){
+ VMSTATE_UINT64(env.msr_perf_metrics, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static bool pmu_enable_needed(void *opaque)
{
X86CPU *cpu = opaque;
@@ -697,7 +716,11 @@ static const VMStateDescription vmstate_msr_architectural_pmu = {
VMSTATE_UINT64_ARRAY(env.msr_gp_counters, X86CPU, MAX_GP_COUNTERS),
VMSTATE_UINT64_ARRAY(env.msr_gp_evtsel, X86CPU, MAX_GP_COUNTERS),
VMSTATE_END_OF_LIST()
- }
+ },
+ .subsections = (const VMStateDescription * const []) {
+ &vmstate_msr_perf_metrics,
+ NULL,
+ },
};
static bool mpx_needed(void *opaque)
--
2.54.0
^ permalink raw reply related [flat|nested] 10+ messages in thread