From: Zide Chen <zide.chen@intel.com>
To: qemu-devel@nongnu.org, kvm@vger.kernel.org,
Paolo Bonzini <pbonzini@redhat.com>,
Zhao Liu <zhao1.liu@intel.com>, Peter Xu <peterx@redhat.com>,
Fabiano Rosas <farosas@suse.de>,
Sandipan Das <sandipan.das@amd.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>,
Dongli Zhang <dongli.zhang@oracle.com>,
Dapeng Mi <dapeng1.mi@linux.intel.com>,
Zide Chen <zide.chen@intel.com>
Subject: [PATCH V3 06/13] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls
Date: Wed, 4 Mar 2026 10:07:05 -0800 [thread overview]
Message-ID: <20260304180713.360471-7-zide.chen@intel.com> (raw)
In-Reply-To: <20260304180713.360471-1-zide.chen@intel.com>
Newer Intel server CPUs support a large number of PMU MSRs. Currently,
QEMU allocates cpu->kvm_msr_buf as a single-page buffer, which is not
sufficient to hold all possible MSRs.
Increase MSR_BUF_SIZE to 8192 bytes, providing space for up to 511 MSRs.
This is sufficient even for the theoretical worst case, such as
architectural LBR with a depth of 64.
KVM_[GET/SET]_MSRS is limited to 255 MSRs per call. Raising this limit
to 511 would require changes in KVM and would introduce backward
compatibility issues. Instead, split requests into multiple
KVM_[GET/SET]_MSRS calls when the number of MSRs exceeds the API limit.
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
v3:
- Address Dapeng's comments.
---
target/i386/kvm/kvm.c | 110 +++++++++++++++++++++++++++++++++++-------
1 file changed, 92 insertions(+), 18 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 39a67c58ac22..4ba54151320f 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -97,9 +97,12 @@
#define KVM_APIC_BUS_CYCLE_NS 1
#define KVM_APIC_BUS_FREQUENCY (1000000000ULL / KVM_APIC_BUS_CYCLE_NS)
-/* A 4096-byte buffer can hold the 8-byte kvm_msrs header, plus
- * 255 kvm_msr_entry structs */
-#define MSR_BUF_SIZE 4096
+/* An 8192-byte buffer can hold the 8-byte kvm_msrs header, plus
+ * 511 kvm_msr_entry structs */
+#define MSR_BUF_SIZE 8192
+
+/* Maximum number of MSRs in a single KVM_[GET/SET]_MSRS call. */
+#define KVM_MAX_IO_MSRS 255
typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
@@ -4016,21 +4019,99 @@ static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f)
}
}
-static int kvm_buf_set_msrs(X86CPU *cpu)
+static int __kvm_buf_set_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
{
- int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);
+ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, msrs);
if (ret < 0) {
return ret;
}
- if (ret < cpu->kvm_msr_buf->nmsrs) {
- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
+ if (ret < msrs->nmsrs) {
+ struct kvm_msr_entry *e = &msrs->entries[ret];
error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64,
(uint32_t)e->index, (uint64_t)e->data);
}
- assert(ret == cpu->kvm_msr_buf->nmsrs);
- return 0;
+ assert(ret == msrs->nmsrs);
+ return ret;
+}
+
+static int __kvm_buf_get_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
+{
+ int ret;
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, msrs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret < msrs->nmsrs) {
+ struct kvm_msr_entry *e = &msrs->entries[ret];
+ error_report("error: failed to get MSR 0x%" PRIx32,
+ (uint32_t)e->index);
+ }
+
+ assert(ret == msrs->nmsrs);
+ return ret;
+}
+
+static int kvm_buf_set_or_get_msrs(X86CPU *cpu, bool is_write)
+{
+ struct kvm_msr_entry *entries = cpu->kvm_msr_buf->entries;
+ struct kvm_msrs *buf = NULL;
+ int current, remaining, ret = 0;
+ size_t buf_size;
+
+ buf_size = KVM_MAX_IO_MSRS * sizeof(struct kvm_msr_entry) +
+ sizeof(struct kvm_msrs);
+ buf = g_malloc(buf_size);
+
+ remaining = cpu->kvm_msr_buf->nmsrs;
+ current = 0;
+ while (remaining) {
+ size_t size;
+
+ memset(buf, 0, buf_size);
+
+ if (remaining > KVM_MAX_IO_MSRS) {
+ buf->nmsrs = KVM_MAX_IO_MSRS;
+ } else {
+ buf->nmsrs = remaining;
+ }
+
+ size = buf->nmsrs * sizeof(entries[0]);
+ memcpy(buf->entries, &entries[current], size);
+
+ if (is_write) {
+ ret = __kvm_buf_set_msrs(cpu, buf);
+ } else {
+ ret = __kvm_buf_get_msrs(cpu, buf);
+ }
+
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (!is_write)
+ memcpy(&entries[current], buf->entries, size);
+
+ current += buf->nmsrs;
+ remaining -= buf->nmsrs;
+ }
+
+out:
+ g_free(buf);
+ return ret < 0 ? ret : cpu->kvm_msr_buf->nmsrs;
+}
+
+static inline int kvm_buf_set_msrs(X86CPU *cpu)
+{
+ return kvm_buf_set_or_get_msrs(cpu, true);
+}
+
+static inline int kvm_buf_get_msrs(X86CPU *cpu)
+{
+ return kvm_buf_set_or_get_msrs(cpu, false);
}
static void kvm_init_msrs(X86CPU *cpu)
@@ -4066,7 +4147,7 @@ static void kvm_init_msrs(X86CPU *cpu)
if (has_msr_ucode_rev) {
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
}
- assert(kvm_buf_set_msrs(cpu) == 0);
+ kvm_buf_set_msrs(cpu);
}
static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
@@ -4959,18 +5040,11 @@ static int kvm_get_msrs(X86CPU *cpu)
}
}
- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
+ ret = kvm_buf_get_msrs(cpu);
if (ret < 0) {
return ret;
}
- if (ret < cpu->kvm_msr_buf->nmsrs) {
- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
- error_report("error: failed to get MSR 0x%" PRIx32,
- (uint32_t)e->index);
- }
-
- assert(ret == cpu->kvm_msr_buf->nmsrs);
/*
* MTRR masks: Each mask consists of 5 parts
* a 10..0: must be zero
--
2.53.0
next prev parent reply other threads:[~2026-03-04 18:15 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-04 18:06 [PATCH V3 00/13] target/i386: Misc PMU fixes and enabling Zide Chen
2026-03-04 18:07 ` [PATCH V3 01/13] target/i386: Disable unsupported BTS for guest Zide Chen
2026-03-04 18:07 ` [PATCH V3 02/13] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs Zide Chen
2026-03-04 18:07 ` [PATCH V3 03/13] target/i386: Gate enable_pmu on kvm_enabled() Zide Chen
2026-03-04 18:07 ` [PATCH V3 04/13] target/i386: Adjust maximum number of PMU counters Zide Chen
2026-03-06 3:02 ` Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 05/13] target/i386: Support full-width writes for perf counters Zide Chen
2026-03-04 18:07 ` Zide Chen [this message]
2026-03-06 3:09 ` [PATCH V3 06/13] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 07/13] target/i386: Add get/set/migrate support for legacy PEBS MSRs Zide Chen
2026-03-06 3:17 ` Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 08/13] target/i386: Make some PEBS features user-visible Zide Chen
2026-03-06 3:25 ` Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 09/13] target/i386: Clean up LBR format handling Zide Chen
2026-03-04 18:07 ` [PATCH V3 10/13] target/i386: Refactor " Zide Chen
2026-03-04 18:07 ` [PATCH V3 11/13] target/i386: Add pebs-fmt CPU option Zide Chen
2026-03-06 5:23 ` Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 12/13] target/i386: Clean up Intel Debug Store feature dependencies Zide Chen
2026-03-06 5:34 ` Mi, Dapeng
2026-03-16 3:21 ` Chenyi Qiang
2026-03-16 6:57 ` Xiaoyao Li
2026-03-16 18:17 ` Chen, Zide
2026-03-16 18:17 ` Chen, Zide
2026-03-04 18:07 ` [PATCH V3 13/13] target/i386: Add Topdown metrics feature support Zide Chen
2026-03-06 5:37 ` Mi, Dapeng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260304180713.360471-7-zide.chen@intel.com \
--to=zide.chen@intel.com \
--cc=dapeng1.mi@linux.intel.com \
--cc=dongli.zhang@oracle.com \
--cc=farosas@suse.de \
--cc=kvm@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=sandipan.das@amd.com \
--cc=xiaoyao.li@intel.com \
--cc=zhao1.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox