Re: [PATCH V3 06/13] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Mi, Dapeng" <dapeng1.mi@linux.intel.com>
To: Zide Chen <zide.chen@intel.com>,
	qemu-devel@nongnu.org, kvm@vger.kernel.org,
	Paolo Bonzini <pbonzini@redhat.com>,
	Zhao Liu <zhao1.liu@intel.com>, Peter Xu <peterx@redhat.com>,
	Fabiano Rosas <farosas@suse.de>,
	Sandipan Das <sandipan.das@amd.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>,
	Dongli Zhang <dongli.zhang@oracle.com>
Subject: Re: [PATCH V3 06/13] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls
Date: Fri, 6 Mar 2026 11:09:23 +0800	[thread overview]
Message-ID: <198db4f2-4aec-4930-97bf-ed0c3418083e@linux.intel.com> (raw)
In-Reply-To: <20260304180713.360471-7-zide.chen@intel.com>

LGTM. Thanks.

Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>

On 3/5/2026 2:07 AM, Zide Chen wrote:
> Newer Intel server CPUs support a large number of PMU MSRs.  Currently,
> QEMU allocates cpu->kvm_msr_buf as a single-page buffer, which is not
> sufficient to hold all possible MSRs.
>
> Increase MSR_BUF_SIZE to 8192 bytes, providing space for up to 511 MSRs.
> This is sufficient even for the theoretical worst case, such as
> architectural LBR with a depth of 64.
>
> KVM_[GET/SET]_MSRS is limited to 255 MSRs per call.  Raising this limit
> to 511 would require changes in KVM and would introduce backward
> compatibility issues.  Instead, split requests into multiple
> KVM_[GET/SET]_MSRS calls when the number of MSRs exceeds the API limit.
>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
> v3:
> - Address Dapeng's comments.
> ---
>  target/i386/kvm/kvm.c | 110 +++++++++++++++++++++++++++++++++++-------
>  1 file changed, 92 insertions(+), 18 deletions(-)
>
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 39a67c58ac22..4ba54151320f 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -97,9 +97,12 @@
>  #define KVM_APIC_BUS_CYCLE_NS       1
>  #define KVM_APIC_BUS_FREQUENCY      (1000000000ULL / KVM_APIC_BUS_CYCLE_NS)
>  
> -/* A 4096-byte buffer can hold the 8-byte kvm_msrs header, plus
> - * 255 kvm_msr_entry structs */
> -#define MSR_BUF_SIZE 4096
> +/* An 8192-byte buffer can hold the 8-byte kvm_msrs header, plus
> + * 511 kvm_msr_entry structs */
> +#define MSR_BUF_SIZE      8192
> +
> +/* Maximum number of MSRs in one single KVM_[GET/SET]_MSRS call. */
> +#define KVM_MAX_IO_MSRS   255
>  
>  typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
>  typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
> @@ -4016,21 +4019,99 @@ static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f)
>      }
>  }
>  
> -static int kvm_buf_set_msrs(X86CPU *cpu)
> +static int __kvm_buf_set_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
>  {
> -    int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);
> +    int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, msrs);
>      if (ret < 0) {
>          return ret;
>      }
>  
> -    if (ret < cpu->kvm_msr_buf->nmsrs) {
> -        struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
> +    if (ret < msrs->nmsrs) {
> +        struct kvm_msr_entry *e = &msrs->entries[ret];
>          error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64,
>                       (uint32_t)e->index, (uint64_t)e->data);
>      }
>  
> -    assert(ret == cpu->kvm_msr_buf->nmsrs);
> -    return 0;
> +    assert(ret == msrs->nmsrs);
> +    return ret;
> +}
> +
> +static int __kvm_buf_get_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
> +{
> +    int ret;
> +
> +    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, msrs);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    if (ret < msrs->nmsrs) {
> +        struct kvm_msr_entry *e = &msrs->entries[ret];
> +        error_report("error: failed to get MSR 0x%" PRIx32,
> +                     (uint32_t)e->index);
> +    }
> +
> +    assert(ret == msrs->nmsrs);
> +    return ret;
> +}
> +
> +static int kvm_buf_set_or_get_msrs(X86CPU *cpu, bool is_write)
> +{
> +    struct kvm_msr_entry *entries = cpu->kvm_msr_buf->entries;
> +    struct kvm_msrs *buf = NULL;
> +    int current, remaining, ret = 0;
> +    size_t buf_size;
> +
> +    buf_size = KVM_MAX_IO_MSRS * sizeof(struct kvm_msr_entry) +
> +               sizeof(struct kvm_msrs);
> +    buf = g_malloc(buf_size);
> +
> +    remaining = cpu->kvm_msr_buf->nmsrs;
> +    current = 0;
> +    while (remaining) {
> +        size_t size;
> +
> +        memset(buf, 0, buf_size);
> +
> +        if (remaining > KVM_MAX_IO_MSRS) {
> +            buf->nmsrs = KVM_MAX_IO_MSRS;
> +        } else {
> +            buf->nmsrs = remaining;
> +        }
> +
> +        size = buf->nmsrs * sizeof(entries[0]);
> +        memcpy(buf->entries, &entries[current], size);
> +
> +        if (is_write) {
> +            ret = __kvm_buf_set_msrs(cpu, buf);
> +        } else {
> +            ret = __kvm_buf_get_msrs(cpu, buf);
> +        }
> +
> +        if (ret < 0) {
> +            goto out;
> +        }
> +
> +        if (!is_write)
> +            memcpy(&entries[current], buf->entries, size);
> +
> +        current += buf->nmsrs;
> +        remaining -= buf->nmsrs;
> +    }
> +
> +out:
> +    g_free(buf);
> +    return ret < 0 ? ret : cpu->kvm_msr_buf->nmsrs;
> +}
> +
> +static inline int kvm_buf_set_msrs(X86CPU *cpu)
> +{
> +    return kvm_buf_set_or_get_msrs(cpu, true);
> +}
> +
> +static inline int kvm_buf_get_msrs(X86CPU *cpu)
> +{
> +    return kvm_buf_set_or_get_msrs(cpu, false);
>  }
>  
>  static void kvm_init_msrs(X86CPU *cpu)
> @@ -4066,7 +4147,7 @@ static void kvm_init_msrs(X86CPU *cpu)
>      if (has_msr_ucode_rev) {
>          kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
>      }
> -    assert(kvm_buf_set_msrs(cpu) == 0);
> +    kvm_buf_set_msrs(cpu);
>  }
>  
>  static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
> @@ -4959,18 +5040,11 @@ static int kvm_get_msrs(X86CPU *cpu)
>          }
>      }
>  
> -    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
> +    ret = kvm_buf_get_msrs(cpu);
>      if (ret < 0) {
>          return ret;
>      }
>  
> -    if (ret < cpu->kvm_msr_buf->nmsrs) {
> -        struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
> -        error_report("error: failed to get MSR 0x%" PRIx32,
> -                     (uint32_t)e->index);
> -    }
> -
> -    assert(ret == cpu->kvm_msr_buf->nmsrs);
>      /*
>       * MTRR masks: Each mask consists of 5 parts
>       * a  10..0: must be zero

next prev parent reply	other threads:[~2026-03-06  3:09 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-04 18:06 [PATCH V3 00/13] target/i386: Misc PMU fixes and enabling Zide Chen
2026-03-04 18:07 ` [PATCH V3 01/13] target/i386: Disable unsupported BTS for guest Zide Chen
2026-04-22 10:07   ` Zhao Liu
2026-04-24 18:23     ` Chen, Zide
2026-03-04 18:07 ` [PATCH V3 02/13] target/i386: Don't save/restore PERF_GLOBAL_OVF_CTRL MSRs Zide Chen
2026-03-04 18:07 ` [PATCH V3 03/13] target/i386: Gate enable_pmu on kvm_enabled() Zide Chen
2026-03-04 18:07 ` [PATCH V3 04/13] target/i386: Adjust maximum number of PMU counters Zide Chen
2026-03-06  3:02   ` Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 05/13] target/i386: Support full-width writes for perf counters Zide Chen
2026-03-04 18:07 ` [PATCH V3 06/13] target/i386: Increase MSR_BUF_SIZE and split KVM_[GET/SET]_MSRS calls Zide Chen
2026-03-06  3:09   ` Mi, Dapeng [this message]
2026-03-04 18:07 ` [PATCH V3 07/13] target/i386: Add get/set/migrate support for legacy PEBS MSRs Zide Chen
2026-03-06  3:17   ` Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 08/13] target/i386: Make some PEBS features user-visible Zide Chen
2026-03-06  3:25   ` Mi, Dapeng
2026-03-04 18:07 ` [PATCH V3 09/13] target/i386: Clean up LBR format handling Zide Chen
2026-03-04 18:07 ` [PATCH V3 10/13] target/i386: Refactor " Zide Chen
2026-03-04 18:07 ` [PATCH V3 11/13] target/i386: Add pebs-fmt CPU option Zide Chen
2026-03-06  5:23   ` Mi, Dapeng
2026-04-22  8:21   ` Zhao Liu
2026-04-22 21:03     ` Chen, Zide
2026-03-04 18:07 ` [PATCH V3 12/13] target/i386: Clean up Intel Debug Store feature dependencies Zide Chen
2026-03-06  5:34   ` Mi, Dapeng
2026-03-16  3:21   ` Chenyi Qiang
2026-03-16  6:57     ` Xiaoyao Li
2026-03-16 18:17       ` Chen, Zide
2026-03-16 18:17     ` Chen, Zide
2026-03-04 18:07 ` [PATCH V3 13/13] target/i386: Add Topdown metrics feature support Zide Chen
2026-03-06  5:37   ` Mi, Dapeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=198db4f2-4aec-4930-97bf-ed0c3418083e@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=dongli.zhang@oracle.com \
    --cc=farosas@suse.de \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=sandipan.das@amd.com \
    --cc=xiaoyao.li@intel.com \
    --cc=zhao1.liu@intel.com \
    --cc=zide.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.