qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: Yang Zhong <yang.zhong@intel.com>, qemu-devel@nongnu.org
Cc: seanjc@google.com, kevin.tian@intel.com,
	jing2.liu@linux.intel.com, wei.w.wang@intel.com,
	guang.zeng@intel.com
Subject: Re: [PATCH 3/7] x86: Grant AMX permission for guest
Date: Mon, 24 Jan 2022 11:16:36 +0100	[thread overview]
Message-ID: <cdd1bc06-619c-cd8c-b9cf-e6da22fd8751@redhat.com> (raw)
In-Reply-To: <20220124075523.108875-4-yang.zhong@intel.com>

On 1/24/22 08:55, Yang Zhong wrote:
> Kernel allocates 4K xstate buffer by default. For XSAVE features
> which require large state component (e.g. AMX), Linux kernel
> dynamically expands the xstate buffer only after the process has
> acquired the necessary permissions. Those are called dynamically-
> enabled XSAVE features (or dynamic xfeatures).
> 
> There are separate permissions for native tasks and guests.
> 
> Qemu should request the guest permissions for dynamic xfeatures
> which will be exposed to the guest. This only needs to be done
> once before the first vcpu is created.
> 
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> ---
>   target/i386/cpu.h         |  7 +++++++
>   target/i386/cpu.c         | 31 +++++++++++++++++++++++++++++++
>   target/i386/kvm/kvm-cpu.c | 12 ++++++------
>   target/i386/kvm/kvm.c     |  6 ++++++
>   4 files changed, 50 insertions(+), 6 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 06d2d6bccf..d4ad0f56bd 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -549,6 +549,13 @@ typedef enum X86Seg {
>   #define XSTATE_ZMM_Hi256_MASK           (1ULL << XSTATE_ZMM_Hi256_BIT)
>   #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
>   #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
> +#define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
> +#define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
> +#define XFEATURE_XTILE_MASK             (XSTATE_XTILE_CFG_MASK \
> +                                         | XSTATE_XTILE_DATA_MASK)
> +
> +#define ARCH_GET_XCOMP_GUEST_PERM       0x1024
> +#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
>   
>   #define ESA_FEATURE_ALIGN64_BIT         1
>   
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 3390820745..29b0348c25 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -43,6 +43,10 @@
>   #include "disas/capstone.h"
>   #include "cpu-internal.h"
>   
> +#include <sys/syscall.h>
> +
> +bool request_perm;
> +
>   /* Helpers for building CPUID[2] descriptors: */
>   
>   struct CPUID2CacheDescriptorInfo {
> @@ -6000,6 +6004,27 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w)
>       }
>   }
>   
> +static void kvm_request_xsave_components(X86CPU *cpu, uint32_t bit)
> +{
> +    KVMState *s = CPU(cpu)->kvm_state;
> +
> +    long rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
> +                      bit);
> +    if (rc) {
> +        /*
> +         * The older kernel version(<5.15) can't support
> +         * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
> +         */
> +        return;
> +    }
> +
> +    rc = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
> +    if (!(rc & XFEATURE_XTILE_MASK)) {
> +        error_report("get cpuid failure and rc=0x%lx", rc);
> +        exit(EXIT_FAILURE);
> +    }
> +}
> +
>   /* Calculate XSAVE components based on the configured CPU feature flags */
>   static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>   {
> @@ -6021,6 +6046,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>           }
>       }
>   
> +    /* Only request permission from fisrt vcpu. */
> +    if (kvm_enabled() && !request_perm) {
> +        kvm_request_xsave_components(cpu, XSTATE_XTILE_DATA_BIT);
> +        request_perm = true;
> +    }

You should pass "mask" here, or "mask & XSTATE_DYNAMIC_MASK" so that the 
components are only requested if necessary.

>       env->features[FEAT_XSAVE_COMP_LO] = mask;
>       env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
>   }
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index 033ca011ea..5ab6a0b9d2 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
>   static void kvm_cpu_xsave_init(void)
>   {
>       static bool first = true;
> -    KVMState *s = kvm_state;
> +    uint32_t eax, ebx, ecx, edx;
>       int i;
>   
>       if (!first) {
> @@ -100,13 +100,13 @@ static void kvm_cpu_xsave_init(void)
>           ExtSaveArea *esa = &x86_ext_save_areas[i];
>   
>           if (esa->size) {
> -            int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
> -            if (sz != 0) {
> -                assert(esa->size == sz);
> -                esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
> +            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
> +            if (eax != 0) {
> +                assert(esa->size == eax);
> +                esa->offset = ebx;
>               }
>   
> -            esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
> +            esa->ecx = ecx;

I think esa->ecx should be assigned inside the "if".  This is also true 
in patch 1.

>           }
>       }
>   }
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 2c8feb4a6f..caf1388d8b 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -405,6 +405,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
>           if (!has_msr_arch_capabs) {
>               ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
>           }
> +    } else if (function == 0xd && index == 0 && reg == R_EAX) {
> +        /*
> +         * We can set the AMX XTILE DATA flag, even if KVM does not
> +         * return it on GET_SUPPORTED_CPUID.
> +         */
> +        ret |= XSTATE_XTILE_DATA_MASK;
>       } else if (function == 0x80000001 && reg == R_ECX) {
>           /*
>            * It's safe to enable TOPOEXT even if it's not returned by
> 

Instead of setting XSTATE_XTILE_DATA_MASK blindly, you need something 
like arch_prctl(ARCH_GET_XCOMP_GUEST_SUPP).  However, this arch_prctl 
only exists in the bare-metal version ARCH_GET_XCOMP_SUPP, and it would 
need access to the variable supported_xcr0 that KVM exports.  So I think 
it's better to implement it as a new KVM_CHECK_EXTENSION value instead 
of a prctl.

Paolo


  reply	other threads:[~2022-01-24 10:19 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-24  7:55 [PATCH 0/7] AMX support in Qemu Yang Zhong
2022-01-24  7:55 ` [PATCH 1/7] x86: Fix the 64-byte boundary enumeration for extended state Yang Zhong
2022-01-24  7:55 ` [PATCH 2/7] x86: Add AMX XTILECFG and XTILEDATA components Yang Zhong
2022-01-24  7:55 ` [PATCH 3/7] x86: Grant AMX permission for guest Yang Zhong
2022-01-24 10:16   ` Paolo Bonzini [this message]
2022-01-27 13:45     ` Yang Zhong
2022-01-28 11:09       ` Paolo Bonzini
2022-01-24  7:55 ` [PATCH 4/7] x86: Add XFD faulting bit for state components Yang Zhong
2022-01-24  7:55 ` [PATCH 5/7] x86: Add AMX CPUIDs enumeration Yang Zhong
2022-01-24 10:13   ` Paolo Bonzini
2022-01-27 13:46     ` Yang Zhong
2022-01-24  7:55 ` [PATCH 6/7] x86: add support for KVM_CAP_XSAVE2 and AMX state migration Yang Zhong
2022-01-24 10:15   ` Paolo Bonzini
2022-01-27 13:47     ` Yang Zhong
2022-01-24  7:55 ` [PATCH 7/7] x86: Support XFD and AMX xsave data migration Yang Zhong
2022-01-24 10:17 ` [PATCH 0/7] AMX support in Qemu Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cdd1bc06-619c-cd8c-b9cf-e6da22fd8751@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=guang.zeng@intel.com \
    --cc=jing2.liu@linux.intel.com \
    --cc=kevin.tian@intel.com \
    --cc=qemu-devel@nongnu.org \
    --cc=seanjc@google.com \
    --cc=wei.w.wang@intel.com \
    --cc=yang.zhong@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).