Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: keir@xen.org, jbeulich@suse.com,
	stefano.stabellini@eu.citrix.com, eddie.dong@intel.com,
	ian.jackson@eu.citrix.com, xen-devel@lists.xen.org,
	jun.nakajima@intel.com, ian.campbell@citrix.com
Subject: Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
Date: Tue, 11 Mar 2014 14:26:48 +0000	[thread overview]
Message-ID: <531F1D28.5090108@citrix.com> (raw)
In-Reply-To: <531F188F.6030807@oracle.com>

On 11/03/14 14:07, Boris Ostrovsky wrote:
> On 03/11/2014 06:10 AM, Andrew Cooper wrote:
>> On 11/03/14 03:54, Boris Ostrovsky wrote:
>>> Currently only "real" cpuid leaves can be overwritten by users via
>>> 'cpuid' option in the configuration file. This patch provides
>>> ability to
>>> do the same for hypervisor leaves (those in the 0x40000000 range).
>>>
>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> How?  There is nothing stopping leaves in 0x40000000 being set in the
>> policy with XEN_DOMCTL_set_cpuid, but I dont see anything which plumbs
>> this together at the Xen level.
>
> Right. What this patch mostly provides is ability to query the
> hypervisor (via sysctl) about default values of hypervisor CPUID leaf
> from userspace. We cannot use CPUID instruction here (for dom0). And
> /dev/cpu/<n>/cpuid may not exist.

The XEN_FORCED_EMULATION prefix would be fine, and not require a new
custom hypercall, but only an HVM guest is going to care whether it can
find this magic leaf.

>
> We then use these values plus whatever the user requested (because the
> user may ask for only one of the 4 registers) to pass to the
> subsequent XEN_DOMCTL_set_cpuid call.

I currently have a project to fix this braindead thinking of having Xen
and libxc guess at what eachother supports and will report.

>
>>
>>> ---
>>>   tools/libxc/xc_cpuid_x86.c   |   23 ++++++++++++++++++++++-
>>>   tools/libxc/xc_misc.c        |   18 ++++++++++++++++++
>>>   tools/libxc/xenctrl.h        |    2 ++
>>>   xen/arch/x86/domain.c        |   19 ++++++++++++++++---
>>>   xen/arch/x86/sysctl.c        |   17 +++++++++++++++++
>>>   xen/arch/x86/traps.c         |    3 +++
>>>   xen/include/asm-x86/domain.h |    7 +++++++
>>>   xen/include/public/sysctl.h  |   18 ++++++++++++++++++
>>>   8 files changed, 103 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
>>> index bbbf9b8..544a0fd 100644
>>> --- a/tools/libxc/xc_cpuid_x86.c
>>> +++ b/tools/libxc/xc_cpuid_x86.c
>>> @@ -33,6 +33,8 @@
>>>   #define DEF_MAX_INTELEXT  0x80000008u
>>>   #define DEF_MAX_AMDEXT    0x8000001cu
>>>   +#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
>>> +
>> This check is wrong.
>
> Because of Viridian leaves? Or something else?

It should be (((idx) & 0xf0000000) == 0x40000000)

According to the AMD and Intel manuals, it is strictly leaf 0x40000000
reserved for hypervisor use, not 0xC0000000 or others.

>
>>
>>>   static int hypervisor_is_64bit(xc_interface *xch)
>>>   {
>>>       xen_capabilities_info_t xen_caps = "";
>>> @@ -555,6 +557,9 @@ static int xc_cpuid_policy(
>>>   {
>>>       xc_dominfo_t        info;
>>>   +    if ( HYPERVISOR_LEAF(input[0]) )
>>> +        return 0;
>>> +
>>>       if ( xc_domain_getinfo(xch, domid, 1, &info) == 0 )
>>>           return -EINVAL;
>>>   @@ -754,7 +759,23 @@ int xc_cpuid_set(
>>>         memset(config_transformed, 0, 4 * sizeof(*config_transformed));
>>>   -    cpuid(input, regs);
>>> +    if ( HYPERVISOR_LEAF(input[0]) )
>>> +    {
>>> +        xc_cpuid_t cpuid;
>>> +
>>> +        cpuid.input[0] = input[0];
>>> +        cpuid.input[1] = input[1];
>>> +
>>> +        if (xc_cpuid(xch, &cpuid))
>>> +            regs[0] = regs[1] = regs[2] = regs[3] = 0;
>>> +        else {
>>> +            regs[0] = cpuid.eax;
>>> +            regs[1] = cpuid.ebx;
>>> +            regs[2] = cpuid.ecx;
>>> +            regs[3] = cpuid.edx;
>>> +        }
>>> +     } else
>>> +        cpuid(input, regs);
>>>         memcpy(polregs, regs, sizeof(regs));
>>>       xc_cpuid_policy(xch, domid, input, polregs);
>>> diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
>>> index 3303454..4e8669b 100644
>>> --- a/tools/libxc/xc_misc.c
>>> +++ b/tools/libxc/xc_misc.c
>>> @@ -159,6 +159,24 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys)
>>>       return ret;
>>>   }
>>>   +int xc_cpuid(xc_interface *xch,
>>> +             xc_cpuid_t *cpuid)
>>> +{
>>> +    int ret;
>>> +    DECLARE_SYSCTL;
>>> +
>>> +    sysctl.cmd = XEN_SYSCTL_cpuid_op;
>>> +
>>> +    memcpy(&sysctl.u.cpuid, cpuid, sizeof(*cpuid));
>>> +
>>> +    if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
>>> +        return ret;
>>> +
>>> +    memcpy(cpuid, &sysctl.u.cpuid, sizeof(*cpuid));
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   int xc_physinfo(xc_interface *xch,
>>>                   xc_physinfo_t *put_info)
>>>   {
>>> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
>>> index 13f816b..6c709f5 100644
>>> --- a/tools/libxc/xenctrl.h
>>> +++ b/tools/libxc/xenctrl.h
>>> @@ -1135,6 +1135,7 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys);
>>>   typedef xen_sysctl_physinfo_t xc_physinfo_t;
>>>   typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
>>>   typedef xen_sysctl_numainfo_t xc_numainfo_t;
>>> +typedef xen_sysctl_cpuid_t xc_cpuid_t;
>>>     typedef uint32_t xc_cpu_to_node_t;
>>>   typedef uint32_t xc_cpu_to_socket_t;
>>> @@ -1146,6 +1147,7 @@ typedef uint32_t xc_node_to_node_dist_t;
>>>   int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
>>>   int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
>>>   int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
>>> +int xc_cpuid(xc_interface *xch, xc_cpuid_t *cpuid);
>>>     int xc_sched_id(xc_interface *xch,
>>>                   int *sched_id);
>>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>>> index c42a079..98e2b5f 100644
>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -1997,7 +1997,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
>>>           vpmu_dump(v);
>>>   }
>>>   -void domain_cpuid(
>>> +bool_t domain_cpuid_exists(
>>>       struct domain *d,
>>>       unsigned int  input,
>>>       unsigned int  sub_input,
>>> @@ -2030,11 +2030,24 @@ void domain_cpuid(
>>>                    !d->disable_migrate && !d->arch.vtsc )
>>>                   *edx &= ~(1u<<8); /* TSC Invariant */
>>>   -            return;
>>> +            return 1;
>>>           }
>>>       }
>>>   -    *eax = *ebx = *ecx = *edx = 0;
>>> +    return 0;
>>> +}
>>> +
>>> +void domain_cpuid(
>>> +    struct domain *d,
>>> +    unsigned int  input,
>>> +    unsigned int  sub_input,
>>> +    unsigned int  *eax,
>>> +    unsigned int  *ebx,
>>> +    unsigned int  *ecx,
>>> +    unsigned int  *edx)
>>> +{
>>> +    if ( !domain_cpuid_exists(d, input, sub_input, eax, ebx, ecx,
>>> edx) )
>>> +        *eax = *ebx = *ecx = *edx = 0;
>>>   }
>>>     void vcpu_kick(struct vcpu *v)
>>> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
>>> index 15d4b91..08f8038 100644
>>> --- a/xen/arch/x86/sysctl.c
>>> +++ b/xen/arch/x86/sysctl.c
>>> @@ -101,6 +101,23 @@ long arch_do_sysctl(
>>>       }
>>>       break;
>>>   +    case XEN_SYSCTL_cpuid_op:
>> This would appear to be a cpuid instruction in the context of a domain,
>> which should be a domctl, not a sysctl.  I have a different
>> implementation of sysctl cpuid posted, which takes a pcpu parameter.
>
> No, this is not intended to handle CPUID instruction. This is invoked
> only as part of a sysctl.
>
> As for domctl vs sysctl --- I in fact would have preferred to do this
> via domctl since we already have a data structure to handle this
> (xen_domctl_cpuid). I decided to use sysctl since the intent here is
> to query what the system provides, not what a domain sees.
>
>
>>
>>> +    {
>>> +        struct xen_sysctl_cpuid *cpuid = &sysctl->u.cpuid;
>>> +
>>> +        if ( !cpuid_hypervisor_leaves(cpuid->input[0],
>>> cpuid->input[1],
>>> +                                      &cpuid->eax,&cpuid->ebx,
>>> +                                      &cpuid->ecx, &cpuid->edx) )
>>> +            asm ( "cpuid"
>>> +                 :"=a" (cpuid->eax), "=b" (cpuid->ebx),
>>> +                  "=c" (cpuid->ecx), "=d" (cpuid->edx)
>>> +                 : "0" (cpuid->input[0]), "1" (cpuid->input[1]) );
>> This will likely bypass masking/levelling for a domain.  As suggested,
>> the hypervisor leaves should be plumbed properly through to be usable
>> from domain_cpuid().
>
> Yes, that was the intent. The thinking here is that we provide to the
> sysctl caller what the actual CPUID is. Now, this (the asm part) is
> not used anywhere so if we limit this sysctl to hypervisor leaves (or
> even to leaf 0x40000001 only as Jan suggested) we won't need this.
>
> (Moreover, for 0x40000001-only approach we may not even need this
> whole sysctl business)
>
> Thanks.
> -boris

All of this smells like it would be substantially more simple under the
proposition in my design to fix heterogeneous feature levelling, where
Xen presents a full and completely CPUID policy for PV and HVM domains
for consumption my the toolstack.

This removes the need for these "bypass the current domains policy so I
can see something about real hardware to build another domain against",
and avoids having libxc create a wrong idea of what it thinks Xen will
provide to the guest, just to have Xen fix up later anyway.

~Andrew

next prev parent reply	other threads:[~2014-03-11 14:26 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-11  3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
2014-03-11  3:54 ` [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file Boris Ostrovsky
2014-03-11  8:45   ` Jan Beulich
2014-03-11 14:24     ` Boris Ostrovsky
2014-03-11 10:10   ` Andrew Cooper
2014-03-11 14:07     ` Boris Ostrovsky
2014-03-11 14:26       ` Andrew Cooper [this message]
2014-03-11 14:48         ` Boris Ostrovsky
2014-03-11 16:19         ` Jan Beulich
2014-03-11  3:54 ` [PATCH v2 2/4] x86/hvm: Revert 80ecb40362365ba77e68fc609de8bd3b7208ae19 Boris Ostrovsky
2014-03-11  3:54 ` [PATCH v2 3/4] x86/hvm: Add HVM-specific hypervisor CPUID leaf Boris Ostrovsky
2014-03-11  3:54 ` [PATCH v2 4/4] x86/hvm: Indicate avaliability of HW support of APIC virtualization to HVM guests Boris Ostrovsky
2014-03-11  7:37 ` [PATCH v2 0/4] Expose HW APIC virtualization support " Zhang, Yang Z
2014-03-11 14:32   ` Boris Ostrovsky
2014-03-11 11:00 ` David Vrabel
2014-03-11 14:14   ` Boris Ostrovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=531F1D28.5090108@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=eddie.dong@intel.com \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=keir@xen.org \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).