All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: keir@xen.org, jbeulich@suse.com,
	stefano.stabellini@eu.citrix.com, eddie.dong@intel.com,
	ian.jackson@eu.citrix.com, xen-devel@lists.xen.org,
	jun.nakajima@intel.com, ian.campbell@citrix.com
Subject: Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
Date: Tue, 11 Mar 2014 14:26:48 +0000	[thread overview]
Message-ID: <531F1D28.5090108@citrix.com> (raw)
In-Reply-To: <531F188F.6030807@oracle.com>

On 11/03/14 14:07, Boris Ostrovsky wrote:
> On 03/11/2014 06:10 AM, Andrew Cooper wrote:
>> On 11/03/14 03:54, Boris Ostrovsky wrote:
>>> Currently only "real" cpuid leaves can be overwritten by users via
>>> 'cpuid' option in the configuration file. This patch provides
>>> ability to
>>> do the same for hypervisor leaves (those in the 0x40000000 range).
>>>
>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> How?  There is nothing stopping leaves in 0x40000000 being set in the
>> policy with XEN_DOMCTL_set_cpuid, but I dont see anything which plumbs
>> this together at the Xen level.
>
> Right. What this patch mostly provides is ability to query the
> hypervisor (via sysctl) about default values of hypervisor CPUID leaf
> from userspace. We cannot use CPUID instruction here (for dom0). And
> /dev/cpu/<n>/cpuid may not exist.

The XEN_FORCED_EMULATION prefix would be fine, and not require a new
custom hypercall, but only an HVM guest is going to care whether it can
find this magic leaf.

>
> We then use these values plus whatever the user requested (because the
> user may ask for only one of the 4 registers) to pass to the
> subsequent XEN_DOMCTL_set_cpuid call.

I currently have a project to fix this braindead thinking of having Xen
and libxc guess at what eachother supports and will report.

>
>>
>>> ---
>>>   tools/libxc/xc_cpuid_x86.c   |   23 ++++++++++++++++++++++-
>>>   tools/libxc/xc_misc.c        |   18 ++++++++++++++++++
>>>   tools/libxc/xenctrl.h        |    2 ++
>>>   xen/arch/x86/domain.c        |   19 ++++++++++++++++---
>>>   xen/arch/x86/sysctl.c        |   17 +++++++++++++++++
>>>   xen/arch/x86/traps.c         |    3 +++
>>>   xen/include/asm-x86/domain.h |    7 +++++++
>>>   xen/include/public/sysctl.h  |   18 ++++++++++++++++++
>>>   8 files changed, 103 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
>>> index bbbf9b8..544a0fd 100644
>>> --- a/tools/libxc/xc_cpuid_x86.c
>>> +++ b/tools/libxc/xc_cpuid_x86.c
>>> @@ -33,6 +33,8 @@
>>>   #define DEF_MAX_INTELEXT  0x80000008u
>>>   #define DEF_MAX_AMDEXT    0x8000001cu
>>>   +#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
>>> +
>> This check is wrong.
>
> Because of Viridian leaves? Or something else?

It should be (((idx) & 0xf0000000) == 0x40000000)

According to the AMD and Intel manuals, it is strictly leaf 0x40000000
reserved for hypervisor use, not 0xC0000000 or others.

>
>>
>>>   static int hypervisor_is_64bit(xc_interface *xch)
>>>   {
>>>       xen_capabilities_info_t xen_caps = "";
>>> @@ -555,6 +557,9 @@ static int xc_cpuid_policy(
>>>   {
>>>       xc_dominfo_t        info;
>>>   +    if ( HYPERVISOR_LEAF(input[0]) )
>>> +        return 0;
>>> +
>>>       if ( xc_domain_getinfo(xch, domid, 1, &info) == 0 )
>>>           return -EINVAL;
>>>   @@ -754,7 +759,23 @@ int xc_cpuid_set(
>>>         memset(config_transformed, 0, 4 * sizeof(*config_transformed));
>>>   -    cpuid(input, regs);
>>> +    if ( HYPERVISOR_LEAF(input[0]) )
>>> +    {
>>> +        xc_cpuid_t cpuid;
>>> +
>>> +        cpuid.input[0] = input[0];
>>> +        cpuid.input[1] = input[1];
>>> +
>>> +        if (xc_cpuid(xch, &cpuid))
>>> +            regs[0] = regs[1] = regs[2] = regs[3] = 0;
>>> +        else {
>>> +            regs[0] = cpuid.eax;
>>> +            regs[1] = cpuid.ebx;
>>> +            regs[2] = cpuid.ecx;
>>> +            regs[3] = cpuid.edx;
>>> +        }
>>> +     } else
>>> +        cpuid(input, regs);
>>>         memcpy(polregs, regs, sizeof(regs));
>>>       xc_cpuid_policy(xch, domid, input, polregs);
>>> diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
>>> index 3303454..4e8669b 100644
>>> --- a/tools/libxc/xc_misc.c
>>> +++ b/tools/libxc/xc_misc.c
>>> @@ -159,6 +159,24 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys)
>>>       return ret;
>>>   }
>>>   +int xc_cpuid(xc_interface *xch,
>>> +             xc_cpuid_t *cpuid)
>>> +{
>>> +    int ret;
>>> +    DECLARE_SYSCTL;
>>> +
>>> +    sysctl.cmd = XEN_SYSCTL_cpuid_op;
>>> +
>>> +    memcpy(&sysctl.u.cpuid, cpuid, sizeof(*cpuid));
>>> +
>>> +    if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
>>> +        return ret;
>>> +
>>> +    memcpy(cpuid, &sysctl.u.cpuid, sizeof(*cpuid));
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   int xc_physinfo(xc_interface *xch,
>>>                   xc_physinfo_t *put_info)
>>>   {
>>> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
>>> index 13f816b..6c709f5 100644
>>> --- a/tools/libxc/xenctrl.h
>>> +++ b/tools/libxc/xenctrl.h
>>> @@ -1135,6 +1135,7 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys);
>>>   typedef xen_sysctl_physinfo_t xc_physinfo_t;
>>>   typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
>>>   typedef xen_sysctl_numainfo_t xc_numainfo_t;
>>> +typedef xen_sysctl_cpuid_t xc_cpuid_t;
>>>     typedef uint32_t xc_cpu_to_node_t;
>>>   typedef uint32_t xc_cpu_to_socket_t;
>>> @@ -1146,6 +1147,7 @@ typedef uint32_t xc_node_to_node_dist_t;
>>>   int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
>>>   int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
>>>   int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
>>> +int xc_cpuid(xc_interface *xch, xc_cpuid_t *cpuid);
>>>     int xc_sched_id(xc_interface *xch,
>>>                   int *sched_id);
>>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>>> index c42a079..98e2b5f 100644
>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -1997,7 +1997,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
>>>           vpmu_dump(v);
>>>   }
>>>   -void domain_cpuid(
>>> +bool_t domain_cpuid_exists(
>>>       struct domain *d,
>>>       unsigned int  input,
>>>       unsigned int  sub_input,
>>> @@ -2030,11 +2030,24 @@ void domain_cpuid(
>>>                    !d->disable_migrate && !d->arch.vtsc )
>>>                   *edx &= ~(1u<<8); /* TSC Invariant */
>>>   -            return;
>>> +            return 1;
>>>           }
>>>       }
>>>   -    *eax = *ebx = *ecx = *edx = 0;
>>> +    return 0;
>>> +}
>>> +
>>> +void domain_cpuid(
>>> +    struct domain *d,
>>> +    unsigned int  input,
>>> +    unsigned int  sub_input,
>>> +    unsigned int  *eax,
>>> +    unsigned int  *ebx,
>>> +    unsigned int  *ecx,
>>> +    unsigned int  *edx)
>>> +{
>>> +    if ( !domain_cpuid_exists(d, input, sub_input, eax, ebx, ecx,
>>> edx) )
>>> +        *eax = *ebx = *ecx = *edx = 0;
>>>   }
>>>     void vcpu_kick(struct vcpu *v)
>>> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
>>> index 15d4b91..08f8038 100644
>>> --- a/xen/arch/x86/sysctl.c
>>> +++ b/xen/arch/x86/sysctl.c
>>> @@ -101,6 +101,23 @@ long arch_do_sysctl(
>>>       }
>>>       break;
>>>   +    case XEN_SYSCTL_cpuid_op:
>> This would appear to be a cpuid instruction in the context of a domain,
>> which should be a domctl, not a sysctl.  I have a different
>> implementation of sysctl cpuid posted, which takes a pcpu parameter.
>
> No, this is not intended to handle CPUID instruction. This is invoked
> only as part of a sysctl.
>
> As for domctl vs sysctl --- I in fact would have preferred to do this
> via domctl since we already have a data structure to handle this
> (xen_domctl_cpuid). I decided to use sysctl since the intent here is
> to query what the system provides, not what a domain sees.
>
>
>>
>>> +    {
>>> +        struct xen_sysctl_cpuid *cpuid = &sysctl->u.cpuid;
>>> +
>>> +        if ( !cpuid_hypervisor_leaves(cpuid->input[0],
>>> cpuid->input[1],
>>> +                                      &cpuid->eax,&cpuid->ebx,
>>> +                                      &cpuid->ecx, &cpuid->edx) )
>>> +            asm ( "cpuid"
>>> +                 :"=a" (cpuid->eax), "=b" (cpuid->ebx),
>>> +                  "=c" (cpuid->ecx), "=d" (cpuid->edx)
>>> +                 : "0" (cpuid->input[0]), "1" (cpuid->input[1]) );
>> This will likely bypass masking/levelling for a domain.  As suggested,
>> the hypervisor leaves should be plumbed properly through to be usable
>> from domain_cpuid().
>
> Yes, that was the intent. The thinking here is that we provide to the
> sysctl caller what the actual CPUID is. Now, this (the asm part) is
> not used anywhere so if we limit this sysctl to hypervisor leaves (or
> even to leaf 0x40000001 only as Jan suggested) we won't need this.
>
> (Moreover, for 0x40000001-only approach we may not even need this
> whole sysctl business)
>
> Thanks.
> -boris

All of this smells like it would be substantially more simple under the
proposition in my design to fix heterogeneous feature levelling, where
Xen presents a full and completely CPUID policy for PV and HVM domains
for consumption my the toolstack.

This removes the need for these "bypass the current domains policy so I
can see something about real hardware to build another domain against",
and avoids having libxc create a wrong idea of what it thinks Xen will
provide to the guest, just to have Xen fix up later anyway.

~Andrew

  reply	other threads:[~2014-03-11 14:26 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-11  3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
2014-03-11  3:54 ` [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file Boris Ostrovsky
2014-03-11  8:45   ` Jan Beulich
2014-03-11 14:24     ` Boris Ostrovsky
2014-03-11 10:10   ` Andrew Cooper
2014-03-11 14:07     ` Boris Ostrovsky
2014-03-11 14:26       ` Andrew Cooper [this message]
2014-03-11 14:48         ` Boris Ostrovsky
2014-03-11 16:19         ` Jan Beulich
2014-03-11  3:54 ` [PATCH v2 2/4] x86/hvm: Revert 80ecb40362365ba77e68fc609de8bd3b7208ae19 Boris Ostrovsky
2014-03-11  3:54 ` [PATCH v2 3/4] x86/hvm: Add HVM-specific hypervisor CPUID leaf Boris Ostrovsky
2014-03-11  3:54 ` [PATCH v2 4/4] x86/hvm: Indicate avaliability of HW support of APIC virtualization to HVM guests Boris Ostrovsky
2014-03-11  7:37 ` [PATCH v2 0/4] Expose HW APIC virtualization support " Zhang, Yang Z
2014-03-11 14:32   ` Boris Ostrovsky
2014-03-11 11:00 ` David Vrabel
2014-03-11 14:14   ` Boris Ostrovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=531F1D28.5090108@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=eddie.dong@intel.com \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=keir@xen.org \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.