From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: keir@xen.org, jbeulich@suse.com,
stefano.stabellini@eu.citrix.com, eddie.dong@intel.com,
ian.jackson@eu.citrix.com, xen-devel@lists.xen.org,
jun.nakajima@intel.com, ian.campbell@citrix.com
Subject: Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
Date: Tue, 11 Mar 2014 14:26:48 +0000 [thread overview]
Message-ID: <531F1D28.5090108@citrix.com> (raw)
In-Reply-To: <531F188F.6030807@oracle.com>
On 11/03/14 14:07, Boris Ostrovsky wrote:
> On 03/11/2014 06:10 AM, Andrew Cooper wrote:
>> On 11/03/14 03:54, Boris Ostrovsky wrote:
>>> Currently only "real" cpuid leaves can be overwritten by users via
>>> 'cpuid' option in the configuration file. This patch provides
>>> ability to
>>> do the same for hypervisor leaves (those in the 0x40000000 range).
>>>
>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> How? There is nothing stopping leaves in 0x40000000 being set in the
>> policy with XEN_DOMCTL_set_cpuid, but I dont see anything which plumbs
>> this together at the Xen level.
>
> Right. What this patch mostly provides is ability to query the
> hypervisor (via sysctl) about default values of hypervisor CPUID leaf
> from userspace. We cannot use CPUID instruction here (for dom0). And
> /dev/cpu/<n>/cpuid may not exist.
The XEN_FORCED_EMULATION prefix would be fine, and not require a new
custom hypercall, but only an HVM guest is going to care whether it can
find this magic leaf.
>
> We then use these values plus whatever the user requested (because the
> user may ask for only one of the 4 registers) to pass to the
> subsequent XEN_DOMCTL_set_cpuid call.
I currently have a project to fix this braindead thinking of having Xen
and libxc guess at what eachother supports and will report.
>
>>
>>> ---
>>> tools/libxc/xc_cpuid_x86.c | 23 ++++++++++++++++++++++-
>>> tools/libxc/xc_misc.c | 18 ++++++++++++++++++
>>> tools/libxc/xenctrl.h | 2 ++
>>> xen/arch/x86/domain.c | 19 ++++++++++++++++---
>>> xen/arch/x86/sysctl.c | 17 +++++++++++++++++
>>> xen/arch/x86/traps.c | 3 +++
>>> xen/include/asm-x86/domain.h | 7 +++++++
>>> xen/include/public/sysctl.h | 18 ++++++++++++++++++
>>> 8 files changed, 103 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
>>> index bbbf9b8..544a0fd 100644
>>> --- a/tools/libxc/xc_cpuid_x86.c
>>> +++ b/tools/libxc/xc_cpuid_x86.c
>>> @@ -33,6 +33,8 @@
>>> #define DEF_MAX_INTELEXT 0x80000008u
>>> #define DEF_MAX_AMDEXT 0x8000001cu
>>> +#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
>>> +
>> This check is wrong.
>
> Because of Viridian leaves? Or something else?
It should be (((idx) & 0xf0000000) == 0x40000000)
According to the AMD and Intel manuals, it is strictly leaf 0x40000000
reserved for hypervisor use, not 0xC0000000 or others.
>
>>
>>> static int hypervisor_is_64bit(xc_interface *xch)
>>> {
>>> xen_capabilities_info_t xen_caps = "";
>>> @@ -555,6 +557,9 @@ static int xc_cpuid_policy(
>>> {
>>> xc_dominfo_t info;
>>> + if ( HYPERVISOR_LEAF(input[0]) )
>>> + return 0;
>>> +
>>> if ( xc_domain_getinfo(xch, domid, 1, &info) == 0 )
>>> return -EINVAL;
>>> @@ -754,7 +759,23 @@ int xc_cpuid_set(
>>> memset(config_transformed, 0, 4 * sizeof(*config_transformed));
>>> - cpuid(input, regs);
>>> + if ( HYPERVISOR_LEAF(input[0]) )
>>> + {
>>> + xc_cpuid_t cpuid;
>>> +
>>> + cpuid.input[0] = input[0];
>>> + cpuid.input[1] = input[1];
>>> +
>>> + if (xc_cpuid(xch, &cpuid))
>>> + regs[0] = regs[1] = regs[2] = regs[3] = 0;
>>> + else {
>>> + regs[0] = cpuid.eax;
>>> + regs[1] = cpuid.ebx;
>>> + regs[2] = cpuid.ecx;
>>> + regs[3] = cpuid.edx;
>>> + }
>>> + } else
>>> + cpuid(input, regs);
>>> memcpy(polregs, regs, sizeof(regs));
>>> xc_cpuid_policy(xch, domid, input, polregs);
>>> diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
>>> index 3303454..4e8669b 100644
>>> --- a/tools/libxc/xc_misc.c
>>> +++ b/tools/libxc/xc_misc.c
>>> @@ -159,6 +159,24 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys)
>>> return ret;
>>> }
>>> +int xc_cpuid(xc_interface *xch,
>>> + xc_cpuid_t *cpuid)
>>> +{
>>> + int ret;
>>> + DECLARE_SYSCTL;
>>> +
>>> + sysctl.cmd = XEN_SYSCTL_cpuid_op;
>>> +
>>> + memcpy(&sysctl.u.cpuid, cpuid, sizeof(*cpuid));
>>> +
>>> + if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
>>> + return ret;
>>> +
>>> + memcpy(cpuid, &sysctl.u.cpuid, sizeof(*cpuid));
>>> +
>>> + return 0;
>>> +}
>>> +
>>> int xc_physinfo(xc_interface *xch,
>>> xc_physinfo_t *put_info)
>>> {
>>> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
>>> index 13f816b..6c709f5 100644
>>> --- a/tools/libxc/xenctrl.h
>>> +++ b/tools/libxc/xenctrl.h
>>> @@ -1135,6 +1135,7 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys);
>>> typedef xen_sysctl_physinfo_t xc_physinfo_t;
>>> typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
>>> typedef xen_sysctl_numainfo_t xc_numainfo_t;
>>> +typedef xen_sysctl_cpuid_t xc_cpuid_t;
>>> typedef uint32_t xc_cpu_to_node_t;
>>> typedef uint32_t xc_cpu_to_socket_t;
>>> @@ -1146,6 +1147,7 @@ typedef uint32_t xc_node_to_node_dist_t;
>>> int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
>>> int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
>>> int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
>>> +int xc_cpuid(xc_interface *xch, xc_cpuid_t *cpuid);
>>> int xc_sched_id(xc_interface *xch,
>>> int *sched_id);
>>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>>> index c42a079..98e2b5f 100644
>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -1997,7 +1997,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
>>> vpmu_dump(v);
>>> }
>>> -void domain_cpuid(
>>> +bool_t domain_cpuid_exists(
>>> struct domain *d,
>>> unsigned int input,
>>> unsigned int sub_input,
>>> @@ -2030,11 +2030,24 @@ void domain_cpuid(
>>> !d->disable_migrate && !d->arch.vtsc )
>>> *edx &= ~(1u<<8); /* TSC Invariant */
>>> - return;
>>> + return 1;
>>> }
>>> }
>>> - *eax = *ebx = *ecx = *edx = 0;
>>> + return 0;
>>> +}
>>> +
>>> +void domain_cpuid(
>>> + struct domain *d,
>>> + unsigned int input,
>>> + unsigned int sub_input,
>>> + unsigned int *eax,
>>> + unsigned int *ebx,
>>> + unsigned int *ecx,
>>> + unsigned int *edx)
>>> +{
>>> + if ( !domain_cpuid_exists(d, input, sub_input, eax, ebx, ecx,
>>> edx) )
>>> + *eax = *ebx = *ecx = *edx = 0;
>>> }
>>> void vcpu_kick(struct vcpu *v)
>>> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
>>> index 15d4b91..08f8038 100644
>>> --- a/xen/arch/x86/sysctl.c
>>> +++ b/xen/arch/x86/sysctl.c
>>> @@ -101,6 +101,23 @@ long arch_do_sysctl(
>>> }
>>> break;
>>> + case XEN_SYSCTL_cpuid_op:
>> This would appear to be a cpuid instruction in the context of a domain,
>> which should be a domctl, not a sysctl. I have a different
>> implementation of sysctl cpuid posted, which takes a pcpu parameter.
>
> No, this is not intended to handle CPUID instruction. This is invoked
> only as part of a sysctl.
>
> As for domctl vs sysctl --- I in fact would have preferred to do this
> via domctl since we already have a data structure to handle this
> (xen_domctl_cpuid). I decided to use sysctl since the intent here is
> to query what the system provides, not what a domain sees.
>
>
>>
>>> + {
>>> + struct xen_sysctl_cpuid *cpuid = &sysctl->u.cpuid;
>>> +
>>> + if ( !cpuid_hypervisor_leaves(cpuid->input[0],
>>> cpuid->input[1],
>>> + &cpuid->eax,&cpuid->ebx,
>>> + &cpuid->ecx, &cpuid->edx) )
>>> + asm ( "cpuid"
>>> + :"=a" (cpuid->eax), "=b" (cpuid->ebx),
>>> + "=c" (cpuid->ecx), "=d" (cpuid->edx)
>>> + : "0" (cpuid->input[0]), "1" (cpuid->input[1]) );
>> This will likely bypass masking/levelling for a domain. As suggested,
>> the hypervisor leaves should be plumbed properly through to be usable
>> from domain_cpuid().
>
> Yes, that was the intent. The thinking here is that we provide to the
> sysctl caller what the actual CPUID is. Now, this (the asm part) is
> not used anywhere so if we limit this sysctl to hypervisor leaves (or
> even to leaf 0x40000001 only as Jan suggested) we won't need this.
>
> (Moreover, for 0x40000001-only approach we may not even need this
> whole sysctl business)
>
> Thanks.
> -boris
All of this smells like it would be substantially more simple under the
proposition in my design to fix heterogeneous feature levelling, where
Xen presents a full and completely CPUID policy for PV and HVM domains
for consumption my the toolstack.
This removes the need for these "bypass the current domains policy so I
can see something about real hardware to build another domain against",
and avoids having libxc create a wrong idea of what it thinks Xen will
provide to the guest, just to have Xen fix up later anyway.
~Andrew
next prev parent reply other threads:[~2014-03-11 14:26 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-11 3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file Boris Ostrovsky
2014-03-11 8:45 ` Jan Beulich
2014-03-11 14:24 ` Boris Ostrovsky
2014-03-11 10:10 ` Andrew Cooper
2014-03-11 14:07 ` Boris Ostrovsky
2014-03-11 14:26 ` Andrew Cooper [this message]
2014-03-11 14:48 ` Boris Ostrovsky
2014-03-11 16:19 ` Jan Beulich
2014-03-11 3:54 ` [PATCH v2 2/4] x86/hvm: Revert 80ecb40362365ba77e68fc609de8bd3b7208ae19 Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 3/4] x86/hvm: Add HVM-specific hypervisor CPUID leaf Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 4/4] x86/hvm: Indicate avaliability of HW support of APIC virtualization to HVM guests Boris Ostrovsky
2014-03-11 7:37 ` [PATCH v2 0/4] Expose HW APIC virtualization support " Zhang, Yang Z
2014-03-11 14:32 ` Boris Ostrovsky
2014-03-11 11:00 ` David Vrabel
2014-03-11 14:14 ` Boris Ostrovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=531F1D28.5090108@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=boris.ostrovsky@oracle.com \
--cc=eddie.dong@intel.com \
--cc=ian.campbell@citrix.com \
--cc=ian.jackson@eu.citrix.com \
--cc=jbeulich@suse.com \
--cc=jun.nakajima@intel.com \
--cc=keir@xen.org \
--cc=stefano.stabellini@eu.citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).