* [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
@ 2014-03-11 3:54 ` Boris Ostrovsky
2014-03-11 8:45 ` Jan Beulich
2014-03-11 10:10 ` Andrew Cooper
2014-03-11 3:54 ` [PATCH v2 2/4] x86/hvm: Revert 80ecb40362365ba77e68fc609de8bd3b7208ae19 Boris Ostrovsky
` (4 subsequent siblings)
5 siblings, 2 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 3:54 UTC (permalink / raw)
To: jbeulich, keir, jun.nakajima, eddie.dong, ian.jackson,
stefano.stabellini, ian.campbell
Cc: boris.ostrovsky, xen-devel
Currently only "real" cpuid leaves can be overwritten by users via
'cpuid' option in the configuration file. This patch provides ability to
do the same for hypervisor leaves (those in the 0x40000000 range).
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
tools/libxc/xc_cpuid_x86.c | 23 ++++++++++++++++++++++-
tools/libxc/xc_misc.c | 18 ++++++++++++++++++
tools/libxc/xenctrl.h | 2 ++
xen/arch/x86/domain.c | 19 ++++++++++++++++---
xen/arch/x86/sysctl.c | 17 +++++++++++++++++
xen/arch/x86/traps.c | 3 +++
xen/include/asm-x86/domain.h | 7 +++++++
xen/include/public/sysctl.h | 18 ++++++++++++++++++
8 files changed, 103 insertions(+), 4 deletions(-)
diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
index bbbf9b8..544a0fd 100644
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -33,6 +33,8 @@
#define DEF_MAX_INTELEXT 0x80000008u
#define DEF_MAX_AMDEXT 0x8000001cu
+#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
+
static int hypervisor_is_64bit(xc_interface *xch)
{
xen_capabilities_info_t xen_caps = "";
@@ -555,6 +557,9 @@ static int xc_cpuid_policy(
{
xc_dominfo_t info;
+ if ( HYPERVISOR_LEAF(input[0]) )
+ return 0;
+
if ( xc_domain_getinfo(xch, domid, 1, &info) == 0 )
return -EINVAL;
@@ -754,7 +759,23 @@ int xc_cpuid_set(
memset(config_transformed, 0, 4 * sizeof(*config_transformed));
- cpuid(input, regs);
+ if ( HYPERVISOR_LEAF(input[0]) )
+ {
+ xc_cpuid_t cpuid;
+
+ cpuid.input[0] = input[0];
+ cpuid.input[1] = input[1];
+
+ if (xc_cpuid(xch, &cpuid))
+ regs[0] = regs[1] = regs[2] = regs[3] = 0;
+ else {
+ regs[0] = cpuid.eax;
+ regs[1] = cpuid.ebx;
+ regs[2] = cpuid.ecx;
+ regs[3] = cpuid.edx;
+ }
+ } else
+ cpuid(input, regs);
memcpy(polregs, regs, sizeof(regs));
xc_cpuid_policy(xch, domid, input, polregs);
diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index 3303454..4e8669b 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -159,6 +159,24 @@ int xc_send_debug_keys(xc_interface *xch, char *keys)
return ret;
}
+int xc_cpuid(xc_interface *xch,
+ xc_cpuid_t *cpuid)
+{
+ int ret;
+ DECLARE_SYSCTL;
+
+ sysctl.cmd = XEN_SYSCTL_cpuid_op;
+
+ memcpy(&sysctl.u.cpuid, cpuid, sizeof(*cpuid));
+
+ if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
+ return ret;
+
+ memcpy(cpuid, &sysctl.u.cpuid, sizeof(*cpuid));
+
+ return 0;
+}
+
int xc_physinfo(xc_interface *xch,
xc_physinfo_t *put_info)
{
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 13f816b..6c709f5 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1135,6 +1135,7 @@ int xc_send_debug_keys(xc_interface *xch, char *keys);
typedef xen_sysctl_physinfo_t xc_physinfo_t;
typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
typedef xen_sysctl_numainfo_t xc_numainfo_t;
+typedef xen_sysctl_cpuid_t xc_cpuid_t;
typedef uint32_t xc_cpu_to_node_t;
typedef uint32_t xc_cpu_to_socket_t;
@@ -1146,6 +1147,7 @@ typedef uint32_t xc_node_to_node_dist_t;
int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
+int xc_cpuid(xc_interface *xch, xc_cpuid_t *cpuid);
int xc_sched_id(xc_interface *xch,
int *sched_id);
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index c42a079..98e2b5f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1997,7 +1997,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
vpmu_dump(v);
}
-void domain_cpuid(
+bool_t domain_cpuid_exists(
struct domain *d,
unsigned int input,
unsigned int sub_input,
@@ -2030,11 +2030,24 @@ void domain_cpuid(
!d->disable_migrate && !d->arch.vtsc )
*edx &= ~(1u<<8); /* TSC Invariant */
- return;
+ return 1;
}
}
- *eax = *ebx = *ecx = *edx = 0;
+ return 0;
+}
+
+void domain_cpuid(
+ struct domain *d,
+ unsigned int input,
+ unsigned int sub_input,
+ unsigned int *eax,
+ unsigned int *ebx,
+ unsigned int *ecx,
+ unsigned int *edx)
+{
+ if ( !domain_cpuid_exists(d, input, sub_input, eax, ebx, ecx, edx) )
+ *eax = *ebx = *ecx = *edx = 0;
}
void vcpu_kick(struct vcpu *v)
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index 15d4b91..08f8038 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -101,6 +101,23 @@ long arch_do_sysctl(
}
break;
+ case XEN_SYSCTL_cpuid_op:
+ {
+ struct xen_sysctl_cpuid *cpuid = &sysctl->u.cpuid;
+
+ if ( !cpuid_hypervisor_leaves(cpuid->input[0], cpuid->input[1],
+ &cpuid->eax,&cpuid->ebx,
+ &cpuid->ecx, &cpuid->edx) )
+ asm ( "cpuid"
+ :"=a" (cpuid->eax), "=b" (cpuid->ebx),
+ "=c" (cpuid->ecx), "=d" (cpuid->edx)
+ : "0" (cpuid->input[0]), "1" (cpuid->input[1]) );
+
+ if ( __copy_to_guest(u_sysctl, sysctl, 1) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = -ENOSYS;
break;
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index c462317..e1f39d9 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -690,6 +690,9 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
if ( idx > limit )
return 0;
+ if ( domain_cpuid_exists(d, base + idx, sub_idx, eax, ebx, ecx, edx) )
+ return 1;
+
switch ( idx )
{
case 0:
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 49f7c0c..5fd47de 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -471,6 +471,13 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))
+bool_t domain_cpuid_exists(struct domain *d,
+ unsigned int input,
+ unsigned int sub_input,
+ unsigned int *eax,
+ unsigned int *ebx,
+ unsigned int *ecx,
+ unsigned int *edx);
void domain_cpuid(struct domain *d,
unsigned int input,
unsigned int sub_input,
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 8437d31..7c1c662 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -632,6 +632,20 @@ struct xen_sysctl_coverage_op {
typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
+#if defined(__i386__) || defined(__x86_64__)
+/* XEN_SYSCTL_cpuid_op */
+/* Get CPUID of a particular leaf */
+
+struct xen_sysctl_cpuid {
+ uint32_t input[2];
+ uint32_t eax;
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
+};
+typedef struct xen_sysctl_cpuid xen_sysctl_cpuid_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuid_t);
+#endif
struct xen_sysctl {
uint32_t cmd;
@@ -654,6 +668,7 @@ struct xen_sysctl {
#define XEN_SYSCTL_cpupool_op 18
#define XEN_SYSCTL_scheduler_op 19
#define XEN_SYSCTL_coverage_op 20
+#define XEN_SYSCTL_cpuid_op 21
uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
union {
struct xen_sysctl_readconsole readconsole;
@@ -675,6 +690,9 @@ struct xen_sysctl {
struct xen_sysctl_cpupool_op cpupool_op;
struct xen_sysctl_scheduler_op scheduler_op;
struct xen_sysctl_coverage_op coverage_op;
+#if defined(__i386__) || defined(__x86_64__)
+ struct xen_sysctl_cpuid cpuid;
+#endif
uint8_t pad[128];
} u;
};
--
1.7.10.4
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 3:54 ` [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file Boris Ostrovsky
@ 2014-03-11 8:45 ` Jan Beulich
2014-03-11 14:24 ` Boris Ostrovsky
2014-03-11 10:10 ` Andrew Cooper
1 sibling, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2014-03-11 8:45 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: keir, ian.campbell, stefano.stabellini, ian.jackson, eddie.dong,
xen-devel, jun.nakajima
>>> On 11.03.14 at 04:54, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
> Currently only "real" cpuid leaves can be overwritten by users via
> 'cpuid' option in the configuration file. This patch provides ability to
> do the same for hypervisor leaves (those in the 0x40000000 range).
And honestly I'm not certain we want to go that far. Limiting the
number of leaves seems reasonable (even CPU vendors had to
introduce this), but altering other hypervisor CPUID output seems
to only call for trouble.
> +struct xen_sysctl_cpuid {
> + uint32_t input[2];
> + uint32_t eax;
> + uint32_t ebx;
> + uint32_t ecx;
> + uint32_t edx;
> +};
Having just the four register fields here would be enough - eax
and ecx would simply be IN/OUT (and if need be in the future,
ebx/edx could become IN/OUT too without altering the structure
layout).
Jan
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 8:45 ` Jan Beulich
@ 2014-03-11 14:24 ` Boris Ostrovsky
0 siblings, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 14:24 UTC (permalink / raw)
To: Jan Beulich
Cc: keir, ian.campbell, stefano.stabellini, eddie.dong, ian.jackson,
xen-devel, jun.nakajima
On 03/11/2014 04:45 AM, Jan Beulich wrote:
>>>> On 11.03.14 at 04:54, Boris Ostrovsky <boris.ostrovsky@oracle.com> wrote:
>> Currently only "real" cpuid leaves can be overwritten by users via
>> 'cpuid' option in the configuration file. This patch provides ability to
>> do the same for hypervisor leaves (those in the 0x40000000 range).
> And honestly I'm not certain we want to go that far. Limiting the
> number of leaves seems reasonable (even CPU vendors had to
> introduce this), but altering other hypervisor CPUID output seems
> to only call for trouble.
If we do this I suspect we can get rid of the sysctl altogether.
Alternatively we can do this as part of policy in libxc but preserve
ability to change the policy (and keep sysctl). I slightly prefer the
latter as I think it's a useful feature but I can see reasons for not
doing it.
>
>> +struct xen_sysctl_cpuid {
>> + uint32_t input[2];
>> + uint32_t eax;
>> + uint32_t ebx;
>> + uint32_t ecx;
>> + uint32_t edx;
>> +};
> Having just the four register fields here would be enough - eax
> and ecx would simply be IN/OUT (and if need be in the future,
> ebx/edx could become IN/OUT too without altering the structure
> layout).
Right. I just blindly copied xen_domctl_cpuid here.
Thanks.
-boris
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 3:54 ` [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file Boris Ostrovsky
2014-03-11 8:45 ` Jan Beulich
@ 2014-03-11 10:10 ` Andrew Cooper
2014-03-11 14:07 ` Boris Ostrovsky
1 sibling, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2014-03-11 10:10 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: keir, jbeulich, stefano.stabellini, eddie.dong, ian.jackson,
xen-devel, jun.nakajima, ian.campbell
On 11/03/14 03:54, Boris Ostrovsky wrote:
> Currently only "real" cpuid leaves can be overwritten by users via
> 'cpuid' option in the configuration file. This patch provides ability to
> do the same for hypervisor leaves (those in the 0x40000000 range).
>
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
How? There is nothing stopping leaves in 0x40000000 being set in the
policy with XEN_DOMCTL_set_cpuid, but I dont see anything which plumbs
this together at the Xen level.
> ---
> tools/libxc/xc_cpuid_x86.c | 23 ++++++++++++++++++++++-
> tools/libxc/xc_misc.c | 18 ++++++++++++++++++
> tools/libxc/xenctrl.h | 2 ++
> xen/arch/x86/domain.c | 19 ++++++++++++++++---
> xen/arch/x86/sysctl.c | 17 +++++++++++++++++
> xen/arch/x86/traps.c | 3 +++
> xen/include/asm-x86/domain.h | 7 +++++++
> xen/include/public/sysctl.h | 18 ++++++++++++++++++
> 8 files changed, 103 insertions(+), 4 deletions(-)
>
> diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
> index bbbf9b8..544a0fd 100644
> --- a/tools/libxc/xc_cpuid_x86.c
> +++ b/tools/libxc/xc_cpuid_x86.c
> @@ -33,6 +33,8 @@
> #define DEF_MAX_INTELEXT 0x80000008u
> #define DEF_MAX_AMDEXT 0x8000001cu
>
> +#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
> +
This check is wrong.
> static int hypervisor_is_64bit(xc_interface *xch)
> {
> xen_capabilities_info_t xen_caps = "";
> @@ -555,6 +557,9 @@ static int xc_cpuid_policy(
> {
> xc_dominfo_t info;
>
> + if ( HYPERVISOR_LEAF(input[0]) )
> + return 0;
> +
> if ( xc_domain_getinfo(xch, domid, 1, &info) == 0 )
> return -EINVAL;
>
> @@ -754,7 +759,23 @@ int xc_cpuid_set(
>
> memset(config_transformed, 0, 4 * sizeof(*config_transformed));
>
> - cpuid(input, regs);
> + if ( HYPERVISOR_LEAF(input[0]) )
> + {
> + xc_cpuid_t cpuid;
> +
> + cpuid.input[0] = input[0];
> + cpuid.input[1] = input[1];
> +
> + if (xc_cpuid(xch, &cpuid))
> + regs[0] = regs[1] = regs[2] = regs[3] = 0;
> + else {
> + regs[0] = cpuid.eax;
> + regs[1] = cpuid.ebx;
> + regs[2] = cpuid.ecx;
> + regs[3] = cpuid.edx;
> + }
> + } else
> + cpuid(input, regs);
>
> memcpy(polregs, regs, sizeof(regs));
> xc_cpuid_policy(xch, domid, input, polregs);
> diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
> index 3303454..4e8669b 100644
> --- a/tools/libxc/xc_misc.c
> +++ b/tools/libxc/xc_misc.c
> @@ -159,6 +159,24 @@ int xc_send_debug_keys(xc_interface *xch, char *keys)
> return ret;
> }
>
> +int xc_cpuid(xc_interface *xch,
> + xc_cpuid_t *cpuid)
> +{
> + int ret;
> + DECLARE_SYSCTL;
> +
> + sysctl.cmd = XEN_SYSCTL_cpuid_op;
> +
> + memcpy(&sysctl.u.cpuid, cpuid, sizeof(*cpuid));
> +
> + if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
> + return ret;
> +
> + memcpy(cpuid, &sysctl.u.cpuid, sizeof(*cpuid));
> +
> + return 0;
> +}
> +
> int xc_physinfo(xc_interface *xch,
> xc_physinfo_t *put_info)
> {
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index 13f816b..6c709f5 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1135,6 +1135,7 @@ int xc_send_debug_keys(xc_interface *xch, char *keys);
> typedef xen_sysctl_physinfo_t xc_physinfo_t;
> typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
> typedef xen_sysctl_numainfo_t xc_numainfo_t;
> +typedef xen_sysctl_cpuid_t xc_cpuid_t;
>
> typedef uint32_t xc_cpu_to_node_t;
> typedef uint32_t xc_cpu_to_socket_t;
> @@ -1146,6 +1147,7 @@ typedef uint32_t xc_node_to_node_dist_t;
> int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
> int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
> int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
> +int xc_cpuid(xc_interface *xch, xc_cpuid_t *cpuid);
>
> int xc_sched_id(xc_interface *xch,
> int *sched_id);
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index c42a079..98e2b5f 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1997,7 +1997,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
> vpmu_dump(v);
> }
>
> -void domain_cpuid(
> +bool_t domain_cpuid_exists(
> struct domain *d,
> unsigned int input,
> unsigned int sub_input,
> @@ -2030,11 +2030,24 @@ void domain_cpuid(
> !d->disable_migrate && !d->arch.vtsc )
> *edx &= ~(1u<<8); /* TSC Invariant */
>
> - return;
> + return 1;
> }
> }
>
> - *eax = *ebx = *ecx = *edx = 0;
> + return 0;
> +}
> +
> +void domain_cpuid(
> + struct domain *d,
> + unsigned int input,
> + unsigned int sub_input,
> + unsigned int *eax,
> + unsigned int *ebx,
> + unsigned int *ecx,
> + unsigned int *edx)
> +{
> + if ( !domain_cpuid_exists(d, input, sub_input, eax, ebx, ecx, edx) )
> + *eax = *ebx = *ecx = *edx = 0;
> }
>
> void vcpu_kick(struct vcpu *v)
> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> index 15d4b91..08f8038 100644
> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -101,6 +101,23 @@ long arch_do_sysctl(
> }
> break;
>
> + case XEN_SYSCTL_cpuid_op:
This would appear to be a cpuid instruction in the context of a domain,
which should be a domctl, not a sysctl. I have a different
implementation of sysctl cpuid posted, which takes a pcpu parameter.
> + {
> + struct xen_sysctl_cpuid *cpuid = &sysctl->u.cpuid;
> +
> + if ( !cpuid_hypervisor_leaves(cpuid->input[0], cpuid->input[1],
> + &cpuid->eax,&cpuid->ebx,
> + &cpuid->ecx, &cpuid->edx) )
> + asm ( "cpuid"
> + :"=a" (cpuid->eax), "=b" (cpuid->ebx),
> + "=c" (cpuid->ecx), "=d" (cpuid->edx)
> + : "0" (cpuid->input[0]), "1" (cpuid->input[1]) );
This will likely bypass masking/levelling for a domain. As suggested,
the hypervisor leaves should be plumbed properly through to be usable
from domain_cpuid().
~Andrew
> +
> + if ( __copy_to_guest(u_sysctl, sysctl, 1) )
> + ret = -EFAULT;
> + }
> + break;
> +
> default:
> ret = -ENOSYS;
> break;
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index c462317..e1f39d9 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -690,6 +690,9 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
> if ( idx > limit )
> return 0;
>
> + if ( domain_cpuid_exists(d, base + idx, sub_idx, eax, ebx, ecx, edx) )
> + return 1;
> +
> switch ( idx )
> {
> case 0:
> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> index 49f7c0c..5fd47de 100644
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -471,6 +471,13 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
> ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
> X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))
>
> +bool_t domain_cpuid_exists(struct domain *d,
> + unsigned int input,
> + unsigned int sub_input,
> + unsigned int *eax,
> + unsigned int *ebx,
> + unsigned int *ecx,
> + unsigned int *edx);
> void domain_cpuid(struct domain *d,
> unsigned int input,
> unsigned int sub_input,
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 8437d31..7c1c662 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -632,6 +632,20 @@ struct xen_sysctl_coverage_op {
> typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
> DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
>
> +#if defined(__i386__) || defined(__x86_64__)
> +/* XEN_SYSCTL_cpuid_op */
> +/* Get CPUID of a particular leaf */
> +
> +struct xen_sysctl_cpuid {
> + uint32_t input[2];
> + uint32_t eax;
> + uint32_t ebx;
> + uint32_t ecx;
> + uint32_t edx;
> +};
> +typedef struct xen_sysctl_cpuid xen_sysctl_cpuid_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuid_t);
> +#endif
>
> struct xen_sysctl {
> uint32_t cmd;
> @@ -654,6 +668,7 @@ struct xen_sysctl {
> #define XEN_SYSCTL_cpupool_op 18
> #define XEN_SYSCTL_scheduler_op 19
> #define XEN_SYSCTL_coverage_op 20
> +#define XEN_SYSCTL_cpuid_op 21
> uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
> union {
> struct xen_sysctl_readconsole readconsole;
> @@ -675,6 +690,9 @@ struct xen_sysctl {
> struct xen_sysctl_cpupool_op cpupool_op;
> struct xen_sysctl_scheduler_op scheduler_op;
> struct xen_sysctl_coverage_op coverage_op;
> +#if defined(__i386__) || defined(__x86_64__)
> + struct xen_sysctl_cpuid cpuid;
> +#endif
> uint8_t pad[128];
> } u;
> };
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 10:10 ` Andrew Cooper
@ 2014-03-11 14:07 ` Boris Ostrovsky
2014-03-11 14:26 ` Andrew Cooper
0 siblings, 1 reply; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 14:07 UTC (permalink / raw)
To: Andrew Cooper
Cc: keir, jbeulich, stefano.stabellini, eddie.dong, ian.jackson,
xen-devel, jun.nakajima, ian.campbell
On 03/11/2014 06:10 AM, Andrew Cooper wrote:
> On 11/03/14 03:54, Boris Ostrovsky wrote:
>> Currently only "real" cpuid leaves can be overwritten by users via
>> 'cpuid' option in the configuration file. This patch provides ability to
>> do the same for hypervisor leaves (those in the 0x40000000 range).
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> How? There is nothing stopping leaves in 0x40000000 being set in the
> policy with XEN_DOMCTL_set_cpuid, but I dont see anything which plumbs
> this together at the Xen level.
Right. What this patch mostly provides is ability to query the
hypervisor (via sysctl) about default values of hypervisor CPUID leaf
from userspace. We cannot use CPUID instruction here (for dom0). And
/dev/cpu/<n>/cpuid may not exist.
We then use these values plus whatever the user requested (because the
user may ask for only one of the 4 registers) to pass to the subsequent
XEN_DOMCTL_set_cpuid call.
>
>> ---
>> tools/libxc/xc_cpuid_x86.c | 23 ++++++++++++++++++++++-
>> tools/libxc/xc_misc.c | 18 ++++++++++++++++++
>> tools/libxc/xenctrl.h | 2 ++
>> xen/arch/x86/domain.c | 19 ++++++++++++++++---
>> xen/arch/x86/sysctl.c | 17 +++++++++++++++++
>> xen/arch/x86/traps.c | 3 +++
>> xen/include/asm-x86/domain.h | 7 +++++++
>> xen/include/public/sysctl.h | 18 ++++++++++++++++++
>> 8 files changed, 103 insertions(+), 4 deletions(-)
>>
>> diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
>> index bbbf9b8..544a0fd 100644
>> --- a/tools/libxc/xc_cpuid_x86.c
>> +++ b/tools/libxc/xc_cpuid_x86.c
>> @@ -33,6 +33,8 @@
>> #define DEF_MAX_INTELEXT 0x80000008u
>> #define DEF_MAX_AMDEXT 0x8000001cu
>>
>> +#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
>> +
> This check is wrong.
Because of Viridian leaves? Or something else?
>
>> static int hypervisor_is_64bit(xc_interface *xch)
>> {
>> xen_capabilities_info_t xen_caps = "";
>> @@ -555,6 +557,9 @@ static int xc_cpuid_policy(
>> {
>> xc_dominfo_t info;
>>
>> + if ( HYPERVISOR_LEAF(input[0]) )
>> + return 0;
>> +
>> if ( xc_domain_getinfo(xch, domid, 1, &info) == 0 )
>> return -EINVAL;
>>
>> @@ -754,7 +759,23 @@ int xc_cpuid_set(
>>
>> memset(config_transformed, 0, 4 * sizeof(*config_transformed));
>>
>> - cpuid(input, regs);
>> + if ( HYPERVISOR_LEAF(input[0]) )
>> + {
>> + xc_cpuid_t cpuid;
>> +
>> + cpuid.input[0] = input[0];
>> + cpuid.input[1] = input[1];
>> +
>> + if (xc_cpuid(xch, &cpuid))
>> + regs[0] = regs[1] = regs[2] = regs[3] = 0;
>> + else {
>> + regs[0] = cpuid.eax;
>> + regs[1] = cpuid.ebx;
>> + regs[2] = cpuid.ecx;
>> + regs[3] = cpuid.edx;
>> + }
>> + } else
>> + cpuid(input, regs);
>>
>> memcpy(polregs, regs, sizeof(regs));
>> xc_cpuid_policy(xch, domid, input, polregs);
>> diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
>> index 3303454..4e8669b 100644
>> --- a/tools/libxc/xc_misc.c
>> +++ b/tools/libxc/xc_misc.c
>> @@ -159,6 +159,24 @@ int xc_send_debug_keys(xc_interface *xch, char *keys)
>> return ret;
>> }
>>
>> +int xc_cpuid(xc_interface *xch,
>> + xc_cpuid_t *cpuid)
>> +{
>> + int ret;
>> + DECLARE_SYSCTL;
>> +
>> + sysctl.cmd = XEN_SYSCTL_cpuid_op;
>> +
>> + memcpy(&sysctl.u.cpuid, cpuid, sizeof(*cpuid));
>> +
>> + if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
>> + return ret;
>> +
>> + memcpy(cpuid, &sysctl.u.cpuid, sizeof(*cpuid));
>> +
>> + return 0;
>> +}
>> +
>> int xc_physinfo(xc_interface *xch,
>> xc_physinfo_t *put_info)
>> {
>> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
>> index 13f816b..6c709f5 100644
>> --- a/tools/libxc/xenctrl.h
>> +++ b/tools/libxc/xenctrl.h
>> @@ -1135,6 +1135,7 @@ int xc_send_debug_keys(xc_interface *xch, char *keys);
>> typedef xen_sysctl_physinfo_t xc_physinfo_t;
>> typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
>> typedef xen_sysctl_numainfo_t xc_numainfo_t;
>> +typedef xen_sysctl_cpuid_t xc_cpuid_t;
>>
>> typedef uint32_t xc_cpu_to_node_t;
>> typedef uint32_t xc_cpu_to_socket_t;
>> @@ -1146,6 +1147,7 @@ typedef uint32_t xc_node_to_node_dist_t;
>> int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
>> int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
>> int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
>> +int xc_cpuid(xc_interface *xch, xc_cpuid_t *cpuid);
>>
>> int xc_sched_id(xc_interface *xch,
>> int *sched_id);
>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>> index c42a079..98e2b5f 100644
>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -1997,7 +1997,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
>> vpmu_dump(v);
>> }
>>
>> -void domain_cpuid(
>> +bool_t domain_cpuid_exists(
>> struct domain *d,
>> unsigned int input,
>> unsigned int sub_input,
>> @@ -2030,11 +2030,24 @@ void domain_cpuid(
>> !d->disable_migrate && !d->arch.vtsc )
>> *edx &= ~(1u<<8); /* TSC Invariant */
>>
>> - return;
>> + return 1;
>> }
>> }
>>
>> - *eax = *ebx = *ecx = *edx = 0;
>> + return 0;
>> +}
>> +
>> +void domain_cpuid(
>> + struct domain *d,
>> + unsigned int input,
>> + unsigned int sub_input,
>> + unsigned int *eax,
>> + unsigned int *ebx,
>> + unsigned int *ecx,
>> + unsigned int *edx)
>> +{
>> + if ( !domain_cpuid_exists(d, input, sub_input, eax, ebx, ecx, edx) )
>> + *eax = *ebx = *ecx = *edx = 0;
>> }
>>
>> void vcpu_kick(struct vcpu *v)
>> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
>> index 15d4b91..08f8038 100644
>> --- a/xen/arch/x86/sysctl.c
>> +++ b/xen/arch/x86/sysctl.c
>> @@ -101,6 +101,23 @@ long arch_do_sysctl(
>> }
>> break;
>>
>> + case XEN_SYSCTL_cpuid_op:
> This would appear to be a cpuid instruction in the context of a domain,
> which should be a domctl, not a sysctl. I have a different
> implementation of sysctl cpuid posted, which takes a pcpu parameter.
No, this is not intended to handle CPUID instruction. This is invoked
only as part of a sysctl.
As for domctl vs sysctl --- I in fact would have preferred to do this
via domctl since we already have a data structure to handle this
(xen_domctl_cpuid). I decided to use sysctl since the intent here is to
query what the system provides, not what a domain sees.
>
>> + {
>> + struct xen_sysctl_cpuid *cpuid = &sysctl->u.cpuid;
>> +
>> + if ( !cpuid_hypervisor_leaves(cpuid->input[0], cpuid->input[1],
>> + &cpuid->eax,&cpuid->ebx,
>> + &cpuid->ecx, &cpuid->edx) )
>> + asm ( "cpuid"
>> + :"=a" (cpuid->eax), "=b" (cpuid->ebx),
>> + "=c" (cpuid->ecx), "=d" (cpuid->edx)
>> + : "0" (cpuid->input[0]), "1" (cpuid->input[1]) );
> This will likely bypass masking/levelling for a domain. As suggested,
> the hypervisor leaves should be plumbed properly through to be usable
> from domain_cpuid().
Yes, that was the intent. The thinking here is that we provide to the
sysctl caller what the actual CPUID is. Now, this (the asm part) is not
used anywhere so if we limit this sysctl to hypervisor leaves (or even
to leaf 0x40000001 only as Jan suggested) we won't need this.
(Moreover, for 0x40000001-only approach we may not even need this whole
sysctl business)
Thanks.
-boris
>
> ~Andrew
>
>> +
>> + if ( __copy_to_guest(u_sysctl, sysctl, 1) )
>> + ret = -EFAULT;
>> + }
>> + break;
>> +
>> default:
>> ret = -ENOSYS;
>> break;
>> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
>> index c462317..e1f39d9 100644
>> --- a/xen/arch/x86/traps.c
>> +++ b/xen/arch/x86/traps.c
>> @@ -690,6 +690,9 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
>> if ( idx > limit )
>> return 0;
>>
>> + if ( domain_cpuid_exists(d, base + idx, sub_idx, eax, ebx, ecx, edx) )
>> + return 1;
>> +
>> switch ( idx )
>> {
>> case 0:
>> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
>> index 49f7c0c..5fd47de 100644
>> --- a/xen/include/asm-x86/domain.h
>> +++ b/xen/include/asm-x86/domain.h
>> @@ -471,6 +471,13 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
>> ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | \
>> X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))
>>
>> +bool_t domain_cpuid_exists(struct domain *d,
>> + unsigned int input,
>> + unsigned int sub_input,
>> + unsigned int *eax,
>> + unsigned int *ebx,
>> + unsigned int *ecx,
>> + unsigned int *edx);
>> void domain_cpuid(struct domain *d,
>> unsigned int input,
>> unsigned int sub_input,
>> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
>> index 8437d31..7c1c662 100644
>> --- a/xen/include/public/sysctl.h
>> +++ b/xen/include/public/sysctl.h
>> @@ -632,6 +632,20 @@ struct xen_sysctl_coverage_op {
>> typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
>> DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
>>
>> +#if defined(__i386__) || defined(__x86_64__)
>> +/* XEN_SYSCTL_cpuid_op */
>> +/* Get CPUID of a particular leaf */
>> +
>> +struct xen_sysctl_cpuid {
>> + uint32_t input[2];
>> + uint32_t eax;
>> + uint32_t ebx;
>> + uint32_t ecx;
>> + uint32_t edx;
>> +};
>> +typedef struct xen_sysctl_cpuid xen_sysctl_cpuid_t;
>> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuid_t);
>> +#endif
>>
>> struct xen_sysctl {
>> uint32_t cmd;
>> @@ -654,6 +668,7 @@ struct xen_sysctl {
>> #define XEN_SYSCTL_cpupool_op 18
>> #define XEN_SYSCTL_scheduler_op 19
>> #define XEN_SYSCTL_coverage_op 20
>> +#define XEN_SYSCTL_cpuid_op 21
>> uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
>> union {
>> struct xen_sysctl_readconsole readconsole;
>> @@ -675,6 +690,9 @@ struct xen_sysctl {
>> struct xen_sysctl_cpupool_op cpupool_op;
>> struct xen_sysctl_scheduler_op scheduler_op;
>> struct xen_sysctl_coverage_op coverage_op;
>> +#if defined(__i386__) || defined(__x86_64__)
>> + struct xen_sysctl_cpuid cpuid;
>> +#endif
>> uint8_t pad[128];
>> } u;
>> };
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 14:07 ` Boris Ostrovsky
@ 2014-03-11 14:26 ` Andrew Cooper
2014-03-11 14:48 ` Boris Ostrovsky
2014-03-11 16:19 ` Jan Beulich
0 siblings, 2 replies; 16+ messages in thread
From: Andrew Cooper @ 2014-03-11 14:26 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: keir, jbeulich, stefano.stabellini, eddie.dong, ian.jackson,
xen-devel, jun.nakajima, ian.campbell
On 11/03/14 14:07, Boris Ostrovsky wrote:
> On 03/11/2014 06:10 AM, Andrew Cooper wrote:
>> On 11/03/14 03:54, Boris Ostrovsky wrote:
>>> Currently only "real" cpuid leaves can be overwritten by users via
>>> 'cpuid' option in the configuration file. This patch provides
>>> ability to
>>> do the same for hypervisor leaves (those in the 0x40000000 range).
>>>
>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>> How? There is nothing stopping leaves in 0x40000000 being set in the
>> policy with XEN_DOMCTL_set_cpuid, but I dont see anything which plumbs
>> this together at the Xen level.
>
> Right. What this patch mostly provides is ability to query the
> hypervisor (via sysctl) about default values of hypervisor CPUID leaf
> from userspace. We cannot use CPUID instruction here (for dom0). And
> /dev/cpu/<n>/cpuid may not exist.
The XEN_FORCED_EMULATION prefix would be fine, and not require a new
custom hypercall, but only an HVM guest is going to care whether it can
find this magic leaf.
>
> We then use these values plus whatever the user requested (because the
> user may ask for only one of the 4 registers) to pass to the
> subsequent XEN_DOMCTL_set_cpuid call.
I currently have a project to fix this braindead thinking of having Xen
and libxc guess at what eachother supports and will report.
>
>>
>>> ---
>>> tools/libxc/xc_cpuid_x86.c | 23 ++++++++++++++++++++++-
>>> tools/libxc/xc_misc.c | 18 ++++++++++++++++++
>>> tools/libxc/xenctrl.h | 2 ++
>>> xen/arch/x86/domain.c | 19 ++++++++++++++++---
>>> xen/arch/x86/sysctl.c | 17 +++++++++++++++++
>>> xen/arch/x86/traps.c | 3 +++
>>> xen/include/asm-x86/domain.h | 7 +++++++
>>> xen/include/public/sysctl.h | 18 ++++++++++++++++++
>>> 8 files changed, 103 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
>>> index bbbf9b8..544a0fd 100644
>>> --- a/tools/libxc/xc_cpuid_x86.c
>>> +++ b/tools/libxc/xc_cpuid_x86.c
>>> @@ -33,6 +33,8 @@
>>> #define DEF_MAX_INTELEXT 0x80000008u
>>> #define DEF_MAX_AMDEXT 0x8000001cu
>>> +#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
>>> +
>> This check is wrong.
>
> Because of Viridian leaves? Or something else?
It should be (((idx) & 0xf0000000) == 0x40000000)
According to the AMD and Intel manuals, it is strictly leaf 0x40000000
reserved for hypervisor use, not 0xC0000000 or others.
>
>>
>>> static int hypervisor_is_64bit(xc_interface *xch)
>>> {
>>> xen_capabilities_info_t xen_caps = "";
>>> @@ -555,6 +557,9 @@ static int xc_cpuid_policy(
>>> {
>>> xc_dominfo_t info;
>>> + if ( HYPERVISOR_LEAF(input[0]) )
>>> + return 0;
>>> +
>>> if ( xc_domain_getinfo(xch, domid, 1, &info) == 0 )
>>> return -EINVAL;
>>> @@ -754,7 +759,23 @@ int xc_cpuid_set(
>>> memset(config_transformed, 0, 4 * sizeof(*config_transformed));
>>> - cpuid(input, regs);
>>> + if ( HYPERVISOR_LEAF(input[0]) )
>>> + {
>>> + xc_cpuid_t cpuid;
>>> +
>>> + cpuid.input[0] = input[0];
>>> + cpuid.input[1] = input[1];
>>> +
>>> + if (xc_cpuid(xch, &cpuid))
>>> + regs[0] = regs[1] = regs[2] = regs[3] = 0;
>>> + else {
>>> + regs[0] = cpuid.eax;
>>> + regs[1] = cpuid.ebx;
>>> + regs[2] = cpuid.ecx;
>>> + regs[3] = cpuid.edx;
>>> + }
>>> + } else
>>> + cpuid(input, regs);
>>> memcpy(polregs, regs, sizeof(regs));
>>> xc_cpuid_policy(xch, domid, input, polregs);
>>> diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
>>> index 3303454..4e8669b 100644
>>> --- a/tools/libxc/xc_misc.c
>>> +++ b/tools/libxc/xc_misc.c
>>> @@ -159,6 +159,24 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys)
>>> return ret;
>>> }
>>> +int xc_cpuid(xc_interface *xch,
>>> + xc_cpuid_t *cpuid)
>>> +{
>>> + int ret;
>>> + DECLARE_SYSCTL;
>>> +
>>> + sysctl.cmd = XEN_SYSCTL_cpuid_op;
>>> +
>>> + memcpy(&sysctl.u.cpuid, cpuid, sizeof(*cpuid));
>>> +
>>> + if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
>>> + return ret;
>>> +
>>> + memcpy(cpuid, &sysctl.u.cpuid, sizeof(*cpuid));
>>> +
>>> + return 0;
>>> +}
>>> +
>>> int xc_physinfo(xc_interface *xch,
>>> xc_physinfo_t *put_info)
>>> {
>>> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
>>> index 13f816b..6c709f5 100644
>>> --- a/tools/libxc/xenctrl.h
>>> +++ b/tools/libxc/xenctrl.h
>>> @@ -1135,6 +1135,7 @@ int xc_send_debug_keys(xc_interface *xch, char
>>> *keys);
>>> typedef xen_sysctl_physinfo_t xc_physinfo_t;
>>> typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
>>> typedef xen_sysctl_numainfo_t xc_numainfo_t;
>>> +typedef xen_sysctl_cpuid_t xc_cpuid_t;
>>> typedef uint32_t xc_cpu_to_node_t;
>>> typedef uint32_t xc_cpu_to_socket_t;
>>> @@ -1146,6 +1147,7 @@ typedef uint32_t xc_node_to_node_dist_t;
>>> int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
>>> int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
>>> int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
>>> +int xc_cpuid(xc_interface *xch, xc_cpuid_t *cpuid);
>>> int xc_sched_id(xc_interface *xch,
>>> int *sched_id);
>>> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
>>> index c42a079..98e2b5f 100644
>>> --- a/xen/arch/x86/domain.c
>>> +++ b/xen/arch/x86/domain.c
>>> @@ -1997,7 +1997,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
>>> vpmu_dump(v);
>>> }
>>> -void domain_cpuid(
>>> +bool_t domain_cpuid_exists(
>>> struct domain *d,
>>> unsigned int input,
>>> unsigned int sub_input,
>>> @@ -2030,11 +2030,24 @@ void domain_cpuid(
>>> !d->disable_migrate && !d->arch.vtsc )
>>> *edx &= ~(1u<<8); /* TSC Invariant */
>>> - return;
>>> + return 1;
>>> }
>>> }
>>> - *eax = *ebx = *ecx = *edx = 0;
>>> + return 0;
>>> +}
>>> +
>>> +void domain_cpuid(
>>> + struct domain *d,
>>> + unsigned int input,
>>> + unsigned int sub_input,
>>> + unsigned int *eax,
>>> + unsigned int *ebx,
>>> + unsigned int *ecx,
>>> + unsigned int *edx)
>>> +{
>>> + if ( !domain_cpuid_exists(d, input, sub_input, eax, ebx, ecx,
>>> edx) )
>>> + *eax = *ebx = *ecx = *edx = 0;
>>> }
>>> void vcpu_kick(struct vcpu *v)
>>> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
>>> index 15d4b91..08f8038 100644
>>> --- a/xen/arch/x86/sysctl.c
>>> +++ b/xen/arch/x86/sysctl.c
>>> @@ -101,6 +101,23 @@ long arch_do_sysctl(
>>> }
>>> break;
>>> + case XEN_SYSCTL_cpuid_op:
>> This would appear to be a cpuid instruction in the context of a domain,
>> which should be a domctl, not a sysctl. I have a different
>> implementation of sysctl cpuid posted, which takes a pcpu parameter.
>
> No, this is not intended to handle CPUID instruction. This is invoked
> only as part of a sysctl.
>
> As for domctl vs sysctl --- I in fact would have preferred to do this
> via domctl since we already have a data structure to handle this
> (xen_domctl_cpuid). I decided to use sysctl since the intent here is
> to query what the system provides, not what a domain sees.
>
>
>>
>>> + {
>>> + struct xen_sysctl_cpuid *cpuid = &sysctl->u.cpuid;
>>> +
>>> + if ( !cpuid_hypervisor_leaves(cpuid->input[0],
>>> cpuid->input[1],
>>> + &cpuid->eax,&cpuid->ebx,
>>> + &cpuid->ecx, &cpuid->edx) )
>>> + asm ( "cpuid"
>>> + :"=a" (cpuid->eax), "=b" (cpuid->ebx),
>>> + "=c" (cpuid->ecx), "=d" (cpuid->edx)
>>> + : "0" (cpuid->input[0]), "1" (cpuid->input[1]) );
>> This will likely bypass masking/levelling for a domain. As suggested,
>> the hypervisor leaves should be plumbed properly through to be usable
>> from domain_cpuid().
>
> Yes, that was the intent. The thinking here is that we provide to the
> sysctl caller what the actual CPUID is. Now, this (the asm part) is
> not used anywhere so if we limit this sysctl to hypervisor leaves (or
> even to leaf 0x40000001 only as Jan suggested) we won't need this.
>
> (Moreover, for 0x40000001-only approach we may not even need this
> whole sysctl business)
>
> Thanks.
> -boris
All of this smells like it would be substantially more simple under the
proposition in my design to fix heterogeneous feature levelling, where
Xen presents a full and completely CPUID policy for PV and HVM domains
for consumption my the toolstack.
This removes the need for these "bypass the current domains policy so I
can see something about real hardware to build another domain against",
and avoids having libxc create a wrong idea of what it thinks Xen will
provide to the guest, just to have Xen fix up later anyway.
~Andrew
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 14:26 ` Andrew Cooper
@ 2014-03-11 14:48 ` Boris Ostrovsky
2014-03-11 16:19 ` Jan Beulich
1 sibling, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 14:48 UTC (permalink / raw)
To: Andrew Cooper
Cc: keir, jbeulich, stefano.stabellini, eddie.dong, ian.jackson,
xen-devel, jun.nakajima, ian.campbell
On 03/11/2014 10:26 AM, Andrew Cooper wrote:
> On 11/03/14 14:07, Boris Ostrovsky wrote:
>> On 03/11/2014 06:10 AM, Andrew Cooper wrote:
>>> On 11/03/14 03:54, Boris Ostrovsky wrote:
>>>> Currently only "real" cpuid leaves can be overwritten by users via
>>>> 'cpuid' option in the configuration file. This patch provides
>>>> ability to
>>>> do the same for hypervisor leaves (those in the 0x40000000 range).
>>>>
>>>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>>> How? There is nothing stopping leaves in 0x40000000 being set in the
>>> policy with XEN_DOMCTL_set_cpuid, but I dont see anything which plumbs
>>> this together at the Xen level.
>> Right. What this patch mostly provides is ability to query the
>> hypervisor (via sysctl) about default values of hypervisor CPUID leaf
>> from userspace. We cannot use CPUID instruction here (for dom0). And
>> /dev/cpu/<n>/cpuid may not exist.
> The XEN_FORCED_EMULATION prefix would be fine, and not require a new
> custom hypercall, but only an HVM guest is going to care whether it can
> find this magic leaf.
Doh! For some reasons I decided that it won't work for userland. But of
course it will. Which eliminates the need for sysctl.
-boris
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file
2014-03-11 14:26 ` Andrew Cooper
2014-03-11 14:48 ` Boris Ostrovsky
@ 2014-03-11 16:19 ` Jan Beulich
1 sibling, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2014-03-11 16:19 UTC (permalink / raw)
To: Andrew Cooper, Boris Ostrovsky
Cc: keir, ian.campbell, stefano.stabellini, ian.jackson, eddie.dong,
xen-devel, jun.nakajima
>>> On 11.03.14 at 15:26, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 11/03/14 14:07, Boris Ostrovsky wrote:
>> On 03/11/2014 06:10 AM, Andrew Cooper wrote:
>>> On 11/03/14 03:54, Boris Ostrovsky wrote:
>>>> --- a/tools/libxc/xc_cpuid_x86.c
>>>> +++ b/tools/libxc/xc_cpuid_x86.c
>>>> @@ -33,6 +33,8 @@
>>>> #define DEF_MAX_INTELEXT 0x80000008u
>>>> #define DEF_MAX_AMDEXT 0x8000001cu
>>>> +#define HYPERVISOR_LEAF(idx) (((idx) & 0x40000000) == 0x40000000)
>>>> +
>>> This check is wrong.
>>
>> Because of Viridian leaves? Or something else?
>
> It should be (((idx) & 0xf0000000) == 0x40000000)
>
> According to the AMD and Intel manuals, it is strictly leaf 0x40000000
> reserved for hypervisor use, not 0xC0000000 or others.
And consequently the mask ought to be 0xffff0000.
Jan
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v2 2/4] x86/hvm: Revert 80ecb40362365ba77e68fc609de8bd3b7208ae19
2014-03-11 3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file Boris Ostrovsky
@ 2014-03-11 3:54 ` Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 3/4] x86/hvm: Add HVM-specific hypervisor CPUID leaf Boris Ostrovsky
` (3 subsequent siblings)
5 siblings, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 3:54 UTC (permalink / raw)
To: jbeulich, keir, jun.nakajima, eddie.dong, ian.jackson,
stefano.stabellini, ian.campbell
Cc: boris.ostrovsky, xen-devel
The Solaris bug that commit 80ecb40362365ba77e68fc609de8bd3b7208ae19
addressed has been fixed and backported to earlier releases.
Those still using those releases can specify number of hypervisor leaves
explicitly via 'cpuid' xl config option.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/arch/x86/traps.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index e1f39d9..75a0ceb 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -677,17 +677,10 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
struct domain *d = current->domain;
/* Optionally shift out of the way of Viridian architectural leaves. */
uint32_t base = is_viridian_domain(d) ? 0x40000100 : 0x40000000;
- uint32_t limit;
idx -= base;
- /*
- * Some Solaris PV drivers fail if max > base + 2. Help them out by
- * hiding the PVRDTSCP leaf if PVRDTSCP is disabled.
- */
- limit = (d->arch.tsc_mode < TSC_MODE_PVRDTSCP) ? 2 : 3;
-
- if ( idx > limit )
+ if ( idx > 3 )
return 0;
if ( domain_cpuid_exists(d, base + idx, sub_idx, eax, ebx, ecx, edx) )
@@ -696,7 +689,7 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
switch ( idx )
{
case 0:
- *eax = base + limit; /* Largest leaf */
+ *eax = base + 3; /* Largest leaf */
*ebx = XEN_CPUID_SIGNATURE_EBX;
*ecx = XEN_CPUID_SIGNATURE_ECX;
*edx = XEN_CPUID_SIGNATURE_EDX;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH v2 3/4] x86/hvm: Add HVM-specific hypervisor CPUID leaf
2014-03-11 3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 1/4] xen/libxc: Allow changes to hypervisor CPUID leaf from config file Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 2/4] x86/hvm: Revert 80ecb40362365ba77e68fc609de8bd3b7208ae19 Boris Ostrovsky
@ 2014-03-11 3:54 ` Boris Ostrovsky
2014-03-11 3:54 ` [PATCH v2 4/4] x86/hvm: Indicate avaliability of HW support of APIC virtualization to HVM guests Boris Ostrovsky
` (2 subsequent siblings)
5 siblings, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 3:54 UTC (permalink / raw)
To: jbeulich, keir, jun.nakajima, eddie.dong, ian.jackson,
stefano.stabellini, ian.campbell
Cc: boris.ostrovsky, xen-devel
CPUID leaf 0x40000004 is for HVM-specific features.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/arch/x86/hvm/hvm.c | 16 ++++++++++++++++
xen/arch/x86/traps.c | 8 ++++++--
xen/include/asm-x86/hvm/hvm.h | 7 +++++++
3 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ae24211..fcbdcfc 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2980,6 +2980,22 @@ unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
return rc ? len : 0; /* fake a copy_from_user() return code */
}
+void hvm_hypervisor_cpuid_leaf(uint32_t idx, uint32_t sub_idx,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ if ( idx != 4 )
+ return;
+
+ *eax = *ebx = *ecx = *edx = 0;
+ if ( !hvm_funcs.hypervisor_cpuid_leaf )
+ return;
+
+ hvm_funcs.hypervisor_cpuid_leaf(idx, sub_idx, eax, ebx, ecx, edx);
+
+ return;
+}
+
void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 75a0ceb..88956b8 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -680,7 +680,7 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
idx -= base;
- if ( idx > 3 )
+ if ( idx > 4 )
return 0;
if ( domain_cpuid_exists(d, base + idx, sub_idx, eax, ebx, ecx, edx) )
@@ -689,7 +689,7 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
switch ( idx )
{
case 0:
- *eax = base + 3; /* Largest leaf */
+ *eax = base + 4; /* Largest leaf */
*ebx = XEN_CPUID_SIGNATURE_EBX;
*ecx = XEN_CPUID_SIGNATURE_ECX;
*edx = XEN_CPUID_SIGNATURE_EDX;
@@ -718,6 +718,10 @@ int cpuid_hypervisor_leaves( uint32_t idx, uint32_t sub_idx,
cpuid_time_leaf( sub_idx, eax, ebx, ecx, edx );
break;
+ case 4:
+ hvm_hypervisor_cpuid_leaf(idx, sub_idx, eax, ebx, ecx, edx);
+ break;
+
default:
BUG();
}
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index dcc3483..e6b1399 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -200,6 +200,10 @@ struct hvm_function_table {
paddr_t *L1_gpa, unsigned int *page_order,
uint8_t *p2m_acc, bool_t access_r,
bool_t access_w, bool_t access_x);
+
+ void (*hypervisor_cpuid_leaf)(uint32_t idx, uint32_t sub_idx,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx);
};
extern struct hvm_function_table hvm_funcs;
@@ -336,6 +340,9 @@ static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v)
#define is_viridian_domain(_d) \
(is_hvm_domain(_d) && ((_d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN]))
+void hvm_hypervisor_cpuid_leaf(uint32_t idx, uint32_t sub_idx,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx);
void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
void hvm_migrate_timers(struct vcpu *v);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH v2 4/4] x86/hvm: Indicate avaliability of HW support of APIC virtualization to HVM guests
2014-03-11 3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
` (2 preceding siblings ...)
2014-03-11 3:54 ` [PATCH v2 3/4] x86/hvm: Add HVM-specific hypervisor CPUID leaf Boris Ostrovsky
@ 2014-03-11 3:54 ` Boris Ostrovsky
2014-03-11 7:37 ` [PATCH v2 0/4] Expose HW APIC virtualization support " Zhang, Yang Z
2014-03-11 11:00 ` David Vrabel
5 siblings, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 3:54 UTC (permalink / raw)
To: jbeulich, keir, jun.nakajima, eddie.dong, ian.jackson,
stefano.stabellini, ian.campbell
Cc: boris.ostrovsky, xen-devel
Set bits in hypervisor CPUID leaf indicating that HW provides (and the
hypervisor enables) HW support for APIC and x2APIC virtualization.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/arch/x86/hvm/vmx/vmx.c | 12 ++++++++++++
xen/include/public/arch-x86/cpuid.h | 10 ++++++++++
2 files changed, 22 insertions(+)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 8395e86..e498c26 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -57,6 +57,7 @@
#include <asm/apic.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/event.h>
+#include <public/arch-x86/cpuid.h>
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
@@ -1646,6 +1647,16 @@ static void vmx_handle_eoi(u8 vector)
__vmwrite(GUEST_INTR_STATUS, status);
}
+void vmx_hypervisor_cpuid_leaf(uint32_t idx, uint32_t sub_idx,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ if ( cpu_has_vmx_apic_reg_virt )
+ *eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
+ if ( cpu_has_vmx_virtualize_x2apic_mode )
+ *eax |= XEN_HVM_CPUID_X2APIC_VIRT;
+}
+
static struct hvm_function_table __initdata vmx_function_table = {
.name = "VMX",
.cpu_up_prepare = vmx_cpu_up_prepare,
@@ -1703,6 +1714,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.handle_eoi = vmx_handle_eoi,
.nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
+ .hypervisor_cpuid_leaf= vmx_hypervisor_cpuid_leaf,
};
const struct hvm_function_table * __init start_vmx(void)
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
index d9bd627..7118760 100644
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -65,4 +65,14 @@
#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+/*
+ * Leaf 5 (0x40000004)
+ * HVM-specific features
+ */
+
+/* EAX Features */
+#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
+#define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */
+
+
#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
--
1.7.10.4
^ permalink raw reply related [flat|nested] 16+ messages in thread* Re: [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests
2014-03-11 3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
` (3 preceding siblings ...)
2014-03-11 3:54 ` [PATCH v2 4/4] x86/hvm: Indicate avaliability of HW support of APIC virtualization to HVM guests Boris Ostrovsky
@ 2014-03-11 7:37 ` Zhang, Yang Z
2014-03-11 14:32 ` Boris Ostrovsky
2014-03-11 11:00 ` David Vrabel
5 siblings, 1 reply; 16+ messages in thread
From: Zhang, Yang Z @ 2014-03-11 7:37 UTC (permalink / raw)
To: Boris Ostrovsky, jbeulich@suse.com, keir@xen.org, Nakajima, Jun,
Dong, Eddie, ian.jackson@eu.citrix.com,
stefano.stabellini@eu.citrix.com, ian.campbell@citrix.com
Cc: xen-devel@lists.xen.org
Boris Ostrovsky wrote on 2014-03-11:
>
> Version 2:
> * Added ability to specify hypervisor CPUID leaves in config file (this requires
> new sysctl)
> * Use 2 bits to indicate what is supported --- one for APIC memory access and
> the
> other for x2APIC. Still not sure whether virtual interrupt delivery should be
> exposed as well.
>
>
> HVM guests running on HW that supports HW APIC virtualization features
> (APIC-register virtualization, virtual interrupt delivery, etc) may
> want to use APIC instead of hvm_pirqs. Since we are not guaranteed to
> have these features on VMX (for example, there is a boot option to
> turn it off) and there is no such support on SVM we need to make the
> guest aware that its APIC accesses may not be so bad.
>
> CPUID seems to be a good way to provide this info to the guest.
>
> Having a guest switch to APIC shows fairly good impact on number of
> VMEXITs. For example, with a pass-through NIC, netperf sees almost
> half as many. Here are results for 'xentrace -e 0x00083fff -c 2 -D -T 2'
>
> (The guest here essentially turned off XENFEAT_hvm_pirqs but we may
> want to use APIC for MSI interrupts only and leave pirqs for gsi).
>
>
> [root@ovs105 virt]# cat orig |xentrace_format
> ~/xen/tools/xentrace/formats | awk '{print $5}' | sort | uniq -c
> 94 cpu_change
> 13944 HLT
> 26341 INJ_VIRQ
> 12054 INTR
> 30784 INTR_WINDOW
> 10126 TRAP
> 124783 VMENTRY
> 124782 VMEXIT
> 59217 VMMCALL
> 35 wrap_buffer
>
> [root@ovs105 virt]# cat apicv |xentrace_format
> ~/xen/tools/xentrace/formats | awk '{print $5}' | sort | uniq -c
> 49 cpu_change 16157 HLT 31 INJ_VIRQ 10652 INTR 38 INTR_WINDOW 10 NPF
> 10286 TRAP
> 71269 VMENTRY
> 71269 VMEXIT
> 34129 VMMCALL
> 15 wrap_buffer
> The difference is even larger when the guest is busy.
>
> These results are in line with what has been reported for KVM. For
> example
> http://events.linuxfoundation.org/sites/events/files/cojp13_natapov.pdf
> http://www.linuxplumbersconf.org/2012/wp-content/uploads/2012/09/2012-lp
> c-virt-intel-vt-feat-nakajima.pdf
>
> I am also not sure whether (cpu_has_vmx_apic_reg_virt &
> cpu_has_vmx_virtualize_x2apic_mode) is sufficient to declare full HW
> APIC support to a guest. The tests show ~95K VMEXITs when virtual
The former is enough. Hardware has APICv must have the virtualize x2apic too.
> interrupt delivery and posted interrupts are turned off so there
> appears to still be some benefit. I suppose we can use another CPUID
> bit for these two (although I am not particularly eager to do this).
The three features are coexisting. I think you can use one bit to show them.
>
> Boris Ostrovsky (4):
> xen/libxc: Allow changes to hypervisor CPUID leaf from config file
> x86/hvm: Revert 80ecb40362365ba77e68fc609de8bd3b7208ae19
> x86/hvm: Add HVM-specific hypervisor CPUID leaf
> x86/hvm: Indicate avaliability of HW support of APIC virtualization
> to HVM guests
> tools/libxc/xc_cpuid_x86.c | 23 ++++++++++++++++++++++-
> tools/libxc/xc_misc.c | 18 ++++++++++++++++++
> tools/libxc/xenctrl.h | 2 ++
> xen/arch/x86/domain.c | 19 ++++++++++++++++---
> xen/arch/x86/hvm/hvm.c | 16 ++++++++++++++++
> xen/arch/x86/hvm/vmx/vmx.c | 12 ++++++++++++
> xen/arch/x86/sysctl.c | 17 +++++++++++++++++
> xen/arch/x86/traps.c | 18 +++++++++---------
> xen/include/asm-x86/domain.h | 7 +++++++
> xen/include/asm-x86/hvm/hvm.h | 7 +++++++
> xen/include/public/arch-x86/cpuid.h | 10 ++++++++++
> xen/include/public/sysctl.h | 18 ++++++++++++++++++
> 12 files changed, 154 insertions(+), 13 deletions(-)
Best regards,
Yang
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests
2014-03-11 7:37 ` [PATCH v2 0/4] Expose HW APIC virtualization support " Zhang, Yang Z
@ 2014-03-11 14:32 ` Boris Ostrovsky
0 siblings, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 14:32 UTC (permalink / raw)
To: Zhang, Yang Z
Cc: keir@xen.org, jbeulich@suse.com, stefano.stabellini@eu.citrix.com,
Dong, Eddie, ian.jackson@eu.citrix.com, xen-devel@lists.xen.org,
Nakajima, Jun, ian.campbell@citrix.com
On 03/11/2014 03:37 AM, Zhang, Yang Z wrote:
> Boris Ostrovsky wrote on 2014-03-11:
>> Version 2:
>> * Added ability to specify hypervisor CPUID leaves in config file (this requires
>> new sysctl)
>> * Use 2 bits to indicate what is supported --- one for APIC memory access and
>> the
>> other for x2APIC. Still not sure whether virtual interrupt delivery should be
>> exposed as well.
>>
>> ...
>>
>> I am also not sure whether (cpu_has_vmx_apic_reg_virt &
>> cpu_has_vmx_virtualize_x2apic_mode) is sufficient to declare full HW
>> APIC support to a guest. The tests show ~95K VMEXITs when virtual
> The former is enough. Hardware has APICv must have the virtualize x2apic too.
>
>> interrupt delivery and posted interrupts are turned off so there
>> appears to still be some benefit. I suppose we can use another CPUID
>> bit for these two (although I am not particularly eager to do this).
> The three features are coexisting. I think you can use one bit to show them.
As I learned yesterday (the hard way) this is not necessarily the case.
I was testing this on a laptop that has MSR 0x48b
(MSR_IA32_VMX_PROCBASED_CTLS2) return 0x8ff00000000: yes for x2APIC
virtualization but no for APIC register access virtualization.
Which is why I added another bit to distinguish the two
Thanks.
-boris
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests
2014-03-11 3:53 [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests Boris Ostrovsky
` (4 preceding siblings ...)
2014-03-11 7:37 ` [PATCH v2 0/4] Expose HW APIC virtualization support " Zhang, Yang Z
@ 2014-03-11 11:00 ` David Vrabel
2014-03-11 14:14 ` Boris Ostrovsky
5 siblings, 1 reply; 16+ messages in thread
From: David Vrabel @ 2014-03-11 11:00 UTC (permalink / raw)
To: Boris Ostrovsky
Cc: keir, jbeulich, stefano.stabellini, eddie.dong, ian.jackson,
xen-devel, jun.nakajima, ian.campbell
On 11/03/14 03:53, Boris Ostrovsky wrote:
>
> Version 2:
> * Added ability to specify hypervisor CPUID leaves in config file (this requires
> new sysctl)
> * Use 2 bits to indicate what is supported --- one for APIC memory access and the
> other for x2APIC. Still not sure whether virtual interrupt delivery should be
> exposed as well.
>
>
>
> HVM guests running on HW that supports HW APIC virtualization features
> (APIC-register virtualization, virtual interrupt delivery, etc) may
> want to use APIC instead of hvm_pirqs. Since we are not guaranteed to
> have these features on VMX (for example, there is a boot option to
> turn it off) and there is no such support on SVM we need to make the
> guest aware that its APIC accesses may not be so bad.
>
> CPUID seems to be a good way to provide this info to the guest.
It's seems a bit odd to use a Xen-specific (?) CPUID leaf to report that
the guest should use a bare-metal feature -- that's the default
behaviour of a HVM guest.
I think it makes more sense to hint that the guest that a PV alternate
is available and then omit this hint if using a virtualized APIC
performs better.
David
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [PATCH v2 0/4] Expose HW APIC virtualization support to HVM guests
2014-03-11 11:00 ` David Vrabel
@ 2014-03-11 14:14 ` Boris Ostrovsky
0 siblings, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2014-03-11 14:14 UTC (permalink / raw)
To: David Vrabel
Cc: keir, jun.nakajima, stefano.stabellini, ian.jackson, eddie.dong,
xen-devel, jbeulich, ian.campbell
On 03/11/2014 07:00 AM, David Vrabel wrote:
> On 11/03/14 03:53, Boris Ostrovsky wrote:
>> Version 2:
>> * Added ability to specify hypervisor CPUID leaves in config file (this requires
>> new sysctl)
>> * Use 2 bits to indicate what is supported --- one for APIC memory access and the
>> other for x2APIC. Still not sure whether virtual interrupt delivery should be
>> exposed as well.
>>
>>
>>
>> HVM guests running on HW that supports HW APIC virtualization features
>> (APIC-register virtualization, virtual interrupt delivery, etc) may
>> want to use APIC instead of hvm_pirqs. Since we are not guaranteed to
>> have these features on VMX (for example, there is a boot option to
>> turn it off) and there is no such support on SVM we need to make the
>> guest aware that its APIC accesses may not be so bad.
>>
>> CPUID seems to be a good way to provide this info to the guest.
> It's seems a bit odd to use a Xen-specific (?) CPUID leaf to report that
> the guest should use a bare-metal feature -- that's the default
> behaviour of a HVM guest.
>
> I think it makes more sense to hint that the guest that a PV alternate
> is available and then omit this hint if using a virtualized APIC
> performs better.
That's what I originally thought too.
However, I don't think we can do this since we don't know guest's
features. For example, we can have a guest that doesn't have APIC
support (no CONFIG_X86_LOCAL_APIC). If we don't provide
XENFEAT_hvm_pirqs (which is essentially what this CPUID bit is trying to
suggest that the guest doesn't use) then I am not sure how well this
will work out.
More generally, having one feature doesn't necessarily makes another
feature disappear (and this is true in this case).
Thanks.
-boris
^ permalink raw reply [flat|nested] 16+ messages in thread