* [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
@ 2012-09-05 5:41 Li, Jiongxi
2012-09-06 16:01 ` Avi Kivity
0 siblings, 1 reply; 6+ messages in thread
From: Li, Jiongxi @ 2012-09-05 5:41 UTC (permalink / raw)
To: kvm@vger.kernel.org; +Cc: avi@redhat.com
- APIC read doesn't cause VM-Exit
- APIC write becomes trap-like
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
---
arch/x86/include/asm/vmx.h | 2 ++
arch/x86/kvm/lapic.c | 16 ++++++++++++++++
arch/x86/kvm/lapic.h | 2 ++
arch/x86/kvm/vmx.c | 30 ++++++++++++++++++++++++++++++
4 files changed, 50 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 74fcb96..4a8193e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -59,6 +59,7 @@
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
@@ -282,6 +283,7 @@ enum vmcs_field {
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
#define EXIT_REASON_INVPCID 58
/*
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce87878..4a6d3a4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1037,6 +1037,22 @@ static int apic_mmio_write(struct kvm_io_device *this,
return 0;
}
+/* emulate APIC access in a trap manner */
+int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+{
+ u32 val;
+
+ /* hw has done the conditional check and inst decode */
+ offset &= 0xff0;
+ if ((offset != APIC_EOI) &&
+ apic_reg_read(vcpu->arch.apic, offset, 4, &val))
+ return 1;
+
+ /* TODO: optimize to just emulate side effect w/o one more write */
+ return apic_reg_write(vcpu->arch.apic, offset, val);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
+
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4af5405..cd4875e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -55,6 +55,8 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
+int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
+
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c00f03d..3d92277 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -83,6 +83,9 @@ module_param(vmm_exclusive, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
+static bool __read_mostly enable_apicv_reg = 0;
+module_param(enable_apicv_reg, bool, S_IRUGO);
+
/*
* If nested=1, nested virtualization is supported, i.e., guests may use
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -760,6 +763,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}
+static inline bool cpu_has_vmx_apic_register_virt(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_APIC_REGISTER_VIRT;
+}
+
static inline bool cpu_has_vmx_flexpriority(void)
{
return cpu_has_vmx_tpr_shadow() &&
@@ -2475,6 +2484,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_INVPCID;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -2486,6 +2496,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
#endif
+
+ if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
+ _cpu_based_2nd_exec_control &= ~(
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
+
if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
enabled */
@@ -2683,6 +2698,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_ple())
ple_gap = 0;
+ if (!cpu_has_vmx_apic_register_virt())
+ enable_apicv_reg = 0;
+
if (nested)
nested_vmx_setup_ctls_msrs();
@@ -3812,6 +3830,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
if (!ple_gap)
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+ if (!enable_apicv_reg)
+ exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
return exec_control;
}
@@ -4773,6 +4793,15 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
+static int handle_apic_write(struct kvm_vcpu *vcpu)
+{
+ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ u32 offset = exit_qualification & 0xfff;
+
+ /* APIC-write VM exit is trap-like and thus no need to adjust IP */
+ return kvm_apic_write_nodecode(vcpu, offset) == 0;
+}
+
static int handle_task_switch(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5712,6 +5741,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_VMON] = handle_vmon,
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
+ [EXIT_REASON_APIC_WRITE] = handle_apic_write,
[EXIT_REASON_WBINVD] = handle_wbinvd,
[EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
--
1.7.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
2012-09-05 5:41 [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support Li, Jiongxi
@ 2012-09-06 16:01 ` Avi Kivity
2012-09-14 14:14 ` Li, Jiongxi
2012-09-18 14:38 ` Li, Jiongxi
0 siblings, 2 replies; 6+ messages in thread
From: Avi Kivity @ 2012-09-06 16:01 UTC (permalink / raw)
To: Li, Jiongxi; +Cc: kvm@vger.kernel.org
On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
> - APIC read doesn't cause VM-Exit
> - APIC write becomes trap-like
>
>
> +/* emulate APIC access in a trap manner */
> +int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
> +{
> + u32 val;
> +
> + /* hw has done the conditional check and inst decode */
> + offset &= 0xff0;
> + if ((offset != APIC_EOI) &&
> + apic_reg_read(vcpu->arch.apic, offset, 4, &val))
> + return 1;
TMICT is a write-only register IIRC.
> +
> + /* TODO: optimize to just emulate side effect w/o one more write */
> + return apic_reg_write(vcpu->arch.apic, offset, val);
val may be uninitialized here.
> +}
> +EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
> +
> void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
> {
>
> +static bool __read_mostly enable_apicv_reg = 0;
Enable by default.
> +module_param(enable_apicv_reg, bool, S_IRUGO);
Let's have one module parameter for all related features, called apicv.
So modprobe kvm-intel apicv=0 disables it.
>
> +static int handle_apic_write(struct kvm_vcpu *vcpu)
> +{
> + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> + u32 offset = exit_qualification & 0xfff;
> +
> + /* APIC-write VM exit is trap-like and thus no need to adjust IP */
> + return kvm_apic_write_nodecode(vcpu, offset) == 0;
> +}
Return 1 here means exit to userspace. This will go crazy.
You need to return 0 always. If this is an msr write to a read-only
register, you need to inject a #GP (IIRC).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 6+ messages in thread
* RE: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
2012-09-06 16:01 ` Avi Kivity
@ 2012-09-14 14:14 ` Li, Jiongxi
2012-09-16 9:41 ` Avi Kivity
2012-09-18 14:38 ` Li, Jiongxi
1 sibling, 1 reply; 6+ messages in thread
From: Li, Jiongxi @ 2012-09-14 14:14 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm@vger.kernel.org
Sorry for the late response
> -----Original Message-----
> From: Avi Kivity [mailto:avi@redhat.com]
> Sent: Friday, September 07, 2012 12:02 AM
> To: Li, Jiongxi
> Cc: kvm@vger.kernel.org
> Subject: Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization
> support
>
> On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
> > - APIC read doesn't cause VM-Exit
> > - APIC write becomes trap-like
> >
> >
> > +/* emulate APIC access in a trap manner */ int
> > +kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) {
> > + u32 val;
> > +
> > + /* hw has done the conditional check and inst decode */
> > + offset &= 0xff0;
> > + if ((offset != APIC_EOI) &&
> > + apic_reg_read(vcpu->arch.apic, offset, 4, &val))
> > + return 1;
>
> TMICT is a write-only register IIRC.
>
I haven't seen TMICT described as write-only in the SDM. Also, there is an 'apic_get_reg(apic, APIC_TMICT)' call in the 'apic_get_tmcct' function.
> > +
> > + /* TODO: optimize to just emulate side effect w/o one more write */
> > + return apic_reg_write(vcpu->arch.apic, offset, val);
>
> val may be uninitialized here.
>
Can you elaborate on that? For APIC_EOI, there is no need to use val. In the case where apic_reg_read fails, the function returns before calling apic_reg_write.
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
> > +
> > void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) {
> >
> > +static bool __read_mostly enable_apicv_reg = 0;
>
> Enable by default.
>
> > +module_param(enable_apicv_reg, bool, S_IRUGO);
>
> Let's have one module parameter for all related features, called apicv.
> So modprobe kvm-intel apicv=0 disables it.
OK, we will change that.
>
> >
> > +static int handle_apic_write(struct kvm_vcpu *vcpu) {
> > + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> > + u32 offset = exit_qualification & 0xfff;
> > +
> > + /* APIC-write VM exit is trap-like and thus no need to adjust IP */
> > + return kvm_apic_write_nodecode(vcpu, offset) == 0; }
>
> Return 1 here means exit to userspace. This will go crazy.
>
> You need to return 0 always. If this is an msr write to a read-only register, you
> need to inject a #GP (IIRC).
OK.
>
>
>
> --
> error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
2012-09-14 14:14 ` Li, Jiongxi
@ 2012-09-16 9:41 ` Avi Kivity
0 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2012-09-16 9:41 UTC (permalink / raw)
To: Li, Jiongxi; +Cc: kvm@vger.kernel.org
On 09/14/2012 05:14 PM, Li, Jiongxi wrote:
> Sorry for the late response
>
>> -----Original Message-----
>> From: Avi Kivity [mailto:avi@redhat.com]
>> Sent: Friday, September 07, 2012 12:02 AM
>> To: Li, Jiongxi
>> Cc: kvm@vger.kernel.org
>> Subject: Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization
>> support
>>
>> On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
>> > - APIC read doesn't cause VM-Exit
>> > - APIC write becomes trap-like
>> >
>> >
>> > +/* emulate APIC access in a trap manner */ int
>> > +kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) {
>> > + u32 val;
>> > +
>> > + /* hw has done the conditional check and inst decode */
>> > + offset &= 0xff0;
>> > + if ((offset != APIC_EOI) &&
>> > + apic_reg_read(vcpu->arch.apic, offset, 4, &val))
>> > + return 1;
>>
>> TMICT is a write-only register IIRC.
>>
> I haven't seen TMICT described as write-only in the SDM. Also, there is an 'apic_get_reg(apic, APIC_TMICT)' call in the 'apic_get_tmcct' function.
I can't see it either now. So this is okay.
>> > +
>> > + /* TODO: optimize to just emulate side effect w/o one more write */
>> > + return apic_reg_write(vcpu->arch.apic, offset, val);
>>
>> val may be uninitialized here.
>>
> Can you elaborate on that? For APIC_EOI, there is no need to use val. In the case where apic_reg_read fails, the function returns before calling apic_reg_write.
Right, but the compiler may complain. Best to initialize val to zero.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 6+ messages in thread
* RE: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
2012-09-06 16:01 ` Avi Kivity
2012-09-14 14:14 ` Li, Jiongxi
@ 2012-09-18 14:38 ` Li, Jiongxi
2012-09-19 9:12 ` Avi Kivity
1 sibling, 1 reply; 6+ messages in thread
From: Li, Jiongxi @ 2012-09-18 14:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm@vger.kernel.org
> -----Original Message-----
> From: Avi Kivity [mailto:avi@redhat.com]
> Sent: Friday, September 07, 2012 12:02 AM
> To: Li, Jiongxi
> Cc: kvm@vger.kernel.org
> Subject: Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization
> support
>
> On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
> > - APIC read doesn't cause VM-Exit
> > - APIC write becomes trap-like
> >
> >
> > +/* emulate APIC access in a trap manner */ int
> > +kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) {
> > + u32 val;
> > +
> > + /* hw has done the conditional check and inst decode */
> > + offset &= 0xff0;
> > + if ((offset != APIC_EOI) &&
> > + apic_reg_read(vcpu->arch.apic, offset, 4, &val))
> > + return 1;
>
> TMICT is a write-only register IIRC.
>
> > +
> > + /* TODO: optimize to just emulate side effect w/o one more write */
> > + return apic_reg_write(vcpu->arch.apic, offset, val);
>
> val may be uninitialized here.
>
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
> > +
> > void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) {
> >
> > +static bool __read_mostly enable_apicv_reg = 0;
>
> Enable by default.
>
> > +module_param(enable_apicv_reg, bool, S_IRUGO);
>
> Let's have one module parameter for all related features, called apicv.
> So modprobe kvm-intel apicv=0 disables it.
>
> >
> > +static int handle_apic_write(struct kvm_vcpu *vcpu) {
> > + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> > + u32 offset = exit_qualification & 0xfff;
> > +
> > + /* APIC-write VM exit is trap-like and thus no need to adjust IP */
> > + return kvm_apic_write_nodecode(vcpu, offset) == 0; }
>
> Return 1 here means exit to userspace. This will go crazy.
>
> You need to return 0 always. If this is an msr write to a read-only register, you
> need to inject a #GP (IIRC).
Return 0 means exit to userspace, so it should return 1, right?
__vcpu_run
{
while (r>0)
{
r = vcpu_enter_guest(vcpu)
if (r<=0)
break;
}
}
>
>
>
> --
> error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
2012-09-18 14:38 ` Li, Jiongxi
@ 2012-09-19 9:12 ` Avi Kivity
0 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2012-09-19 9:12 UTC (permalink / raw)
To: Li, Jiongxi; +Cc: kvm@vger.kernel.org
On 09/18/2012 05:38 PM, Li, Jiongxi wrote:
>>
>> >
>> > +static int handle_apic_write(struct kvm_vcpu *vcpu) {
>> > + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>> > + u32 offset = exit_qualification & 0xfff;
>> > +
>> > + /* APIC-write VM exit is trap-like and thus no need to adjust IP */
>> > + return kvm_apic_write_nodecode(vcpu, offset) == 0; }
>>
>> Return 1 here means exit to userspace. This will go crazy.
>>
>> You need to return 0 always. If this is an msr write to a read-only register, you
>> need to inject a #GP (IIRC).
>
> Return 0 means exit to userspace, so it should return 1, right?
> __vcpu_run
> {
> while (r>0)
> {
> r = vcpu_enter_guest(vcpu)
> if (r<=0)
> break;
> }
> }
Yes, sorry. We should switch to a symbolic constant one day, it's
confusing.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-09-19 9:12 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-09-05 5:41 [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support Li, Jiongxi
2012-09-06 16:01 ` Avi Kivity
2012-09-14 14:14 ` Li, Jiongxi
2012-09-16 9:41 ` Avi Kivity
2012-09-18 14:38 ` Li, Jiongxi
2012-09-19 9:12 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox