public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
@ 2012-09-05  5:41 Li, Jiongxi
  2012-09-06 16:01 ` Avi Kivity
  0 siblings, 1 reply; 6+ messages in thread
From: Li, Jiongxi @ 2012-09-05  5:41 UTC (permalink / raw)
  To: kvm@vger.kernel.org; +Cc: avi@redhat.com

- APIC read doesn't cause VM-Exit
- APIC write becomes trap-like

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
---
 arch/x86/include/asm/vmx.h |    2 ++
 arch/x86/kvm/lapic.c       |   16 ++++++++++++++++
 arch/x86/kvm/lapic.h       |    2 ++
 arch/x86/kvm/vmx.c         |   30 ++++++++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 74fcb96..4a8193e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -59,6 +59,7 @@
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT       0x00000100
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 
@@ -282,6 +283,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD		54
 #define EXIT_REASON_XSETBV		55
+#define EXIT_REASON_APIC_WRITE		56
 #define EXIT_REASON_INVPCID		58
 
 /*
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce87878..4a6d3a4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1037,6 +1037,22 @@ static int apic_mmio_write(struct kvm_io_device *this,
 	return 0;
 }
 
+/* emulate APIC access in a trap manner */
+int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
+{
+	u32 val;
+
+	/* hw has done the conditional check and inst decode */
+	offset &= 0xff0;
+	if ((offset != APIC_EOI) &&
+	     apic_reg_read(vcpu->arch.apic, offset, 4, &val))
+		return 1;
+
+	/* TODO: optimize to just emulate side effect w/o one more write */
+	return apic_reg_write(vcpu->arch.apic, offset, val);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
+
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4af5405..cd4875e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -55,6 +55,8 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
 
+int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
+
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c00f03d..3d92277 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -83,6 +83,9 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
+static bool __read_mostly enable_apicv_reg = 0;
+module_param(enable_apicv_reg, bool, S_IRUGO);
+
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -760,6 +763,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 }
 
+static inline bool cpu_has_vmx_apic_register_virt(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_APIC_REGISTER_VIRT;
+}
+
 static inline bool cpu_has_vmx_flexpriority(void)
 {
 	return cpu_has_vmx_tpr_shadow() &&
@@ -2475,6 +2484,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
 			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
 			SECONDARY_EXEC_RDTSCP |
+			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_ENABLE_INVPCID;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -2486,6 +2496,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
 		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
 #endif
+
+	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
+		_cpu_based_2nd_exec_control &= ~(
+				SECONDARY_EXEC_APIC_REGISTER_VIRT);
+
 	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
 		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
 		   enabled */
@@ -2683,6 +2698,9 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_ple())
 		ple_gap = 0;
 
+	if (!cpu_has_vmx_apic_register_virt())
+		enable_apicv_reg = 0;
+
 	if (nested)
 		nested_vmx_setup_ctls_msrs();
 
@@ -3812,6 +3830,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 	if (!ple_gap)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+	if (!enable_apicv_reg)
+		exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
 	return exec_control;
 }
 
@@ -4773,6 +4793,15 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
 	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
+static int handle_apic_write(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	u32 offset = exit_qualification & 0xfff;
+
+	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
+	return kvm_apic_write_nodecode(vcpu, offset) == 0;
+}
+
 static int handle_task_switch(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5712,6 +5741,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_VMON]                    = handle_vmon,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
+	[EXIT_REASON_APIC_WRITE]              = handle_apic_write,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
  2012-09-05  5:41 [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support Li, Jiongxi
@ 2012-09-06 16:01 ` Avi Kivity
  2012-09-14 14:14   ` Li, Jiongxi
  2012-09-18 14:38   ` Li, Jiongxi
  0 siblings, 2 replies; 6+ messages in thread
From: Avi Kivity @ 2012-09-06 16:01 UTC (permalink / raw)
  To: Li, Jiongxi; +Cc: kvm@vger.kernel.org

On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
> - APIC read doesn't cause VM-Exit
> - APIC write becomes trap-like
> 
>  
> +/* emulate APIC access in a trap manner */
> +int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
> +{
> +	u32 val;
> +
> +	/* hw has done the conditional check and inst decode */
> +	offset &= 0xff0;
> +	if ((offset != APIC_EOI) &&
> +	     apic_reg_read(vcpu->arch.apic, offset, 4, &val))
> +		return 1;

TMICT is a write-only register IIRC.

> +
> +	/* TODO: optimize to just emulate side effect w/o one more write */
> +	return apic_reg_write(vcpu->arch.apic, offset, val);

val may be uninitialized here.

> +}
> +EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
> +
>  void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
>  {
>  
> +static bool __read_mostly enable_apicv_reg = 0;

Enable by default.

> +module_param(enable_apicv_reg, bool, S_IRUGO);

Let's have one module parameter for all related features, called apicv.
 So modprobe kvm-intel apicv=0 disables it.

>  
> +static int handle_apic_write(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +	u32 offset = exit_qualification & 0xfff;
> +
> +	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
> +	return kvm_apic_write_nodecode(vcpu, offset) == 0;
> +}

Return 1 here means exit to userspace.  This will go crazy.

You need to return 0 always.  If this is an msr write to a read-only
register, you need to inject a #GP (IIRC).



-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
  2012-09-06 16:01 ` Avi Kivity
@ 2012-09-14 14:14   ` Li, Jiongxi
  2012-09-16  9:41     ` Avi Kivity
  2012-09-18 14:38   ` Li, Jiongxi
  1 sibling, 1 reply; 6+ messages in thread
From: Li, Jiongxi @ 2012-09-14 14:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm@vger.kernel.org

Sorry for the late response

> -----Original Message-----
> From: Avi Kivity [mailto:avi@redhat.com]
> Sent: Friday, September 07, 2012 12:02 AM
> To: Li, Jiongxi
> Cc: kvm@vger.kernel.org
> Subject: Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization
> support
> 
> On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
> > - APIC read doesn't cause VM-Exit
> > - APIC write becomes trap-like
> >
> >
> > +/* emulate APIC access in a trap manner */ int
> > +kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) {
> > +	u32 val;
> > +
> > +	/* hw has done the conditional check and inst decode */
> > +	offset &= 0xff0;
> > +	if ((offset != APIC_EOI) &&
> > +	     apic_reg_read(vcpu->arch.apic, offset, 4, &val))
> > +		return 1;
> 
> TMICT is a write-only register IIRC.
> 
I haven't seen TMICT documented as write-only in the SDM. Also, there is an 'apic_get_reg(apic, APIC_TMICT)' call in the 'apic_get_tmcct' function.
> > +
> > +	/* TODO: optimize to just emulate side effect w/o one more write */
> > +	return apic_reg_write(vcpu->arch.apic, offset, val);
> 
> val may be uninitialized here.
> 
Can you elaborate on that? For APIC_EOI, there is no need to use val. If apic_reg_read fails, the function returns before reaching apic_reg_write.
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
> > +
> >  void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)  {
> >
> > +static bool __read_mostly enable_apicv_reg = 0;
> 
> Enable by default.
> 
> > +module_param(enable_apicv_reg, bool, S_IRUGO);
> 
> Let's have one module parameter for all related features, called apicv.
>  So modprobe kvm-intel apicv=0 disables it.
Ok, We will change that
> 
> >
> > +static int handle_apic_write(struct kvm_vcpu *vcpu) {
> > +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> > +	u32 offset = exit_qualification & 0xfff;
> > +
> > +	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
> > +	return kvm_apic_write_nodecode(vcpu, offset) == 0; }
> 
> Return 1 here means exit to userspace.  This will go crazy.
> 
> You need to return 0 always.  If this is an msr write to a read-only register, you
> need to inject a #GP (IIRC).
OK.
> 
> 
> 
> --
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
  2012-09-14 14:14   ` Li, Jiongxi
@ 2012-09-16  9:41     ` Avi Kivity
  0 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2012-09-16  9:41 UTC (permalink / raw)
  To: Li, Jiongxi; +Cc: kvm@vger.kernel.org

On 09/14/2012 05:14 PM, Li, Jiongxi wrote:
> Sorry for the late response
> 
>> -----Original Message-----
>> From: Avi Kivity [mailto:avi@redhat.com]
>> Sent: Friday, September 07, 2012 12:02 AM
>> To: Li, Jiongxi
>> Cc: kvm@vger.kernel.org
>> Subject: Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization
>> support
>> 
>> On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
>> > - APIC read doesn't cause VM-Exit
>> > - APIC write becomes trap-like
>> >
>> >
>> > +/* emulate APIC access in a trap manner */ int
>> > +kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) {
>> > +	u32 val;
>> > +
>> > +	/* hw has done the conditional check and inst decode */
>> > +	offset &= 0xff0;
>> > +	if ((offset != APIC_EOI) &&
>> > +	     apic_reg_read(vcpu->arch.apic, offset, 4, &val))
>> > +		return 1;
>> 
>> TMICT is a write-only register IIRC.
>> 
> I haven't seen TMICT documented as write-only in the SDM. Also, there is an 'apic_get_reg(apic, APIC_TMICT)' call in the 'apic_get_tmcct' function.

I can't see it either now.  So this is okay.

>> > +
>> > +	/* TODO: optimize to just emulate side effect w/o one more write */
>> > +	return apic_reg_write(vcpu->arch.apic, offset, val);
>> 
>> val may be uninitialized here.
>> 
> Can you elaborate on that? For APIC_EOI, there is no need to use val. If apic_reg_read fails, the function returns before reaching apic_reg_write.

Right, but the compiler may complain.  Best to initialize val to zero.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
  2012-09-06 16:01 ` Avi Kivity
  2012-09-14 14:14   ` Li, Jiongxi
@ 2012-09-18 14:38   ` Li, Jiongxi
  2012-09-19  9:12     ` Avi Kivity
  1 sibling, 1 reply; 6+ messages in thread
From: Li, Jiongxi @ 2012-09-18 14:38 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm@vger.kernel.org



> -----Original Message-----
> From: Avi Kivity [mailto:avi@redhat.com]
> Sent: Friday, September 07, 2012 12:02 AM
> To: Li, Jiongxi
> Cc: kvm@vger.kernel.org
> Subject: Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization
> support
> 
> On 09/05/2012 08:41 AM, Li, Jiongxi wrote:
> > - APIC read doesn't cause VM-Exit
> > - APIC write becomes trap-like
> >
> >
> > +/* emulate APIC access in a trap manner */ int
> > +kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) {
> > +	u32 val;
> > +
> > +	/* hw has done the conditional check and inst decode */
> > +	offset &= 0xff0;
> > +	if ((offset != APIC_EOI) &&
> > +	     apic_reg_read(vcpu->arch.apic, offset, 4, &val))
> > +		return 1;
> 
> TMICT is a write-only register IIRC.
> 
> > +
> > +	/* TODO: optimize to just emulate side effect w/o one more write */
> > +	return apic_reg_write(vcpu->arch.apic, offset, val);
> 
> val may be uninitialized here.
> 
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
> > +
> >  void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)  {
> >
> > +static bool __read_mostly enable_apicv_reg = 0;
> 
> Enable by default.
> 
> > +module_param(enable_apicv_reg, bool, S_IRUGO);
> 
> Let's have one module parameter for all related features, called apicv.
>  So modprobe kvm-intel apicv=0 disables it.
> 
> >
> > +static int handle_apic_write(struct kvm_vcpu *vcpu) {
> > +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> > +	u32 offset = exit_qualification & 0xfff;
> > +
> > +	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
> > +	return kvm_apic_write_nodecode(vcpu, offset) == 0; }
> 
> Return 1 here means exit to userspace.  This will go crazy.
> 
> You need to return 0 always.  If this is an msr write to a read-only register, you
> need to inject a #GP (IIRC).

Return 0 means exit to userspace, so it should return 1, right?
__vcpu_run
{
  while (r>0)
  {  
     r = vcpu_enter_guest(vcpu)
     if (r<=0)
       break;
  }
}
> 
> 
> 
> --
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support
  2012-09-18 14:38   ` Li, Jiongxi
@ 2012-09-19  9:12     ` Avi Kivity
  0 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2012-09-19  9:12 UTC (permalink / raw)
  To: Li, Jiongxi; +Cc: kvm@vger.kernel.org

On 09/18/2012 05:38 PM, Li, Jiongxi wrote:
>> 
>> >
>> > +static int handle_apic_write(struct kvm_vcpu *vcpu) {
>> > +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>> > +	u32 offset = exit_qualification & 0xfff;
>> > +
>> > +	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
>> > +	return kvm_apic_write_nodecode(vcpu, offset) == 0; }
>> 
>> Return 1 here means exit to userspace.  This will go crazy.
>> 
>> You need to return 0 always.  If this is an msr write to a read-only register, you
>> need to inject a #GP (IIRC).
> 
> Return 0 means exit to userspace, so it should return 1, right?
> __vcpu_run
> {
>   while (r>0)
>   {  
>      r = vcpu_enter_guest(vcpu)
>      if (r<=0)
>        break;
>   }
> }

Yes, sorry.  We should switch to a symbolic constant one day, it's
confusing.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-09-19  9:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-05  5:41 [PATCH 1/5]KVM: x86, apicv: add APICv register virtualization support Li, Jiongxi
2012-09-06 16:01 ` Avi Kivity
2012-09-14 14:14   ` Li, Jiongxi
2012-09-16  9:41     ` Avi Kivity
2012-09-18 14:38   ` Li, Jiongxi
2012-09-19  9:12     ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox