All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gleb Natapov <gleb@redhat.com>
To: "Zhang, Yang Z" <yang.z.zhang@intel.com>
Cc: "kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	"Shan, Haitao" <haitao.shan@intel.com>,
	"mtosatti@redhat.com" <mtosatti@redhat.com>,
	"Tian, Kevin" <kevin.tian@intel.com>
Subject: Re: [PATCH v9 2/3] x86, apicv: add virtual x2apic support
Date: Fri, 11 Jan 2013 15:51:23 +0200	[thread overview]
Message-ID: <20130111135123.GA6513@redhat.com> (raw)
In-Reply-To: <A9667DDFB95DB7438FA9D7D576C3D87E2F7518@SHSMSX101.ccr.corp.intel.com>

On Fri, Jan 11, 2013 at 02:37:15AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-01-10:
> > On Thu, Jan 10, 2013 at 08:32:06AM +0000, Zhang, Yang Z wrote:
> >> Gleb Natapov wrote on 2013-01-10:
> >>> On Thu, Jan 10, 2013 at 03:26:07PM +0800, Yang Zhang wrote:
> >>>> From: Yang Zhang <yang.z.zhang@Intel.com>
> >>>> 
> >>>> basically to benefit from apicv, we need to enable virtualized x2apic mode.
> >>>> Currently, we only enable it when guest is really using x2apic.
> >>>> 
> >>>> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled
> >>> x2apic:
> >>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization,
> >>>>     		   except APIC ID and TMCCT.
> >>>>     APIC ID and TMCCT: need software's assistance to get right value.
> >>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery.
> >>>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> >>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> >>>> ---
> >>>>  arch/x86/include/asm/kvm_host.h |    2 + arch/x86/include/asm/vmx.h
> >>>>    |    1 + arch/x86/kvm/lapic.c            |    5 +-
> >>>>    arch/x86/kvm/svm.c              |    6 + arch/x86/kvm/vmx.c |  194
> >>>>    +++++++++++++++++++++++++++++++++++++-- 5 files
> > changed, 200
> >>>>  insertions(+), 8 deletions(-)
> >>>> diff --git a/arch/x86/include/asm/kvm_host.h
> >>>> b/arch/x86/include/asm/kvm_host.h index c431b33..572a562 100644 ---
> >>>> a/arch/x86/include/asm/kvm_host.h +++
> >>>> b/arch/x86/include/asm/kvm_host.h @@ -697,6 +697,8 @@ struct
> >>>> kvm_x86_ops {
> >>>>  	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
> >>>>  	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
> >>>>  	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> >>>> +	void (*enable_virtual_x2apic_mode)(struct kvm_vcpu *vcpu);
> >>>> +	void (*disable_virtual_x2apic_mode)(struct kvm_vcpu *vcpu);
> >>> Make one callback with enable/disable parameter. And do not forget SVM.
> >>> 
> >>>>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> >>>>  	int (*get_tdp_level)(void);
> >>>>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool
> > is_mmio);
> >>>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> >>>> index 44c3f7e..0a54df0 100644
> >>>> --- a/arch/x86/include/asm/vmx.h
> >>>> +++ b/arch/x86/include/asm/vmx.h
> >>>> @@ -139,6 +139,7 @@
> >>>>  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define
> >>>>  SECONDARY_EXEC_ENABLE_EPT               0x00000002 #define
> >>>>  SECONDARY_EXEC_RDTSCP			0x00000008 +#define
> >>>>  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010 #define
> >>>>  SECONDARY_EXEC_ENABLE_VPID              0x00000020 #define
> >>>>  SECONDARY_EXEC_WBINVD_EXITING		0x00000040 #define
> >>>>  SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
> >>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>>> index 0664c13..ec38906 100644
> >>>> --- a/arch/x86/kvm/lapic.c
> >>>> +++ b/arch/x86/kvm/lapic.c
> >>>> @@ -1328,7 +1328,10 @@ void kvm_lapic_set_base(struct kvm_vcpu
> > *vcpu,
> >>> u64 value)
> >>>>  		u32 id = kvm_apic_id(apic);
> >>>>  		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> >>>>  		kvm_apic_set_ldr(apic, ldr);
> >>>> -	}
> >>>> +		kvm_x86_ops->enable_virtual_x2apic_mode(vcpu);
> >>>> +	} else
> >>>> +		kvm_x86_ops->disable_virtual_x2apic_mode(vcpu);
> >>>> +
> >>> You just broke SVM.
> >>>>  	apic->base_address = apic->vcpu->arch.apic_base &
> >>>>  			     MSR_IA32_APICBASE_BASE;
> >>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >>>> index d29d3cd..0b82cb1 100644
> >>>> --- a/arch/x86/kvm/svm.c
> >>>> +++ b/arch/x86/kvm/svm.c
> >>>> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct
> > kvm_vcpu
> >>> *vcpu, int tpr, int irr)
> >>>>  		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
> >>>>  }
> >>>> +static void svm_enable_virtual_x2apic_mode(struct kvm_vcpu *vcpu)
> >>>> +{
> >>>> +	return;
> >>>> +}
> >>>> +
> >>>>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { 	struct vcpu_svm
> >>>>  *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops
> >>>>  svm_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
> >>>>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
> >>>>  update_cr8_intercept,
> >>>> +	.enable_virtual_x2apic_mode = svm_enable_virtual_x2apic_mode,
> >>>> 
> >>>>  	.set_tss_addr = svm_set_tss_addr,
> >>>>  	.get_tdp_level = get_npt_level,
> >>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>>> index 688f43f..b203ce7 100644
> >>>> --- a/arch/x86/kvm/vmx.c
> >>>> +++ b/arch/x86/kvm/vmx.c
> >>>> @@ -433,6 +433,8 @@ struct vcpu_vmx {
> >>>> 
> >>>>  	bool rdtscp_enabled;
> >>>> +	bool virtual_x2apic_enabled;
> >>>> +
> >>>>  	/* Support for a guest hypervisor (nested VMX) */
> >>>>  	struct nested_vmx nested;
> >>>>  };
> >>>> @@ -767,12 +769,23 @@ static inline bool
> >>> cpu_has_vmx_virtualize_apic_accesses(void)
> >>>>  		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> >>>>  }
> >>>> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
> >>>> +{
> >>>> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
> >>>> +		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> >>>> +}
> >>>> +
> >>>>  static inline bool cpu_has_vmx_apic_register_virt(void)
> >>>>  {
> >>>>  	return vmcs_config.cpu_based_2nd_exec_ctrl &
> >>>>  		SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >>>>  }
> >>>> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
> >>>> +{
> >>>> +	return false;
> >>>> +}
> >>>> +
> >>>>  static inline bool cpu_has_vmx_flexpriority(void)
> >>>>  {
> >>>>  	return cpu_has_vmx_tpr_shadow() &&
> >>>> @@ -2544,6 +2557,7 @@ static __init int setup_vmcs_config(struct
> >>> vmcs_config *vmcs_conf)
> >>>>  	if (_cpu_based_exec_control &
> >>>>  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 		min2 = 0; 		opt2 =
> >>>>  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> >>>>  +			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
> >>>>  			SECONDARY_EXEC_WBINVD_EXITING | 	SECONDARY_EXEC_ENABLE_VPID |
> >>>>  			SECONDARY_EXEC_ENABLE_EPT | @@ -3731,7 +3745,45 @@ static void
> >>>>  free_vpid(struct vcpu_vmx *vmx) 	spin_unlock(&vmx_vpid_lock); }
> >>>> -static void __vmx_disable_intercept_for_msr(unsigned long
> >>>> *msr_bitmap, u32 msr) +#define MSR_TYPE_R	1 +#define MSR_TYPE_W	2
> >>>> +static void __vmx_disable_intercept_for_msr(unsigned long
> >>>> *msr_bitmap, + 					u32 msr, int type) +{ +	int f = sizeof(unsigned
> >>>> long); + +	if (!cpu_has_vmx_msr_bitmap()) +		return; + +	/* +	 * See
> >>>> Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals +	 *
> >>>> have the write-low and read-high bitmap offsets the wrong way round.
> >>>> +	 * We can control MSRs 0x00000000-0x00001fff and
> >>>> 0xc0000000-0xc0001fff. +	 */ +	if (msr <= 0x1fff) { +		if (type &
> >>>> MSR_TYPE_R) +			/* read-low */ +			__clear_bit(msr, msr_bitmap +
> >>>> 0x000 / f); + +		if (type & MSR_TYPE_W) +			/* write-low */ +
> >>>> 	__clear_bit(msr, msr_bitmap + 0x800 / f); + +	} else if ((msr >=
> >>>> 0xc0000000) && (msr <= 0xc0001fff)) { +		msr &= 0x1fff; +		if (type &
> >>>> MSR_TYPE_R) + 	/* read-high */ +			__clear_bit(msr, msr_bitmap +
> >>>> 0x400 / f); + +		if (type & MSR_TYPE_W) +			/* write-high */ +
> >>>> 	__clear_bit(msr, msr_bitmap + 0xc00 / f); + +	} +} + +static void
> >>>> __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + 					u32
> >>>> msr, int type)
> >>>>  {
> >>>>  	int f = sizeof(unsigned long);
> >>>> @@ -3744,20 +3796,75 @@ static void
> >>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
> >>>>  	 * We can control MSRs 0x00000000-0x00001fff and
> >>>>  0xc0000000-0xc0001fff. 	 */ 	if (msr <= 0x1fff) {
> >>>> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
> >>>> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
> >>>> +		if (type & MSR_TYPE_R)
> >>>> +			/* read-low */
> >>>> +			__set_bit(msr, msr_bitmap + 0x000 / f);
> >>>> +
> >>>> +		if (type & MSR_TYPE_W)
> >>>> +			/* write-low */
> >>>> +			__set_bit(msr, msr_bitmap + 0x800 / f);
> >>>> +
> >>>>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> >>>>  		msr &= 0x1fff;
> >>>> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
> >>>> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
> >>>> +		if (type & MSR_TYPE_R)
> >>>> +			/* read-high */
> >>>> +			__set_bit(msr, msr_bitmap + 0x400 / f);
> >>>> +
> >>>> +		if (type & MSR_TYPE_W)
> >>>> +			/* write-high */
> >>>> +			__set_bit(msr, msr_bitmap + 0xc00 / f);
> >>>> +
> >>>>  	} } + static void vmx_disable_intercept_for_msr(u32 msr, bool
> >>>>  longmode_only) { 	if (!longmode_only)
> >>>> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
> >>>> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
> >>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +						msr,
> >>>> MSR_TYPE_R | MSR_TYPE_W);
> >>>> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> >>>> +						msr, MSR_TYPE_R | MSR_TYPE_W); +} + +static void
> >>>> vmx_intercept_for_msr_read(u32 msr, bool longmode_only, +					bool
> >>>> set) +{ +	if (!longmode_only) { +		if (set) +
> >>>> 	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy, +					msr,
> >>>> MSR_TYPE_R); +		else +
> >>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +					msr,
> >>>> MSR_TYPE_R); + +	} +	if (set)
> >>>> +		__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
> >>>> MSR_TYPE_R); +	else
> >>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
> >>>> MSR_TYPE_R); +} + +static void vmx_intercept_for_msr_write(u32 msr,
> >>>> bool longmode_only, +					bool set) +{ +	if (!longmode_only) { +		if
> >>>> (set) + 	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy,
> >>>> +					msr, MSR_TYPE_W); +		else +
> >>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +					msr,
> >>>> MSR_TYPE_W); + +	} +	if (set)
> >>>> +		__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
> >>>> MSR_TYPE_W); +	else
> >>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
> >>>> MSR_TYPE_W);
> >>>>  }
> >>>>  
> >>>>  /*
> >>>> @@ -3855,6 +3962,7 @@ static u32 vmx_secondary_exec_control(struct
> >>> vcpu_vmx *vmx)
> >>>>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 	if
> >>>>  (!enable_apicv_reg_vid) 		exec_control &=
> >>>>  ~SECONDARY_EXEC_APIC_REGISTER_VIRT; +	exec_control &=
> >>>>  ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; 	return
> > exec_control; }
> >>>> @@ -6110,6 +6218,76 @@ static void update_cr8_intercept(struct
> > kvm_vcpu
> >>> *vcpu, int tpr, int irr)
> >>>>  	vmcs_write32(TPR_THRESHOLD, irr);
> >>>>  }
> >>>> +static void vmx_enable_virtual_x2apic_mode(struct kvm_vcpu *vcpu)
> >>>> +{
> >>>> +	u32 exec_control;
> >>>> +	int msr;
> >>>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >>>> +
> >>>> +	if (!cpu_has_vmx_virtualize_x2apic_mode())
> >>>> +		return;
> >>>> +
> >>>> +	exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> >>>> +	/* virtualize x2apic mode relies on tpr shadow */
> >>>> +	if (!(exec_control & CPU_BASED_TPR_SHADOW))
> >>>> +		return;
> >>>> +
> >>>> +	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> >>>> +	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> >>>> +	exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> >>>> +	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> >>>> +	vmx->virtual_x2apic_enabled = true;
> >>> Why track it?
> >> With this flag, we don't need to read vmcs to check whether we enabled
> >> virtua x2apic before.
> >> 
> > Why do you care? Just disabled it regardless.
> kvm_lapic_set_base will be called when creating lapic. At that time, vcpu didn't initialized. Then read/write vmcs in vmx_disable_virtual_x2apic_mode will cause error.
> With this flag, we only disable the virtual x2apic mode if it is enabled before. 
> 
Then call vmx_enable_virtual_x2apic_mode() only when mode actually changes.
kvm_lapic_set_base() can track it like it does to MSR_IA32_APICBASE_ENABLE.

--
			Gleb.

  reply	other threads:[~2013-01-11 13:51 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-10  7:26 [PATCH v9 0/3] x86, apicv: Add APIC virtualization support Yang Zhang
2013-01-10  7:26 ` [PATCH v9 1/3] x86, apicv: add APICv register " Yang Zhang
2013-01-10 20:25   ` Marcelo Tosatti
2013-01-10  7:26 ` [PATCH v9 2/3] x86, apicv: add virtual x2apic support Yang Zhang
2013-01-10  7:55   ` Gleb Natapov
2013-01-10  8:32     ` Zhang, Yang Z
2013-01-10  8:52       ` Gleb Natapov
2013-01-10 11:54         ` Zhang, Yang Z
2013-01-10 12:16           ` Gleb Natapov
2013-01-10 12:22             ` Zhang, Yang Z
2013-01-10 12:34               ` Gleb Natapov
2013-01-11  7:36                 ` Zhang, Yang Z
2013-01-11 16:54                   ` Gleb Natapov
2013-01-14  1:03                     ` Zhang, Yang Z
2013-01-14  1:14                     ` Zhang, Yang Z
2013-01-11  2:37         ` Zhang, Yang Z
2013-01-11 13:51           ` Gleb Natapov [this message]
2013-01-10  8:25   ` Gleb Natapov
2013-01-10  8:31     ` Zhang, Yang Z
2013-01-10  8:53       ` Gleb Natapov
2013-01-10  7:26 ` [PATCH v9 3/3] x86, apicv: add virtual interrupt delivery support Yang Zhang
2013-01-10  8:23   ` Gleb Natapov
2013-01-10 12:04     ` Zhang, Yang Z
2013-01-10 21:36   ` Marcelo Tosatti
2013-01-11 14:09     ` Gleb Natapov
2013-01-11 17:58       ` Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130111135123.GA6513@redhat.com \
    --to=gleb@redhat.com \
    --cc=haitao.shan@intel.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=yang.z.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.