All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Yang Zhang <yang.z.zhang@intel.com>
Cc: kvm@vger.kernel.org, gleb@redhat.com, Kevin Tian <kevin.tian@intel.com>
Subject: Re: [PATCH v3 3/4] x86, apicv: add virtual interrupt delivery support
Date: Wed, 5 Dec 2012 00:00:16 -0200	[thread overview]
Message-ID: <20121205020016.GA32458@amt.cnet> (raw)
In-Reply-To: <1354518064-3066-4-git-send-email-yang.z.zhang@intel.com>

On Mon, Dec 03, 2012 at 03:01:03PM +0800, Yang Zhang wrote:
> Virtual interrupt delivery avoids KVM to inject vAPIC interrupts
> manually, which is fully taken care of by the hardware. This needs
> some special awareness into existing interrupr injection path:
> 
> - for pending interrupt, instead of direct injection, we may need
>   update architecture specific indicators before resuming to guest.
> 
> - A pending interrupt, which is masked by ISR, should be also
>   considered in above update action, since hardware will decide
>   when to inject it at right time. Current has_interrupt and
>   get_interrupt only returns a valid vector from injection p.o.v.
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    4 +
>  arch/x86/include/asm/vmx.h      |   11 +++
>  arch/x86/kvm/irq.c              |   53 ++++++++++-----
>  arch/x86/kvm/lapic.c            |   56 +++++++++++++---
>  arch/x86/kvm/lapic.h            |    6 ++
>  arch/x86/kvm/svm.c              |   19 +++++
>  arch/x86/kvm/vmx.c              |  140 ++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.c              |   34 ++++++++--
>  virt/kvm/ioapic.c               |    1 +
>  9 files changed, 291 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index dc87b65..e5352c8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -697,6 +697,10 @@ struct kvm_x86_ops {
>  	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
>  	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
>  	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> +	int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
> +	void (*update_irq)(struct kvm_vcpu *vcpu);
> +	void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector,
> +			int trig_mode, int always_set);
>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>  	int (*get_tdp_level)(void);
>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 21101b6..1003341 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -62,6 +62,7 @@
>  #define EXIT_REASON_MCE_DURING_VMENTRY  41
>  #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
>  #define EXIT_REASON_APIC_ACCESS         44
> +#define EXIT_REASON_EOI_INDUCED         45
>  #define EXIT_REASON_EPT_VIOLATION       48
>  #define EXIT_REASON_EPT_MISCONFIG       49
>  #define EXIT_REASON_WBINVD              54
> @@ -143,6 +144,7 @@
>  #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
>  #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
>  #define SECONDARY_EXEC_APIC_REGISTER_VIRT       0x00000100
> +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200
>  #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
>  #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
>  
> @@ -180,6 +182,7 @@ enum vmcs_field {
>  	GUEST_GS_SELECTOR               = 0x0000080a,
>  	GUEST_LDTR_SELECTOR             = 0x0000080c,
>  	GUEST_TR_SELECTOR               = 0x0000080e,
> +	GUEST_INTR_STATUS               = 0x00000810,
>  	HOST_ES_SELECTOR                = 0x00000c00,
>  	HOST_CS_SELECTOR                = 0x00000c02,
>  	HOST_SS_SELECTOR                = 0x00000c04,
> @@ -207,6 +210,14 @@ enum vmcs_field {
>  	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
>  	EPT_POINTER                     = 0x0000201a,
>  	EPT_POINTER_HIGH                = 0x0000201b,
> +	EOI_EXIT_BITMAP0                = 0x0000201c,
> +	EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
> +	EOI_EXIT_BITMAP1                = 0x0000201e,
> +	EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
> +	EOI_EXIT_BITMAP2                = 0x00002020,
> +	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
> +	EOI_EXIT_BITMAP3                = 0x00002022,
> +	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
>  	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
>  	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
>  	VMCS_LINK_POINTER               = 0x00002800,
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 7e06ba1..f782788 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -43,45 +43,64 @@ EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
>   */
>  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  {
> -	struct kvm_pic *s;
> -
>  	if (!irqchip_in_kernel(v->kvm))
>  		return v->arch.interrupt.pending;
>  
> -	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
> -		if (kvm_apic_accept_pic_intr(v)) {
> -			s = pic_irqchip(v->kvm);	/* PIC */
> -			return s->output;
> -		} else
> -			return 0;
> -	}
> +	if (kvm_apic_has_interrupt(v) == -1) /* LAPIC */
> +		return kvm_cpu_has_extint(v); /* non-APIC */
>  	return 1;
>  }
>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
>  
>  /*
> + * check if there is pending interrupt from
> + * non-APIC source without intack.
> + */
> +int kvm_cpu_has_extint(struct kvm_vcpu *v)
> +{
> +	struct kvm_pic *s;
> +
> +	if (kvm_apic_accept_pic_intr(v)) {
> +		s = pic_irqchip(v->kvm); /* PIC */
> +		return s->output;
> +	} else
> +		return 0;
> +}
> +
> +/*
>   * Read pending interrupt vector and intack.
>   */
>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
>  {
> -	struct kvm_pic *s;
>  	int vector;
>  
>  	if (!irqchip_in_kernel(v->kvm))
>  		return v->arch.interrupt.nr;
>  
>  	vector = kvm_get_apic_interrupt(v);	/* APIC */
> -	if (vector == -1) {
> -		if (kvm_apic_accept_pic_intr(v)) {
> -			s = pic_irqchip(v->kvm);
> -			s->output = 0;		/* PIC */
> -			vector = kvm_pic_read_irq(v->kvm);
> -		}
> -	}
> +	if (vector == -1)
> +		return kvm_cpu_get_extint(v); /* non-APIC */
>  	return vector;
>  }
>  EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
>  
> +/*
> + * Read pending interrupt(from non-APIC source)
> + * vector and intack.
> + */
> +int kvm_cpu_get_extint(struct kvm_vcpu *v)
> +{
> +	struct kvm_pic *s;
> +	int vector = -1;
> +
> +	if (kvm_apic_accept_pic_intr(v)) {
> +		s = pic_irqchip(v->kvm);
> +		s->output = 0;		/* PIC */
> +		vector = kvm_pic_read_irq(v->kvm);
> +	}
> +	return vector;
> +}
> +
>  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
>  {
>  	kvm_inject_apic_timer_irqs(vcpu);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 7c96012..400d3ba 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -643,6 +643,14 @@ out:
>  	return ret;
>  }
>  
> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> +		int trig_mode, int always_set)
> +{
> +	if (kvm_x86_ops->set_eoi_exitmap)
> +		kvm_x86_ops->set_eoi_exitmap(vcpu, vector,
> +					trig_mode, always_set);
> +}
> +
>  /*
>   * Add a pending IRQ into lapic.
>   * Return 1 if successfully added and 0 if discarded.
> @@ -661,6 +669,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  		if (unlikely(!apic_enabled(apic)))
>  			break;
>  
> +		kvm_set_eoi_exitmap(vcpu, vector, trig_mode, 0);
>  		if (trig_mode) {
>  			apic_debug("level trig mode for vector %d", vector);
>  			apic_set_vector(vector, apic->regs + APIC_TMR);
> @@ -740,6 +749,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
>  	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
>  }
>  
> +static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
> +{
> +	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
> +	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
> +		int trigger_mode;
> +		if (apic_test_vector(vector, apic->regs + APIC_TMR))
> +			trigger_mode = IOAPIC_LEVEL_TRIG;
> +		else
> +			trigger_mode = IOAPIC_EDGE_TRIG;
> +		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> +	}
> +}
> +
>  static int apic_set_eoi(struct kvm_lapic *apic)
>  {
>  	int vector = apic_find_highest_isr(apic);
> @@ -756,19 +778,24 @@ static int apic_set_eoi(struct kvm_lapic *apic)
>  	apic_clear_isr(vector, apic);
>  	apic_update_ppr(apic);
>  
> -	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
> -	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
> -		int trigger_mode;
> -		if (apic_test_vector(vector, apic->regs + APIC_TMR))
> -			trigger_mode = IOAPIC_LEVEL_TRIG;
> -		else
> -			trigger_mode = IOAPIC_EDGE_TRIG;
> -		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> -	}
> +	kvm_ioapic_send_eoi(apic, vector);
>  	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
>  	return vector;
>  }
>  
> +/*
> + * this interface assumes a trap-like exit, which has already finished
> + * desired side effect including vISR and vPPR update.
> + */
> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +	kvm_ioapic_send_eoi(apic, vector);
> +	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
> +}
> +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
> +
>  static void apic_send_ipi(struct kvm_lapic *apic)
>  {
>  	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
> @@ -1533,6 +1560,17 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  	return highest_irr;
>  }
>  
> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +
> +	if (!apic || !apic_enabled(apic))
> +		return -1;
> +
> +	return apic_find_highest_irr(apic);
> +}
> +EXPORT_SYMBOL_GPL(kvm_apic_get_highest_irr);
> +
>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
>  {
>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index c42f111..749661a 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -39,6 +39,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
>  int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
> +int kvm_cpu_has_extint(struct kvm_vcpu *v);
> +int kvm_cpu_get_extint(struct kvm_vcpu *v);
> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu);
>  void kvm_lapic_reset(struct kvm_vcpu *vcpu);
>  u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
>  void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
> @@ -50,6 +53,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
>  int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
>  int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
>  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> +		int need_eoi, int global);
>  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
>  
>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> @@ -65,6 +70,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
>  void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
>  
>  int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
>  
>  void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
>  void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index dcb7952..8f0903b 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3573,6 +3573,22 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>  		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>  }
>  
> +static int svm_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}
> +
> +static void svm_update_irq(struct kvm_vcpu *vcpu)
> +{
> +	return ;
> +}
> +
> +static void svm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> +				int trig_mode, int always_set)
> +{
> +	return ;
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -4292,6 +4308,9 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.enable_nmi_window = enable_nmi_window,
>  	.enable_irq_window = enable_irq_window,
>  	.update_cr8_intercept = update_cr8_intercept,
> +	.has_virtual_interrupt_delivery = svm_has_virtual_interrupt_delivery,
> +	.update_irq = svm_update_irq;
> +	.set_eoi_exitmap = svm_set_eoi_exitmap;
>  
>  	.set_tss_addr = svm_set_tss_addr,
>  	.get_tdp_level = get_npt_level,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6a5f651..909ce90 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -86,6 +86,9 @@ module_param(fasteoi, bool, S_IRUGO);
>  static bool __read_mostly enable_apicv_reg;
>  module_param(enable_apicv_reg, bool, S_IRUGO);
>  
> +static bool __read_mostly enable_apicv_vid;
> +module_param(enable_apicv_vid, bool, S_IRUGO);
> +
>  /*
>   * If nested=1, nested virtualization is supported, i.e., guests may use
>   * VMX and be a hypervisor for its own guests. If nested=0, guests may not
> @@ -432,6 +435,9 @@ struct vcpu_vmx {
>  
>  	bool rdtscp_enabled;
>  
> +	u8 eoi_exitmap_changed;
> +	u32 eoi_exit_bitmap[8];
> +
>  	/* Support for a guest hypervisor (nested VMX) */
>  	struct nested_vmx nested;
>  };
> @@ -770,6 +776,12 @@ static inline bool cpu_has_vmx_apic_register_virt(void)
>  		SECONDARY_EXEC_APIC_REGISTER_VIRT;
>  }
>  
> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
> +{
> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
> +		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> +}
> +
>  static inline bool cpu_has_vmx_flexpriority(void)
>  {
>  	return cpu_has_vmx_tpr_shadow() &&
> @@ -2508,7 +2520,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
>  			SECONDARY_EXEC_RDTSCP |
>  			SECONDARY_EXEC_ENABLE_INVPCID |
> -			SECONDARY_EXEC_APIC_REGISTER_VIRT;
> +			SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>  		if (adjust_vmx_controls(min2, opt2,
>  					MSR_IA32_VMX_PROCBASED_CTLS2,
>  					&_cpu_based_2nd_exec_control) < 0)
> @@ -2522,7 +2535,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  
>  	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
>  		_cpu_based_2nd_exec_control &= ~(
> -				SECONDARY_EXEC_APIC_REGISTER_VIRT);
> +				SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
>  
>  	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
>  		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
> @@ -2724,6 +2738,14 @@ static __init int hardware_setup(void)
>  	if (!cpu_has_vmx_apic_register_virt())
>  		enable_apicv_reg = 0;
>  
> +	if (!cpu_has_vmx_virtual_intr_delivery())
> +		enable_apicv_vid = 0;
> +
> +	if (!enable_apicv_vid) {
> +		kvm_x86_ops->update_irq = NULL;
> +		kvm_x86_ops->update_cr8_intercept = NULL;
> +	}
> +
>  	if (nested)
>  		nested_vmx_setup_ctls_msrs();
>  
> @@ -3839,6 +3861,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
>  	if (!enable_apicv_reg)
>  		exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> +	if (!enable_apicv_vid)
> +		exec_control &= ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>  	return exec_control;
>  }
>  
> @@ -3883,6 +3907,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  				vmx_secondary_exec_control(vmx));
>  	}
>  
> +	if (enable_apicv_vid) {
> +		vmcs_write64(EOI_EXIT_BITMAP0, 0);
> +		vmcs_write64(EOI_EXIT_BITMAP1, 0);
> +		vmcs_write64(EOI_EXIT_BITMAP2, 0);
> +		vmcs_write64(EOI_EXIT_BITMAP3, 0);
> +
> +		vmcs_write16(GUEST_INTR_STATUS, 0);
> +	}
> +
>  	if (ple_gap) {
>  		vmcs_write32(PLE_GAP, ple_gap);
>  		vmcs_write32(PLE_WINDOW, ple_window);
> @@ -4806,6 +4839,16 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
>  	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>  }
>  
> +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +	int vector = exit_qualification & 0xff;
> +
> +	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
> +	kvm_apic_set_eoi_accelerated(vcpu, vector);
> +	return 1;
> +}
> +
>  static int handle_apic_write(struct kvm_vcpu *vcpu)
>  {
>  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> @@ -5755,6 +5798,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>  	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
>  	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
>  	[EXIT_REASON_APIC_WRITE]              = handle_apic_write,
> +	[EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
>  	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
>  	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
>  	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
> @@ -6096,6 +6140,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>  
>  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>  {
> +	/* no need for tpr_threshold update if APIC virtual
> +	 * interrupt delivery is enabled */
> +	if (!enable_apicv_vid)
> +		return ;
> +
>  	if (irr == -1 || tpr < irr) {
>  		vmcs_write32(TPR_THRESHOLD, 0);
>  		return;
> @@ -6104,6 +6153,90 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>  	vmcs_write32(TPR_THRESHOLD, irr);
>  }
>  
> +static int vmx_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> +{
> +	return irqchip_in_kernel(vcpu->kvm) && enable_apicv_vid;
> +}
> +
> +static void vmx_update_eoi_exitmap(struct vcpu_vmx *vmx, int index)
> +{
> +	int tmr;
> +	tmr = kvm_apic_get_reg(vmx->vcpu.arch.apic,
> +			APIC_TMR + 0x10 * index);
> +	vmcs_write32(EOI_EXIT_BITMAP0 + index,
> +			vmx->eoi_exit_bitmap[index] | tmr);
> +}
> +
> +static void vmx_update_rvi(int vector)
> +{
> +	u16 status;
> +	u8 old;
> +
> +	status = vmcs_read16(GUEST_INTR_STATUS);
> +	old = (u8)status & 0xff;
> +	if ((u8)vector != old) {
> +		status &= ~0xff;
> +		status |= (u8)vector;
> +		vmcs_write16(GUEST_INTR_STATUS, status);
> +	}
> +}
> +
> +static void vmx_update_irq(struct kvm_vcpu *vcpu)
> +{
> +	int vector;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (!enable_apicv_vid)
> +		return ;
> +
> +	vector = kvm_apic_get_highest_irr(vcpu);
> +	if (vector == -1)
> +		return;

1. Is the pseudocode sequence of virtual interrupt delivery in 29.2.2
guaranteed to be atomic (not interruptible by other events) ?
The question is, when hardware is performing virtual interrupt
delivery is it guaranteed that RVI matches VIRR ?  (the answer must be
yes, just checking).

2. Section 29.6 mentions that "Use of the posted-interrupt descriptor
differs from that of other data structures that are referenced by
pointers in a VMCS. There is a general requirement that software ensure
that each such data structure is modified only when no logical processor
with a current VMCS that references it is in VMX non-root operation.
That requirement does not apply to the posted-interrupt descriptor.
There is a requirement, however, that such modifications be done using
locked read-modify-write instructions."

The APIC virtual page is being modified by a CPU while a logical
processor with current VMCS that references it is in VMX non-root
operation, in fact even modifying the APIC virtual page with EOI
virtualizaton, virtual interrupt delivery, etc. What are the
requirements in this case?

> @@ -5657,12 +5673,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> +		/* update archtecture specific hints for APIC
> +		 * virtual interrupt delivery */
> +		if (kvm_x86_ops->update_irq)
> +			kvm_x86_ops->update_irq(vcpu);
> +
>  		inject_pending_event(vcpu);
>  
>  		/* enable NMI/IRQ window open exits if needed */
>  		if (vcpu->arch.nmi_pending)
>  			kvm_x86_ops->enable_nmi_window(vcpu);
> -		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> +		else if (kvm_apic_vid_enabled(vcpu)) {
> +			if (kvm_cpu_has_extint(vcpu))
> +				kvm_x86_ops->enable_irq_window(vcpu);

If RVI is non-zero, then interrupt window should not be enabled,
accordingly to 29.2.2:

"If a virtual interrupt has been recognized (see Section 29.2.1), it will
be delivered at an instruction boundary when the following conditions all
hold: (1) RFLAGS.IF = 1; (2) there is no blocking by STI; (3) there is no 
blocking by MOV SS or by POP SS; and (4) the “interrupt-window exiting”
VM-execution control is 0."

> +		} else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>  			kvm_x86_ops->enable_irq_window(vcpu);
>  
>  		if (kvm_lapic_enabled(vcpu)) {
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index 166c450..898aa62 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -186,6 +186,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
>  		/* need to read apic_id from apic regiest since
>  		 * it can be rewritten */
>  		irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
> +		kvm_set_eoi_exitmap(ioapic->kvm->vcpus[0], irqe.vector, 1, 1);
>  	}
>  #endif
>  	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);

  parent reply	other threads:[~2012-12-05  2:01 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-12-03  7:01 [PATCH v3 0/4] x86, apicv: Add APIC virtualization support Yang Zhang
2012-12-03  7:01 ` [PATCH v3 1/4] x86: PIT connects to pin 2 of IOAPIC Yang Zhang
2012-12-03  9:42   ` Gleb Natapov
2012-12-04  5:32     ` Zhang, Yang Z
2012-12-04 12:55       ` Gleb Natapov
2012-12-05  1:28         ` Zhang, Yang Z
2012-12-03  7:01 ` [PATCH v3 2/4] x86, apicv: add APICv register virtualization support Yang Zhang
2012-12-05  0:29   ` Marcelo Tosatti
2012-12-05  3:17     ` Zhang, Yang Z
2012-12-05 23:11       ` Marcelo Tosatti
2012-12-06  6:45         ` Gleb Natapov
2012-12-03  7:01 ` [PATCH v3 3/4] x86, apicv: add virtual interrupt delivery support Yang Zhang
2012-12-03 11:37   ` Gleb Natapov
2012-12-04  6:39     ` Zhang, Yang Z
2012-12-04 10:58       ` Gleb Natapov
2012-12-05  1:55         ` Zhang, Yang Z
2012-12-05  5:02           ` Gleb Natapov
2012-12-05  6:02             ` Zhang, Yang Z
2012-12-05 10:18               ` Gleb Natapov
2012-12-05 13:51                 ` Zhang, Yang Z
2012-12-05 14:00                   ` Gleb Natapov
2012-12-06  2:55                     ` Zhang, Yang Z
2012-12-06  7:07                       ` Gleb Natapov
2012-12-06  7:16                         ` Zhang, Yang Z
2012-12-06  7:19                           ` Gleb Natapov
2012-12-06  7:20                             ` Zhang, Yang Z
2012-12-04 16:46   ` Michael S. Tsirkin
2012-12-05  2:00   ` Marcelo Tosatti [this message]
2012-12-05  3:43     ` Zhang, Yang Z
2012-12-05 11:14       ` Gleb Natapov
2012-12-05 14:16         ` Zhang, Yang Z
2012-12-05 14:34           ` Gleb Natapov
2012-12-06  2:58             ` Zhang, Yang Z
2012-12-05 22:38         ` Marcelo Tosatti
2012-12-06  9:28           ` Gleb Natapov
2012-12-06 11:35             ` Zhang, Yang Z
2012-12-05 22:31   ` Marcelo Tosatti
2012-12-06  3:31     ` Zhang, Yang Z
2012-12-06  5:02     ` Zhang, Yang Z
2012-12-06  6:36       ` Gleb Natapov
2012-12-06  6:56         ` Zhang, Yang Z
2012-12-06 20:08         ` Marcelo Tosatti
2012-12-07  1:00           ` Zhang, Yang Z
2012-12-07  7:28             ` Gleb Natapov
2012-12-03  7:01 ` [PATCH v3 4/4] x86, apicv: add virtual x2apic support Yang Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121205020016.GA32458@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=gleb@redhat.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=yang.z.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.