* [PATCHv3] KVM: optimize apic interrupt delivery
From: Gleb Natapov @ 2012-09-13  9:00 UTC (permalink / raw)
  To: kvm; +Cc: avi, mst, mtosatti

Most interrupts are delivered to only one vcpu. Use pre-built tables to
find the interrupt destination instead of looping through all vcpus. In
logical mode, loop only through the vcpus in the logical cluster the irq
is sent to.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 Changelog:

  - v2->v3
   * sparse annotation for rcu usage
   * move mutex above map
   * use mask/shift to calculate cluster/dst ids
   * use gotos
   * add comment about logic behind logical table creation

  - v1->v2
   * fix race Avi noticed
   * rcu_read_lock() out of the block as per Avi
   * fix rcu issues pointed out by MST. All but one. Still use
     call_rcu(). Do not think this is a serious issue. If it is, it
     should be solved by the RCU subsystem.
   * fix phys_map overflow pointed out by MST
   * recalculate_apic_map() does not return error any more.
   * add optimization for low prio logical mode with one cpu as dst (it
     happens)


diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64adb61..9dcfd3e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -511,6 +511,14 @@ struct kvm_arch_memory_slot {
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+struct kvm_apic_map {
+	struct rcu_head rcu;
+	u8 ldr_bits;
+	u32 cid_shift, cid_mask, lid_mask;
+	struct kvm_lapic *phys_map[256];
+	struct kvm_lapic *logical_map[16][16];
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -528,6 +536,8 @@ struct kvm_arch {
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	int vapics_in_nmi_mode;
+	struct mutex apic_map_lock;
+	struct kvm_apic_map *apic_map;
 
 	unsigned int tss_addr;
 	struct page *apic_access_page;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 07ad628..a03d4aa 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -139,11 +139,105 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+{
+	ldr >>= 32 - map->ldr_bits;
+	return (ldr >> map->cid_shift) & map->cid_mask;
+}
+
+static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
+{
+	ldr >>= (32 - map->ldr_bits);
+	return ldr & map->lid_mask;
+}
+
+static inline void recalculate_apic_map(struct kvm *kvm)
+{
+	struct kvm_apic_map *new, *old = NULL;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+
+	mutex_lock(&kvm->arch.apic_map_lock);
+
+	if (!new)
+		goto out;
+
+	new->ldr_bits = 8;
+	/* flat mode is default */
+	new->cid_shift = 8;
+	new->cid_mask = 0;
+	new->lid_mask = 0xff;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		u16 cid, lid;
+		u32 ldr;
+
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		/*
+		 * All APICs have to be configured in the same mode by an OS.
+		 * We take advantage of this while building the logical id
+		 * lookup table. After reset APICs are in xapic/flat mode, so
+		 * if we find an apic with a different setting we assume this
+		 * is the mode the OS wants all apics to be in; we build the
+		 * lookup table accordingly.
+		 */
+		if (apic_x2apic_mode(apic)) {
+			new->ldr_bits = 32;
+			new->cid_shift = 16;
+			new->cid_mask = new->lid_mask = 0xffff;
+		} else if (kvm_apic_sw_enabled(apic) &&
+				!new->cid_mask /* flat mode */ &&
+				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
+			new->cid_shift = 4; 
+			new->cid_mask = 0xf;
+			new->lid_mask = 0xf;
+		}
+
+		new->phys_map[kvm_apic_id(apic)] = apic;
+
+		ldr = kvm_apic_get_reg(apic, APIC_LDR);
+		cid = apic_cluster_id(new, ldr);
+		lid = apic_logical_id(new, ldr);
+
+		if (lid)
+			new->logical_map[cid][ffs(lid) - 1] = apic;
+	}
+out:
+	old = rcu_dereference_protected(kvm->arch.apic_map, 1);
+	rcu_assign_pointer(kvm->arch.apic_map, new);
+	mutex_unlock(&kvm->arch.apic_map_lock);
+
+	if (old)
+		kfree_rcu(old, rcu);
+}
+
+static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+{
+	apic_set_reg(apic, APIC_ID, id << 24);
+	recalculate_apic_map(apic->vcpu->kvm);
+}
+
+static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
+{
+	apic_set_reg(apic, APIC_LDR, id);
+	recalculate_apic_map(apic->vcpu->kvm);
+}
+
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
 	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@ -193,11 +287,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_LVR, v);
 }
 
-static inline int apic_x2apic_mode(struct kvm_lapic *apic)
-{
-	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
-}
-
 static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -477,6 +566,73 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	return result;
 }
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r)
+{
+	struct kvm_apic_map *map;
+	unsigned long bitmap = 1;
+	struct kvm_lapic **dst;
+	int i;
+	bool ret = false;
+
+	*r = -1;
+
+	if (irq->shorthand == APIC_DEST_SELF) {
+		*r = kvm_apic_set_irq(src->vcpu, irq);
+		return true;
+	}
+
+	if (irq->shorthand)
+		return false;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	if (!map)
+		goto out;
+
+	if (irq->dest_mode == 0) { /* physical mode */
+		if (irq->delivery_mode == APIC_DM_LOWEST ||
+				irq->dest_id == 0xff)
+			goto out;
+		dst = &map->phys_map[irq->dest_id & 0xff];
+	} else {
+		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+
+		dst = map->logical_map[apic_cluster_id(map, mda)];
+
+		bitmap = apic_logical_id(map, mda);
+
+		if (irq->delivery_mode == APIC_DM_LOWEST &&
+				hweight_long(bitmap) > 1) {
+			int l = -1;
+			for_each_set_bit(i, &bitmap, 16) {
+				if (!dst[i])
+					continue;
+				if (l < 0)
+					l = i;
+				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+					l = i;
+			}
+
+			bitmap = (l >= 0) ? 1 << l : 0;
+		}
+	}
+
+	for_each_set_bit(i, &bitmap, 16) {
+		if (!dst[i])
+			continue;
+		if (*r < 0)
+			*r = 0;
+		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+	}
+
+	ret = true;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
@@ -880,7 +1036,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_ID, val);
+			kvm_apic_set_id(apic, val >> 24);
 		else
 			ret = 1;
 		break;
@@ -896,15 +1052,16 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_LDR:
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
 		else
 			ret = 1;
 		break;
 
 	case APIC_DFR:
-		if (!apic_x2apic_mode(apic))
+		if (!apic_x2apic_mode(apic)) {
 			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
-		else
+			recalculate_apic_map(apic->vcpu->kvm);
+		} else
 			ret = 1;
 		break;
 
@@ -1135,6 +1292,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 			static_key_slow_dec_deferred(&apic_hw_disabled);
 		else
 			static_key_slow_inc(&apic_hw_disabled.key);
+		recalculate_apic_map(vcpu->kvm);
 	}
 
 	if (!kvm_vcpu_is_bsp(apic->vcpu))
@@ -1144,7 +1302,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	if (apic_x2apic_mode(apic)) {
 		u32 id = kvm_apic_id(apic);
 		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
-		apic_set_reg(apic, APIC_LDR, ldr);
+		kvm_apic_set_ldr(apic, ldr);
 	}
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
@@ -1169,7 +1327,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
-	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+	kvm_apic_set_id(apic, (u8)vcpu->vcpu_id);
 	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1180,7 +1338,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
 	apic_set_spiv(apic, 0xff);
 	apic_set_reg(apic, APIC_TASKPRI, 0);
-	apic_set_reg(apic, APIC_LDR, 0);
+	kvm_apic_set_ldr(apic, 0);
 	apic_set_reg(apic, APIC_ESR, 0);
 	apic_set_reg(apic, APIC_ICR, 0);
 	apic_set_reg(apic, APIC_ICR2, 0);
@@ -1398,6 +1556,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	/* set SPIV separately to get count of SW disabled APICs right */
 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+	/* call kvm_apic_set_id() to put apic into apic_map */
+	kvm_apic_set_id(apic, kvm_apic_id(apic));
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 615a8b0..e5ebf9f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -52,6 +52,9 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r);
+
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f91e2c9..5be2ea6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6269,6 +6269,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
+	mutex_init(&kvm->arch.apic_map_lock);
 
 	return 0;
 }
@@ -6319,6 +6320,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7118be0..3ca89c4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
-			kvm_is_dm_lowest_prio(irq))
+			kvm_is_dm_lowest_prio(irq)) {
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+		irq->delivery_mode = APIC_DM_FIXED;
+	}
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+		return r;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!kvm_apic_present(vcpu))
--
			Gleb.

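For reference, the table indexing above boils down to a few shifts and
masks. The following stand-alone sketch (illustrative only; it is not
part of the patch) exercises the same decoding that apic_cluster_id()
and apic_logical_id() perform, using the mode parameters that
recalculate_apic_map() chooses for xAPIC cluster mode and x2APIC mode:

#include <stdio.h>
#include <stdint.h>

struct apic_map {
	uint8_t  ldr_bits;
	uint32_t cid_shift, cid_mask, lid_mask;
};

/* mirror of apic_cluster_id() in the patch */
static uint16_t cluster_id(const struct apic_map *m, uint32_t ldr)
{
	ldr >>= 32 - m->ldr_bits;
	return (ldr >> m->cid_shift) & m->cid_mask;
}

/* mirror of apic_logical_id() in the patch */
static uint16_t logical_id(const struct apic_map *m, uint32_t ldr)
{
	ldr >>= 32 - m->ldr_bits;
	return ldr & m->lid_mask;
}

int main(void)
{
	/* xAPIC cluster mode: LDR[31:28] = cluster, LDR[27:24] = member mask */
	struct apic_map cluster = { 8, 4, 0xf, 0xf };
	/* x2APIC mode: LDR[31:16] = cluster, LDR[15:0] = member mask */
	struct apic_map x2apic = { 32, 16, 0xffff, 0xffff };

	/* LDR 0x28000000: cluster 2, member bit 3 -> logical_map[2][3] */
	printf("xapic cluster: cid=%u lid=0x%x\n",
	       (unsigned)cluster_id(&cluster, 0x28000000),
	       (unsigned)logical_id(&cluster, 0x28000000));

	/* LDR 0x00010002: cluster 1, member bit 1 -> logical_map[1][1] */
	printf("x2apic:        cid=%u lid=0x%x\n",
	       (unsigned)cluster_id(&x2apic, 0x00010002),
	       (unsigned)logical_id(&x2apic, 0x00010002));
	return 0;
}

Each set bit in lid names a cluster member, which is why
recalculate_apic_map() stores an apic at logical_map[cid][ffs(lid) - 1]
and kvm_irq_delivery_to_apic_fast() walks the mask with for_each_set_bit().
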
* Re: [PATCHv3] KVM: optimize apic interrupt delivery
From: Avi Kivity @ 2012-09-13  9:15 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mst, mtosatti

On 09/13/2012 12:00 PM, Gleb Natapov wrote:
> Most interrupts are delivered to only one vcpu. Use pre-built tables to
> find the interrupt destination instead of looping through all vcpus. In
> logical mode, loop only through the vcpus in the logical cluster the irq
> is sent to.

Looks good.


-- 
error compiling committee.c: too many arguments to function

* Re: [PATCHv3] KVM: optimize apic interrupt delivery
From: Jan Kiszka @ 2012-09-13 10:29 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, avi, mst, mtosatti

On 2012-09-13 11:00, Gleb Natapov wrote:
> Most interrupts are delivered to only one vcpu. Use pre-built tables to
> find the interrupt destination instead of looping through all vcpus. In
> logical mode, loop only through the vcpus in the logical cluster the irq
> is sent to.
> 
> [...]
> 
> +static inline void recalculate_apic_map(struct kvm *kvm)

Inline? No recent compiler will respect it anyway, but it still looks
strange for this function.

> [...]

So, this can be the foundation for direct MSI delivery as well, right?

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux

* Re: [PATCHv3] KVM: optimize apic interrupt delivery
From: Gleb Natapov @ 2012-09-13 10:33 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: kvm, avi, mst, mtosatti

On Thu, Sep 13, 2012 at 12:29:44PM +0200, Jan Kiszka wrote:
> On 2012-09-13 11:00, Gleb Natapov wrote:
> > [...]
> > 
> > +static inline void recalculate_apic_map(struct kvm *kvm)
> 
> Inline? No recent compiler will respect it anyway, but it still looks
> strange for this function.
Agree. I marked it inline when it was much smaller. Avi/Marcelo, should
I resend, or can you edit before applying?

> > [...]
> 
> So, this can be the foundation for direct MSI delivery as well, right?
> 
What do you mean by "direct MSI delivery"? kvm_irq_delivery_to_apic() is
called by MSI. If you mean delivery from irq context, then yes, mst
plans to do so.

--
			Gleb.

* Re: [PATCHv3] KVM: optimize apic interrupt delivery
From: Jan Kiszka @ 2012-09-13 10:36 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: kvm@vger.kernel.org, avi@redhat.com, mst@redhat.com,
	mtosatti@redhat.com

On 2012-09-13 12:33, Gleb Natapov wrote:
>>
>> So, this can be the foundation for direct MSI delivery as well, right?
>>
> What do you mean by "direct MSI delivery"? kvm_irq_delivery_to_apic() is
> called by MSI. If you mean delivery from irq context, then yes, mst
> plans to do so.

Yes, that's what I was aiming at.

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux

* Re: [PATCHv3] KVM: optimize apic interrupt delivery
From: Michael S. Tsirkin @ 2012-09-13 11:51 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, avi, mtosatti

On Thu, Sep 13, 2012 at 12:00:59PM +0300, Gleb Natapov wrote:
> Most interrupts are delivered to only one vcpu. Use pre-built tables to
> find the interrupt destination instead of looping through all vcpus. In
> logical mode, loop only through the vcpus in the logical cluster the irq
> is sent to.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>


Some comments below.
The code's pretty complex now; I think adding some comments would be
helpful. Below I noted where this would be especially beneficial.

Thanks!

> ---
> [...]
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 64adb61..9dcfd3e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -511,6 +511,14 @@ struct kvm_arch_memory_slot {
>  	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
>  };
>  
> +struct kvm_apic_map {
> +	struct rcu_head rcu;
> +	u8 ldr_bits;

ldr_bits is never used directly; it is always used as 32 - ldr_bits.
It might be a good idea to just store 32 - ldr_bits.
I am not sure.

> +	u32 cid_shift, cid_mask, lid_mask;
> +	struct kvm_lapic *phys_map[256];
> +	struct kvm_lapic *logical_map[16][16];


Would be nice to add documentation for structure fields:
what does each field include? For example what are
the index values into logical_map? When is each mode used?
I am guessing this will address some questions below.

16 is used in several places in the code. We also have
0xf, which is really 16 - 1. Would be nice to have defines here.


> +};
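
Taken together, the two suggestions above might look something like
this (a sketch only; ldr_shift and the KVM_APIC_* names are
hypothetical):

#define KVM_APIC_MAX_CLUSTERS	16	/* rows of logical_map */
#define KVM_APIC_CLUSTER_SIZE	16	/* one bit per cluster member */

struct kvm_apic_map {
	struct rcu_head rcu;
	u8 ldr_shift;		/* precomputed 32 - ldr_bits */
	u32 cid_shift, cid_mask, lid_mask;
	struct kvm_lapic *phys_map[256];
	struct kvm_lapic *logical_map[KVM_APIC_MAX_CLUSTERS]
				     [KVM_APIC_CLUSTER_SIZE];
};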
> 
> [...]
> 
> +static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)

Why is this u16? It seems the only legal values are 0-15,
since this is used as an index into logical_map.
Maybe add a comment explaining that the legal values are 0-15,
or maybe a BUG_ON to check that the result is 0 to 15.


> +{
> +	ldr >>= 32 - map->ldr_bits;
> +	return (ldr >> map->cid_shift) & map->cid_mask;
> +}
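
Concretely, the check suggested above might look like this (a sketch;
note that the 0xffff cid_mask question further down bears on whether
0-15 actually holds in x2apic mode):

static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
{
	u16 cid;

	ldr >>= 32 - map->ldr_bits;
	cid = (ldr >> map->cid_shift) & map->cid_mask;
	BUG_ON(cid >= 16);	/* cid indexes logical_map[16][16] */
	return cid;
}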
> +
> +static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
> +{
> +	ldr >>= (32 - map->ldr_bits);
> +	return ldr & map->lid_mask;
> +}
> +
> +static inline void recalculate_apic_map(struct kvm *kvm)
> +{
> +	struct kvm_apic_map *new, *old = NULL;
> +	struct kvm_vcpu *vcpu;
> +	int i;
> +
> +	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
> +
> +	mutex_lock(&kvm->arch.apic_map_lock);
> +
> +	if (!new)
> +		goto out;
> +
> +	new->ldr_bits = 8;
> +	/* flat mode is deafult */

Typo

> +	new->cid_shift = 8;
> +	new->cid_mask = 0;
> +	new->lid_mask = 0xff;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		struct kvm_lapic *apic = vcpu->arch.apic;
> +		u16 cid, lid;
> +		u32 ldr;
> +
> +		if (!kvm_apic_present(vcpu))
> +			continue;
> +
> +		/*
> +		 * All APICs have to be configured in the same mode by an OS.
> +		 * We take advatage of this while building logical id loockup
> +		 * table. After reset APICs are in xapic/flat mode, so if we
> +		 * find apic with different setting we assume this is the mode
> +		 * os wants all apics to be in


s/os/OS (for consistency).

> and build lookup table accordingly.

A bit clearer:
; we build the lookup table accordingly.

(otherwise it reads as if the OS builds the lookup table)

> +		 */
> +		if (apic_x2apic_mode(apic)) {
> +			new->ldr_bits = 32;
> +			new->cid_shift = 16;
> +			new->cid_mask = new->lid_mask = 0xffff;

This confuses me. cid_mask is only used by apic_cluster_id(), whose
result should be 0 to 15. Why does 0xffff make sense?

> +		} else if (kvm_apic_sw_enabled(apic) &&
> +				!new->cid_mask /* flat mode */ &&
> +				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
> +			new->cid_shift = 4; 
> +			new->cid_mask = 0xf;
> +			new->lid_mask = 0xf;
> +		}
> +
> +		new->phys_map[kvm_apic_id(apic)] = apic;
> +
> +		ldr = kvm_apic_get_reg(apic, APIC_LDR);
> +		cid = apic_cluster_id(new, ldr);
> +		lid = apic_logical_id(new, ldr);
> +
> +		if (lid)
> +			new->logical_map[cid][ffs(lid) - 1] = apic;

Does the manual say only one bit is ever set?

> +	}
> +out:
> +	old = rcu_dereference_protected(kvm->arch.apic_map, 1);

This should be
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));


> +	rcu_assign_pointer(kvm->arch.apic_map, new);
> +	mutex_unlock(&kvm->arch.apic_map_lock);
> +
> +	if (old)
> +		kfree_rcu(old, rcu);
> +}
> 
> [...]
> 
> +		if (irq->delivery_mode == APIC_DM_LOWEST &&
> +				hweight_long(bitmap) > 1) {

I am guessing this hweight_long call is an optimization for when there's
1 bit or less set in bitmap? But isn't it a premature one?
The logic below assumes for_each_set_bit is essentially
free with 1 bit set; why check hweight_long one extra time?
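
Dropping the hweight_long() check, the single-destination case would be
handled by the loop itself; the simplification might look like this (a
sketch):

	if (irq->delivery_mode == APIC_DM_LOWEST) {
		int i, l = -1;

		for_each_set_bit(i, &bitmap, 16) {
			if (!dst[i])
				continue;
			if (l < 0 || kvm_apic_compare_prio(dst[i]->vcpu,
							   dst[l]->vcpu) < 0)
				l = i;
		}
		/* with one bit set this just re-selects that bit */
		bitmap = (l >= 0) ? 1 << l : 0;
	}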

> [...]
> 
> @@ -1169,7 +1327,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
>  	/* Stop the timer in case it's a reset to an active apic */
>  	hrtimer_cancel(&apic->lapic_timer.timer);
>  
> -	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
> +	kvm_apic_set_id(apic, (u8)vcpu->vcpu_id);

Cast is not needed here and it is better to avoid it.

> [...]

* Re: [PATCHv3] KVM: optimize apic interrupt delivery
From: Michael S. Tsirkin @ 2012-09-16 10:56 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Gleb Natapov, kvm@vger.kernel.org, avi@redhat.com,
	mtosatti@redhat.com

On Thu, Sep 13, 2012 at 12:36:30PM +0200, Jan Kiszka wrote:
> On 2012-09-13 12:33, Gleb Natapov wrote:
> >>
> >> So, this can be the foundation for direct MSI delivery as well, right?
> >>
> > What do you mean by "direct MSI delivery"? kvm_irq_delivery_to_apic() is
> > called by MSI. If you mean delivery from irq context, then yes, mst
> > plans to do so.
> 
> Yes, that's what I was aiming at.
> 
> Jan

Avi, I've been debating with myself whether it's still worth it
to bounce interrupts that don't get optimized, such as
broadcasts, out to a thread. Maybe we can just do direct delivery
unconditionally? Guests don't seem to use broadcasts widely at all ...

> -- 
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux

* Re: [PATCHv3] KVM: optimize apic interrupt delivery
From: Avi Kivity @ 2012-09-16 11:04 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jan Kiszka, Gleb Natapov, kvm@vger.kernel.org,
	mtosatti@redhat.com

On 09/16/2012 01:56 PM, Michael S. Tsirkin wrote:
> On Thu, Sep 13, 2012 at 12:36:30PM +0200, Jan Kiszka wrote:
>> On 2012-09-13 12:33, Gleb Natapov wrote:
>> >>
>> >> So, this can be the foundation for direct MSI delivery as well, right?
>> >>
>> > What do you mean by "direct MSI delivery"? kvm_irq_delivery_to_apic() is
>> > called by MSI. If you mean delivery from irq context, then yes, mst
>> > plans to do so.
>> 
>> Yes, that's what I was aiming at.
>> 
>> Jan
> 
> Avi, I've been debating with myself whether it's still worth it
> to bounce interrupts that don't get optimized, such as
> broadcasts, out to a thread. Maybe we can just do direct delivery
> unconditionally? Guests don't seem to use broadcasts widely at all ...

Malicious guests do, all the time.

-- 
error compiling committee.c: too many arguments to function
