All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gleb Natapov <gleb@redhat.com>
To: kvm@vger.kernel.org
Cc: avi@redhat.com, mtosatti@redhat.com, mst@redhat.com
Subject: [RFC PATCH] KVM: optimize apic interrupt delivery
Date: Mon, 10 Sep 2012 16:09:15 +0300	[thread overview]
Message-ID: <20120910130915.GB20907@redhat.com> (raw)

Most interrupt are delivered to only one vcpu. Use pre-build tables to
find interrupt destination instead of looping through all vcpus.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64adb61..121f308 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -511,6 +511,14 @@ struct kvm_arch_memory_slot {
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+struct kvm_apic_map {
+	struct rcu_head rcu;
+	bool flat;
+	u8 ldr_bits;
+	struct kvm_lapic *phys_map[255];
+	struct kvm_lapic *logical_map[16][16];
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -528,6 +536,8 @@ struct kvm_arch {
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	int vapics_in_nmi_mode;
+	struct kvm_apic_map *apic_map;
+	struct mutex apic_map_lock;
 
 	unsigned int tss_addr;
 	struct page *apic_access_page;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 07ad628..d18ddd2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -139,11 +139,101 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
+
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+static void rcu_free_apic_map(struct rcu_head *head)
+{
+	struct kvm_apic_map *p = container_of(head,
+			struct kvm_apic_map, rcu);
+
+	kfree(p);
+}
+
+static void kvm_apic_get_logical_id(u32 ldr, bool flat, u8 ldr_bits,
+		u16 *cid, u16 *lid)
+{
+	if (ldr_bits == 32) {
+		*cid = ldr >> 16;
+		*lid = ldr & 0xffff;
+	} else {
+		ldr = GET_APIC_LOGICAL_ID(ldr);
+
+		if (flat) {
+			*cid = 0;
+			*lid = ldr;
+		} else {
+			*cid = ldr >> 4;
+			*lid = ldr & 0xf;
+		}
+	}
+}
+
+static inline int recalculate_apic_map(struct kvm *kvm)
+{
+	struct kvm_apic_map *new, *old = NULL;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	new->ldr_bits = 8;
+	new->flat = true;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		u16 cid, lid;
+		struct kvm_lapic *apic = vcpu->arch.apic;
+
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		if (apic_x2apic_mode(apic)) {
+			new->ldr_bits = 32;
+			new->flat = false;
+		} else if (kvm_apic_sw_enabled(apic) && new->flat &&
+				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER)
+			new->flat = false;
+
+		new->phys_map[kvm_apic_id(apic)] = apic;
+		kvm_apic_get_logical_id(kvm_apic_get_reg(apic, APIC_LDR),
+				new->flat, new->ldr_bits, &cid, &lid);
+
+		if (lid)
+			new->logical_map[cid][ffs(lid) - 1] = apic;
+	}
+	mutex_lock(&kvm->arch.apic_map_lock);
+	old = kvm->arch.apic_map;
+	rcu_assign_pointer(kvm->arch.apic_map, new);
+	mutex_unlock(&kvm->arch.apic_map_lock);
+
+	if (old)
+		call_rcu(&old->rcu, rcu_free_apic_map);
+
+	return 0;
+}
+
+static inline int kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+{
+	apic_set_reg(apic, APIC_ID, id << 24);
+	return recalculate_apic_map(apic->vcpu->kvm);
+}
+
+static inline int kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
+{
+	apic_set_reg(apic, APIC_LDR, id);
+	return recalculate_apic_map(apic->vcpu->kvm);
+}
+
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
 	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@ -193,11 +283,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_LVR, v);
 }
 
-static inline int apic_x2apic_mode(struct kvm_lapic *apic)
-{
-	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
-}
-
 static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -477,6 +562,67 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	return result;
 }
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r)
+{
+	unsigned long bitmap = 1;
+	struct kvm_lapic **dst;
+	int i;
+
+	if (irq->shorthand == APIC_DEST_SELF) {
+		*r = kvm_apic_set_irq(src->vcpu, irq);
+		return true;
+	}
+
+	if (irq->shorthand)
+		return false;
+
+	if (irq->dest_mode == 0) { /* physical mode */
+		if (irq->delivery_mode == APIC_DM_LOWEST ||
+				irq->dest_id == 0xff)
+			return false;
+		rcu_read_lock();
+		dst = &kvm->arch.apic_map->phys_map[irq->dest_id & 0xff];
+	} else {
+		u16 cid, lid;
+		u32 mda = irq->dest_id;
+
+		rcu_read_lock();
+		if (kvm->arch.apic_map->ldr_bits == 8)
+			mda <<= 24;
+
+		kvm_apic_get_logical_id(mda, kvm->arch.apic_map->flat,
+				kvm->arch.apic_map->ldr_bits, &cid, &lid);
+		dst = kvm->arch.apic_map->logical_map[cid];
+
+		bitmap = lid;
+		if (irq->delivery_mode == APIC_DM_LOWEST) {
+			int l = -1;
+			for_each_set_bit(i, &bitmap, 16) {
+				if (!dst[i])
+					continue;
+				if (l < 0)
+					l = i;
+				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+					l = i;
+			}
+
+			bitmap = (l >= 0) ? 1 << l : 0;
+		}
+	}
+
+	for_each_set_bit(i, &bitmap, 16) {
+		if (!dst[i])
+			continue;
+		if (*r < 0)
+			*r = 0;
+		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+	}
+
+	rcu_read_unlock();
+	return true;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
@@ -880,7 +1026,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_ID, val);
+			kvm_apic_set_id(apic, val >> 24);
 		else
 			ret = 1;
 		break;
@@ -896,15 +1042,16 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_LDR:
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
 		else
 			ret = 1;
 		break;
 
 	case APIC_DFR:
-		if (!apic_x2apic_mode(apic))
+		if (!apic_x2apic_mode(apic)) {
 			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
-		else
+			recalculate_apic_map(apic->vcpu->kvm);
+		} else
 			ret = 1;
 		break;
 
@@ -1135,6 +1282,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 			static_key_slow_dec_deferred(&apic_hw_disabled);
 		else
 			static_key_slow_inc(&apic_hw_disabled.key);
+		recalculate_apic_map(vcpu->kvm);
 	}
 
 	if (!kvm_vcpu_is_bsp(apic->vcpu))
@@ -1144,7 +1292,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	if (apic_x2apic_mode(apic)) {
 		u32 id = kvm_apic_id(apic);
 		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
-		apic_set_reg(apic, APIC_LDR, ldr);
+		kvm_apic_set_ldr(apic, ldr);
 	}
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
@@ -1169,7 +1317,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
-	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+	kvm_apic_set_id(apic, (u8)vcpu->vcpu_id);
 	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1180,7 +1328,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
 	apic_set_spiv(apic, 0xff);
 	apic_set_reg(apic, APIC_TASKPRI, 0);
-	apic_set_reg(apic, APIC_LDR, 0);
+	kvm_apic_set_ldr(apic, 0);
 	apic_set_reg(apic, APIC_ESR, 0);
 	apic_set_reg(apic, APIC_ICR, 0);
 	apic_set_reg(apic, APIC_ICR2, 0);
@@ -1398,6 +1546,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	/* set SPIV separately to get count of SW disabled APICs right */
 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+	/* call kvm_apic_set_id() to put apic into apic_map */
+	kvm_apic_set_id(apic, kvm_apic_id(apic));
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 615a8b0..5ce084e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -52,6 +52,9 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r);
+
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
@@ -120,5 +123,4 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
 	return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
 }
-
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f91e2c9..db6db8f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6269,6 +6269,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
+	mutex_init(&kvm->arch.apic_map_lock);
 
 	return 0;
 }
@@ -6319,6 +6320,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	kfree(kvm->arch.apic_map);
 }
 
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7118be0..3ca89c4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
-			kvm_is_dm_lowest_prio(irq))
+			kvm_is_dm_lowest_prio(irq)) {
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+		irq->delivery_mode = APIC_DM_FIXED;
+	}
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+		return r;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!kvm_apic_present(vcpu))
--
			Gleb.

             reply	other threads:[~2012-09-10 13:09 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-10 13:09 Gleb Natapov [this message]
2012-09-10 14:44 ` [RFC PATCH] KVM: optimize apic interrupt delivery Michael S. Tsirkin
2012-09-10 16:17   ` Gleb Natapov
2012-09-10 17:05     ` Gleb Natapov
2012-09-11  9:29       ` Avi Kivity
2012-09-11  9:35         ` Gleb Natapov
2012-09-11  9:41           ` Avi Kivity
2012-09-11  9:45             ` Gleb Natapov
2012-09-11  9:57               ` Gleb Natapov
2012-09-11 12:05                 ` Avi Kivity
2012-09-11 12:12                   ` Gleb Natapov
2012-09-11 12:04               ` Avi Kivity
2012-09-11 12:09                 ` Gleb Natapov
2012-09-11 11:20     ` Gleb Natapov
2012-09-11 11:49       ` Gleb Natapov
2012-09-10 15:09 ` Avi Kivity
2012-09-10 15:35   ` Gleb Natapov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120910130915.GB20907@redhat.com \
    --to=gleb@redhat.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.