From: Gleb Natapov <gleb@redhat.com>
To: kvm@vger.kernel.org
Cc: avi@redhat.com, mtosatti@redhat.com, mst@redhat.com
Subject: [PATCHv2] KVM: optimize apic interrupt delivery
Date: Tue, 11 Sep 2012 16:02:25 +0300 [thread overview]
Message-ID: <20120911130225.GN20907@redhat.com> (raw)
Most interrupt are delivered to only one vcpu. Use pre-build tables to
find interrupt destination instead of looping through all vcpus. In case
of logical mode loop only through vcpus in a logical cluster irq is sent
to.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
Changelog:
- v1->v2
* fix race Avi noticed
* rcu_read_lock() out of the block as per Avi
* fix rcu issues pointed to by MST. All but one. Still use
call_rcu(). Do not think this is serious issue. If it is should be
solved by RCU subsystem.
* Fix phys_map overflow pointed to by MST
* recalculate_apic_map() does not return error any more.
* add optimization for low prio logical mode with one cpu as dst (it
happens)
I did not rewrote kvm_irq_delivery_to_apic_fast() MST way since my way
looks cleaner to me.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64adb61..3ba8951 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -511,6 +511,14 @@ struct kvm_arch_memory_slot {
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
+struct kvm_apic_map {
+ struct rcu_head rcu;
+ bool flat;
+ u8 ldr_bits;
+ struct kvm_lapic *phys_map[256];
+ struct kvm_lapic *logical_map[16][16];
+};
+
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -528,6 +536,8 @@ struct kvm_arch {
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
int vapics_in_nmi_mode;
+ struct kvm_apic_map *apic_map;
+ struct mutex apic_map_lock;
unsigned int tss_addr;
struct page *apic_access_page;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 07ad628..06672e8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -139,11 +139,92 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+ return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
static inline int kvm_apic_id(struct kvm_lapic *apic)
{
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
+static void kvm_apic_get_logical_id(u32 ldr, bool flat, u8 ldr_bits,
+ u16 *cid, u16 *lid)
+{
+ if (ldr_bits == 32) {
+ *cid = ldr >> 16;
+ *lid = ldr & 0xffff;
+ } else {
+ ldr = GET_APIC_LOGICAL_ID(ldr);
+
+ if (flat) {
+ *cid = 0;
+ *lid = ldr;
+ } else {
+ *cid = ldr >> 4;
+ *lid = ldr & 0xf;
+ }
+ }
+}
+
+static inline void recalculate_apic_map(struct kvm *kvm)
+{
+ struct kvm_apic_map *new, *old = NULL;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+
+ mutex_lock(&kvm->arch.apic_map_lock);
+
+ if (!new)
+ goto out;
+
+ new->ldr_bits = 8;
+ new->flat = true;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ u16 cid, lid;
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (!kvm_apic_present(vcpu))
+ continue;
+
+ if (apic_x2apic_mode(apic)) {
+ new->ldr_bits = 32;
+ new->flat = false;
+ } else if (kvm_apic_sw_enabled(apic) && new->flat &&
+ kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER)
+ new->flat = false;
+
+ new->phys_map[kvm_apic_id(apic)] = apic;
+ kvm_apic_get_logical_id(kvm_apic_get_reg(apic, APIC_LDR),
+ new->flat, new->ldr_bits, &cid, &lid);
+
+ if (lid)
+ new->logical_map[cid][ffs(lid) - 1] = apic;
+ }
+out:
+ old = kvm->arch.apic_map;
+ rcu_assign_pointer(kvm->arch.apic_map, new);
+ mutex_unlock(&kvm->arch.apic_map_lock);
+
+ if (old)
+ kfree_rcu(old, rcu);
+}
+
+static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+{
+ apic_set_reg(apic, APIC_ID, id << 24);
+ recalculate_apic_map(apic->vcpu->kvm);
+}
+
+static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
+{
+ apic_set_reg(apic, APIC_LDR, id);
+ recalculate_apic_map(apic->vcpu->kvm);
+}
+
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@ -193,11 +274,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_LVR, v);
}
-static inline int apic_x2apic_mode(struct kvm_lapic *apic)
-{
- return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
-}
-
static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
@@ -477,6 +553,79 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
return result;
}
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq, int *r)
+{
+ struct kvm_apic_map *map;
+ unsigned long bitmap = 1;
+ struct kvm_lapic **dst;
+ int i;
+
+ *r = -1;
+
+ if (irq->shorthand == APIC_DEST_SELF) {
+ *r = kvm_apic_set_irq(src->vcpu, irq);
+ return true;
+ }
+
+ if (irq->shorthand)
+ return false;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ if (!map) {
+ rcu_read_unlock();
+ return false;
+ }
+
+ if (irq->dest_mode == 0) { /* physical mode */
+ if (irq->delivery_mode == APIC_DM_LOWEST ||
+ irq->dest_id == 0xff) {
+ rcu_read_unlock();
+ return false;
+ }
+ dst = &map->phys_map[irq->dest_id & 0xff];
+ } else {
+ u16 cid, lid;
+ u32 mda = irq->dest_id;
+
+ if (map->ldr_bits == 8)
+ mda <<= 24;
+
+ kvm_apic_get_logical_id(mda, map->flat, map->ldr_bits,
+ &cid, &lid);
+ dst = map->logical_map[cid];
+
+ bitmap = lid;
+ if (irq->delivery_mode == APIC_DM_LOWEST &&
+ hweight_long(bitmap) > 1) {
+ int l = -1;
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i])
+ continue;
+ if (l < 0)
+ l = i;
+ else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+ l = i;
+ }
+
+ bitmap = (l >= 0) ? 1 << l : 0;
+ }
+ }
+
+ for_each_set_bit(i, &bitmap, 16) {
+ if (!dst[i])
+ continue;
+ if (*r < 0)
+ *r = 0;
+ *r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+ }
+
+ rcu_read_unlock();
+ return true;
+}
+
/*
* Add a pending IRQ into lapic.
* Return 1 if successfully added and 0 if discarded.
@@ -880,7 +1029,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
switch (reg) {
case APIC_ID: /* Local APIC ID */
if (!apic_x2apic_mode(apic))
- apic_set_reg(apic, APIC_ID, val);
+ kvm_apic_set_id(apic, val >> 24);
else
ret = 1;
break;
@@ -896,15 +1045,16 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LDR:
if (!apic_x2apic_mode(apic))
- apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+ kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
else
ret = 1;
break;
case APIC_DFR:
- if (!apic_x2apic_mode(apic))
+ if (!apic_x2apic_mode(apic)) {
apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
- else
+ recalculate_apic_map(apic->vcpu->kvm);
+ } else
ret = 1;
break;
@@ -1135,6 +1285,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
static_key_slow_dec_deferred(&apic_hw_disabled);
else
static_key_slow_inc(&apic_hw_disabled.key);
+ recalculate_apic_map(vcpu->kvm);
}
if (!kvm_vcpu_is_bsp(apic->vcpu))
@@ -1144,7 +1295,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if (apic_x2apic_mode(apic)) {
u32 id = kvm_apic_id(apic);
u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
- apic_set_reg(apic, APIC_LDR, ldr);
+ kvm_apic_set_ldr(apic, ldr);
}
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
@@ -1169,7 +1320,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
- apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+ kvm_apic_set_id(apic, (u8)vcpu->vcpu_id);
kvm_apic_set_version(apic->vcpu);
for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1180,7 +1331,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_DFR, 0xffffffffU);
apic_set_spiv(apic, 0xff);
apic_set_reg(apic, APIC_TASKPRI, 0);
- apic_set_reg(apic, APIC_LDR, 0);
+ kvm_apic_set_ldr(apic, 0);
apic_set_reg(apic, APIC_ESR, 0);
apic_set_reg(apic, APIC_ICR, 0);
apic_set_reg(apic, APIC_ICR2, 0);
@@ -1398,6 +1549,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
/* set SPIV separately to get count of SW disabled APICs right */
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+ /* call kvm_apic_set_id() to put apic into apic_map */
+ kvm_apic_set_id(apic, kvm_apic_id(apic));
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 615a8b0..e5ebf9f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -52,6 +52,9 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq, int *r);
+
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f91e2c9..db6db8f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6269,6 +6269,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
+ mutex_init(&kvm->arch.apic_map_lock);
return 0;
}
@@ -6319,6 +6320,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
+ kfree(kvm->arch.apic_map);
}
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7118be0..3ca89c4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_vcpu *vcpu, *lowest = NULL;
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
- kvm_is_dm_lowest_prio(irq))
+ kvm_is_dm_lowest_prio(irq)) {
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+ irq->delivery_mode = APIC_DM_FIXED;
+ }
+
+ if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+ return r;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_apic_present(vcpu))
--
Gleb.
next reply other threads:[~2012-09-11 13:02 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-09-11 13:02 Gleb Natapov [this message]
2012-09-11 13:26 ` [PATCHv2] KVM: optimize apic interrupt delivery Avi Kivity
2012-09-11 14:02 ` Michael S. Tsirkin
2012-09-11 14:46 ` Gleb Natapov
2012-09-11 15:51 ` Avi Kivity
2012-09-11 14:10 ` Michael S. Tsirkin
2012-09-11 17:13 ` Paul E. McKenney
2012-09-11 20:04 ` Avi Kivity
2012-09-11 22:39 ` Michael S. Tsirkin
2012-09-12 7:41 ` Avi Kivity
2012-09-11 22:33 ` Michael S. Tsirkin
2012-09-12 1:03 ` Paul E. McKenney
2012-09-12 7:45 ` Avi Kivity
2012-09-12 12:34 ` Gleb Natapov
[not found] ` <505081E9.8080505@redhat.com>
2012-09-12 12:44 ` Gleb Natapov
2012-09-12 15:13 ` Paul E. McKenney
2013-11-26 16:24 ` Michael S. Tsirkin
2013-11-26 19:35 ` Paul E. McKenney
2013-11-27 8:00 ` Gleb Natapov
2013-11-27 17:06 ` Paul E. McKenney
2013-11-28 8:55 ` Gleb Natapov
2013-12-05 23:00 ` Paul E. McKenney
2013-12-06 11:32 ` Gleb Natapov
2013-11-26 20:07 ` Marcelo Tosatti
2012-09-12 12:17 ` Gleb Natapov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120911130225.GN20907@redhat.com \
--to=gleb@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mst@redhat.com \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.