* [PATCH 0/8 v2] make interrupt injection lockless (almost)
@ 2009-08-11 12:31 Gleb Natapov
2009-08-11 12:31 ` [PATCH 1/8 v2] Change irq routing table to use gsi indexed array Gleb Natapov
` (7 more replies)
0 siblings, 8 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
kvm->irq_lock protects too much stuff, but still fails to protect
everything it was designed to protect (see the ack notifier call in the PIC). I
want to make IRQ injection fast path as lockless as possible. This patch
series removes kvm->irq_lock from irq injection path effectively making
interrupt injection to lapic lockless (several kvm_irq_delivery_to_apic()
may run in parallel), but access to lapic was never fully locked in the
first place. VCPU could access lapic in parallel with interrupt injection.
Patches 1/2 change the irq routing data structure to a much more efficient one.
v1->v2:
Drop MSI injection interface (for now).
Use irq_lock to protect irq routing and ack notifiers.
Split irq routing table changes into two patches (+ comments addressed).
Drop ioapic/pic lock before calling ack notifiers.
Gleb Natapov (8):
Change irq routing table to use gsi indexed array.
Maintain back mapping from irqchip/pin to gsi.
Move irq routing data structure to rcu locking
Move irq ack notifier list to arch independent code.
Convert irq notifiers lists to RCU locking.
Move IO APIC to its own lock.
Drop kvm->irq_lock lock from irq injection path.
Change irq_lock from mutex to spinlock.
arch/ia64/include/asm/kvm_host.h | 1 -
arch/ia64/kvm/kvm-ia64.c | 29 +++++--
arch/x86/include/asm/kvm_host.h | 1 -
arch/x86/kvm/i8254.c | 2 -
arch/x86/kvm/i8259.c | 4 +
arch/x86/kvm/lapic.c | 7 +--
arch/x86/kvm/x86.c | 32 +++++---
include/linux/kvm_host.h | 20 ++++-
virt/kvm/eventfd.c | 2 -
virt/kvm/ioapic.c | 49 ++++++++-----
virt/kvm/ioapic.h | 1 +
virt/kvm/irq_comm.c | 150 ++++++++++++++++++++------------------
virt/kvm/kvm_main.c | 6 +-
13 files changed, 173 insertions(+), 131 deletions(-)
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 1/8 v2] Change irq routing table to use gsi indexed array.
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-12 8:05 ` Avi Kivity
2009-08-11 12:31 ` [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi Gleb Natapov
` (6 subsequent siblings)
7 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
Use a gsi-indexed array instead of scanning all entries on each interrupt
injection.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
include/linux/kvm_host.h | 16 ++++++++--
virt/kvm/irq_comm.c | 77 +++++++++++++++++++++++++---------------------
virt/kvm/kvm_main.c | 1 -
3 files changed, 55 insertions(+), 39 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f814512..09df31d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -129,7 +129,17 @@ struct kvm_kernel_irq_routing_entry {
} irqchip;
struct msi_msg msi;
};
- struct list_head link;
+ struct hlist_node link;
+};
+
+struct kvm_irq_routing_table {
+ struct kvm_kernel_irq_routing_entry *rt_entries;
+ u32 nr_rt_entries;
+ /*
+ * Array indexed by gsi. Each entry contains list of irq chips
+ * the gsi is connected to.
+ */
+ struct hlist_head map[0];
};
struct kvm {
@@ -167,7 +177,7 @@ struct kvm {
struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
- struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+ struct kvm_irq_routing_table *irq_routing;
struct hlist_head mask_notifier_list;
#endif
@@ -396,7 +406,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
union kvm_ioapic_redirect_entry *entry,
unsigned long *deliver_bitmask);
#endif
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 001663f..a9d2262 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -122,11 +122,13 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
* = 0 Interrupt was coalesced (previous irq is still pending)
* > 0 Number of CPUs interrupt was delivered to
*/
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
struct kvm_kernel_irq_routing_entry *e;
unsigned long *irq_state, sig_level;
int ret = -1;
+ struct kvm_irq_routing_table *irq_rt;
+ struct hlist_node *n;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -150,8 +152,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- list_for_each_entry(e, &kvm->irq_routing, link)
- if (e->gsi == irq) {
+ irq_rt = kvm->irq_routing;
+ if (irq < irq_rt->nr_rt_entries)
+ hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
int r = e->set(e, kvm, sig_level);
if (r < 0)
continue;
@@ -163,20 +166,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
- struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
unsigned gsi = pin;
+ int i;
trace_kvm_ack_irq(irqchip, pin);
- list_for_each_entry(e, &kvm->irq_routing, link)
+ for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) {
+ struct kvm_kernel_irq_routing_entry *e;
+ e = &kvm->irq_routing->rt_entries[i];
if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
e->irqchip.irqchip == irqchip &&
e->irqchip.pin == pin) {
gsi = e->gsi;
break;
}
+ }
hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
if (kian->gsi == gsi)
@@ -267,22 +273,15 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
kimn->func(kimn, mask);
}
-static void __kvm_free_irq_routing(struct list_head *irq_routing)
-{
- struct kvm_kernel_irq_routing_entry *e, *n;
-
- list_for_each_entry_safe(e, n, irq_routing, link)
- kfree(e);
-}
-
void kvm_free_irq_routing(struct kvm *kvm)
{
mutex_lock(&kvm->irq_lock);
- __kvm_free_irq_routing(&kvm->irq_routing);
+ kfree(kvm->irq_routing);
mutex_unlock(&kvm->irq_lock);
}
-static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+ struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
@@ -319,6 +318,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
default:
goto out;
}
+
+ hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
out:
return r;
@@ -330,43 +331,49 @@ int kvm_set_irq_routing(struct kvm *kvm,
unsigned nr,
unsigned flags)
{
- struct list_head irq_list = LIST_HEAD_INIT(irq_list);
- struct list_head tmp = LIST_HEAD_INIT(tmp);
- struct kvm_kernel_irq_routing_entry *e = NULL;
- unsigned i;
+ struct kvm_irq_routing_table *new, *old;
+ u32 i, nr_rt_entries = 0;
int r;
for (i = 0; i < nr; ++i) {
+ if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES)
+ return -EINVAL;
+ nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
+ }
+
+ nr_rt_entries += 1;
+
+ new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
+ + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
+ GFP_KERNEL);
+
+ if (!new)
+ return -ENOMEM;
+
+ new->rt_entries = (void *)&new->map[nr_rt_entries];
+
+ new->nr_rt_entries = nr_rt_entries;
+
+ for (i = 0; i < nr; ++i) {
r = -EINVAL;
- if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
- goto out;
if (ue->flags)
goto out;
- r = -ENOMEM;
- e = kzalloc(sizeof(*e), GFP_KERNEL);
- if (!e)
- goto out;
- r = setup_routing_entry(e, ue);
+ r = setup_routing_entry(new, &new->rt_entries[i], ue);
if (r)
goto out;
++ue;
- list_add(&e->link, &irq_list);
- e = NULL;
}
mutex_lock(&kvm->irq_lock);
- list_splice(&kvm->irq_routing, &tmp);
- INIT_LIST_HEAD(&kvm->irq_routing);
- list_splice(&irq_list, &kvm->irq_routing);
- INIT_LIST_HEAD(&irq_list);
- list_splice(&tmp, &irq_list);
+ old = kvm->irq_routing;
+ kvm->irq_routing = new;
mutex_unlock(&kvm->irq_lock);
+ new = old;
r = 0;
out:
- kfree(e);
- __kvm_free_irq_routing(&irq_list);
+ kfree(new);
return r;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1df4c04..e8b03ee 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -944,7 +944,6 @@ static struct kvm *kvm_create_vm(void)
if (IS_ERR(kvm))
goto out;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
- INIT_LIST_HEAD(&kvm->irq_routing);
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
#endif
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi.
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
2009-08-11 12:31 ` [PATCH 1/8 v2] Change irq routing table to use gsi indexed array Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-12 8:09 ` Avi Kivity
2009-08-11 12:31 ` [PATCH 3/8 v2] Move irq routing data structure to rcu locking Gleb Natapov
` (5 subsequent siblings)
7 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
Maintain a back mapping from irqchip/pin to gsi to speed up
interrupt acknowledgment notifications.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
include/linux/kvm_host.h | 1 +
virt/kvm/irq_comm.c | 24 +++++++++++-------------
2 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 09df31d..d2b8eb3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -133,6 +133,7 @@ struct kvm_kernel_irq_routing_entry {
};
struct kvm_irq_routing_table {
+ int chip[3][KVM_IOAPIC_NUM_PINS];
struct kvm_kernel_irq_routing_entry *rt_entries;
u32 nr_rt_entries;
/*
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a9d2262..42994c4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -168,21 +168,13 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
- unsigned gsi = pin;
- int i;
+ unsigned gsi;
trace_kvm_ack_irq(irqchip, pin);
- for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) {
- struct kvm_kernel_irq_routing_entry *e;
- e = &kvm->irq_routing->rt_entries[i];
- if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
- e->irqchip.irqchip == irqchip &&
- e->irqchip.pin == pin) {
- gsi = e->gsi;
- break;
- }
- }
+ gsi = kvm->irq_routing->chip[irqchip][pin];
+ if (gsi == -1)
+ gsi = pin;
hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
if (kian->gsi == gsi)
@@ -308,6 +300,9 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
e->irqchip.pin = ue->u.irqchip.pin + delta;
+ if (e->irqchip.pin > KVM_IOAPIC_NUM_PINS)
+ goto out;
+ rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -332,7 +327,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
unsigned flags)
{
struct kvm_irq_routing_table *new, *old;
- u32 i, nr_rt_entries = 0;
+ u32 i, j, nr_rt_entries = 0;
int r;
for (i = 0; i < nr; ++i) {
@@ -353,6 +348,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
new->rt_entries = (void *)&new->map[nr_rt_entries];
new->nr_rt_entries = nr_rt_entries;
+ for (i = 0; i < 3; i++)
+ for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
+ new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
r = -EINVAL;
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 3/8 v2] Move irq routing data structure to rcu locking
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
2009-08-11 12:31 ` [PATCH 1/8 v2] Change irq routing table to use gsi indexed array Gleb Natapov
2009-08-11 12:31 ` [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-11 12:31 ` [PATCH 4/8 v2] Move irq ack notifier list to arch independent code Gleb Natapov
` (4 subsequent siblings)
7 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
virt/kvm/irq_comm.c | 15 ++++++++++-----
1 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 42994c4..9f41dde 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -152,7 +152,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- irq_rt = kvm->irq_routing;
+ rcu_read_lock();
+ irq_rt = rcu_dereference(kvm->irq_routing);
if (irq < irq_rt->nr_rt_entries)
hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
int r = e->set(e, kvm, sig_level);
@@ -161,6 +162,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
ret = r + ((ret < 0) ? 0 : ret);
}
+ rcu_read_unlock();
return ret;
}
@@ -172,9 +174,11 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
trace_kvm_ack_irq(irqchip, pin);
- gsi = kvm->irq_routing->chip[irqchip][pin];
+ rcu_read_lock();
+ gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi == -1)
gsi = pin;
+ rcu_read_unlock();
hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
if (kian->gsi == gsi)
@@ -267,9 +271,9 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
void kvm_free_irq_routing(struct kvm *kvm)
{
- mutex_lock(&kvm->irq_lock);
+ /* Called only during vm destruction. Nobody can use the pointer
+ at this stage */
kfree(kvm->irq_routing);
- mutex_unlock(&kvm->irq_lock);
}
static int setup_routing_entry(struct kvm_irq_routing_table *rt,
@@ -364,8 +368,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
- kvm->irq_routing = new;
+ rcu_assign_pointer(kvm->irq_routing, new);
mutex_unlock(&kvm->irq_lock);
+ synchronize_rcu();
new = old;
r = 0;
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 4/8 v2] Move irq ack notifier list to arch independent code.
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
` (2 preceding siblings ...)
2009-08-11 12:31 ` [PATCH 3/8 v2] Move irq routing data structure to rcu locking Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-11 12:31 ` [PATCH 5/8 v2] Convert irq notifiers lists to RCU locking Gleb Natapov
` (3 subsequent siblings)
7 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
Mask irq notifier list is already there.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/ia64/include/asm/kvm_host.h | 1 -
arch/x86/include/asm/kvm_host.h | 1 -
include/linux/kvm_host.h | 1 +
virt/kvm/irq_comm.c | 4 ++--
virt/kvm/kvm_main.c | 1 +
5 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index d9b6325..a362e67 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -475,7 +475,6 @@ struct kvm_arch {
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
int iommu_flags;
- struct hlist_head irq_ack_notifier_list;
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b17d845..7dfde38 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -400,7 +400,6 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
- struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
unsigned int tss_addr;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d2b8eb3..56b2a12 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -180,6 +180,7 @@ struct kvm {
#ifdef CONFIG_HAVE_KVM_IRQCHIP
struct kvm_irq_routing_table *irq_routing;
struct hlist_head mask_notifier_list;
+ struct hlist_head irq_ack_notifier_list;
#endif
#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 9f41dde..ff12fb5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -180,7 +180,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
gsi = pin;
rcu_read_unlock();
- hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+ hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
}
@@ -189,7 +189,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
- hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+ hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e8b03ee..fcc0c44 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -945,6 +945,7 @@ static struct kvm *kvm_create_vm(void)
goto out;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+ INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 5/8 v2] Convert irq notifiers lists to RCU locking.
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
` (3 preceding siblings ...)
2009-08-11 12:31 ` [PATCH 4/8 v2] Move irq ack notifier list to arch independent code Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-11 12:31 ` [PATCH 6/8 v2] Move IO APIC to its own lock Gleb Natapov
` (2 subsequent siblings)
7 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
Use RCU locking for mask/ack notifiers lists.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
virt/kvm/irq_comm.c | 20 +++++++++++---------
1 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index ff12fb5..ba880e8 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -178,18 +178,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
if (gsi == -1)
gsi = pin;
- rcu_read_unlock();
- hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
+ hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
+ rcu_read_unlock();
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
- hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
+ hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
@@ -197,8 +197,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
- hlist_del_init(&kian->link);
+ hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
+ synchronize_rcu();
}
int kvm_request_irq_source_id(struct kvm *kvm)
@@ -245,7 +246,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
{
mutex_lock(&kvm->irq_lock);
kimn->irq = irq;
- hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+ hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
@@ -253,8 +254,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
mutex_lock(&kvm->irq_lock);
- hlist_del(&kimn->link);
+ hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
+ synchronize_rcu();
}
void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -262,11 +264,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
struct kvm_irq_mask_notifier *kimn;
struct hlist_node *n;
- WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
- hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
if (kimn->irq == irq)
kimn->func(kimn, mask);
+ rcu_read_unlock();
}
void kvm_free_irq_routing(struct kvm *kvm)
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
` (4 preceding siblings ...)
2009-08-11 12:31 ` [PATCH 5/8 v2] Convert irq notifiers lists to RCU locking Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-12 8:27 ` Avi Kivity
2009-08-11 12:31 ` [PATCH 7/8 v2] Drop kvm->irq_lock lock from irq injection path Gleb Natapov
2009-08-11 12:31 ` [PATCH 8/8 v2] Change irq_lock from mutex to spinlock Gleb Natapov
7 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/ia64/kvm/kvm-ia64.c | 27 ++++++++++++++++++------
arch/x86/kvm/i8259.c | 4 +++
arch/x86/kvm/lapic.c | 5 +---
arch/x86/kvm/x86.c | 30 ++++++++++++++++++---------
virt/kvm/ioapic.c | 49 ++++++++++++++++++++++++++++-----------------
virt/kvm/ioapic.h | 1 +
6 files changed, 76 insertions(+), 40 deletions(-)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0ad09f0..dd7ef2d 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
r = 0;
switch (chip->chip_id) {
- case KVM_IRQCHIP_IOAPIC:
- memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
- sizeof(struct kvm_ioapic_state));
+ case KVM_IRQCHIP_IOAPIC: {
+ struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+ if (ioapic) {
+ spin_lock(&ioapic->lock);
+ memcpy(&chip->chip.ioapic, ioapic,
+ sizeof(struct kvm_ioapic_state));
+ spin_unlock(&ioapic->lock);
+ } else
+ r = -EINVAL;
+ }
break;
default:
r = -EINVAL;
@@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
r = 0;
switch (chip->chip_id) {
- case KVM_IRQCHIP_IOAPIC:
- memcpy(ioapic_irqchip(kvm),
- &chip->chip.ioapic,
- sizeof(struct kvm_ioapic_state));
+ case KVM_IRQCHIP_IOAPIC: {
+ struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+ if (ioapic) {
+ spin_lock(&ioapic->lock);
+ memcpy(ioapic, &chip->chip.ioapic,
+ sizeof(struct kvm_ioapic_state));
+ spin_unlock(&ioapic->lock);
+ } else
+ r = -EINVAL;
+ }
break;
default:
r = -EINVAL;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 01f1516..a988c0e 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -38,7 +38,9 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
s->isr_ack |= (1 << irq);
if (s != &s->pics_state->pics[0])
irq += 8;
+ spin_unlock(&s->pics_state->lock);
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
+ spin_lock(&s->pics_state->lock);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
@@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
n = irq + irqbase;
+ spin_unlock(&s->pics_state->lock);
kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
+ spin_lock(&s->pics_state->lock);
}
}
s->last_irr = 0;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce195f8..f24d4d0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
- if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
- mutex_lock(&apic->vcpu->kvm->irq_lock);
+ if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
- mutex_unlock(&apic->vcpu->kvm->irq_lock);
- }
}
static void apic_send_ipi(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 850cf56..b0906a0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2022,10 +2022,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
&pic_irqchip(kvm)->pics[1],
sizeof(struct kvm_pic_state));
break;
- case KVM_IRQCHIP_IOAPIC:
- memcpy(&chip->chip.ioapic,
- ioapic_irqchip(kvm),
- sizeof(struct kvm_ioapic_state));
+ case KVM_IRQCHIP_IOAPIC: {
+ struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+ if (ioapic) {
+ spin_lock(&ioapic->lock);
+ memcpy(&chip->chip.ioapic, ioapic,
+ sizeof(struct kvm_ioapic_state));
+ spin_unlock(&ioapic->lock);
+ } else
+ r = -EINVAL;
+ }
break;
default:
r = -EINVAL;
@@ -2054,12 +2060,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
sizeof(struct kvm_pic_state));
spin_unlock(&pic_irqchip(kvm)->lock);
break;
- case KVM_IRQCHIP_IOAPIC:
- mutex_lock(&kvm->irq_lock);
- memcpy(ioapic_irqchip(kvm),
- &chip->chip.ioapic,
- sizeof(struct kvm_ioapic_state));
- mutex_unlock(&kvm->irq_lock);
+ case KVM_IRQCHIP_IOAPIC: {
+ struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+ if (ioapic) {
+ spin_lock(&ioapic->lock);
+ memcpy(ioapic, &chip->chip.ioapic,
+ sizeof(struct kvm_ioapic_state));
+ spin_unlock(&ioapic->lock);
+ } else
+ r = -EINVAL;
+ }
break;
default:
r = -EINVAL;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index fa05f67..881d083 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
union kvm_ioapic_redirect_entry entry;
int ret = 1;
+ spin_lock(&ioapic->lock);
if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
entry = ioapic->redirtbl[irq];
level ^= entry.fields.polarity;
@@ -196,34 +197,43 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
}
trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
}
+ spin_unlock(&ioapic->lock);
+
return ret;
}
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
- int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
+ int trigger_mode)
{
- union kvm_ioapic_redirect_entry *ent;
+ int i;
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+ union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+ if (ent->fields.vector != vector)
+ continue;
- ent = &ioapic->redirtbl[pin];
+ spin_unlock(&ioapic->lock);
+ kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+ spin_lock(&ioapic->lock);
- kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
+ if (trigger_mode != IOAPIC_LEVEL_TRIG)
+ continue;
- if (trigger_mode == IOAPIC_LEVEL_TRIG) {
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
- ioapic_service(ioapic, pin);
+ if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+ ioapic_service(ioapic, i);
}
}
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
- int i;
- for (i = 0; i < IOAPIC_NUM_PINS; i++)
- if (ioapic->redirtbl[i].fields.vector == vector)
- __kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
+ spin_lock(&ioapic->lock);
+ __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+ spin_unlock(&ioapic->lock);
}
static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -248,8 +258,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
- mutex_lock(&ioapic->kvm->irq_lock);
addr &= 0xff;
+ spin_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
result = ioapic->ioregsel;
@@ -263,6 +273,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
result = 0;
break;
}
+ spin_unlock(&ioapic->lock);
+
switch (len) {
case 8:
*(u64 *) val = result;
@@ -275,7 +287,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
default:
printk(KERN_WARNING "ioapic: wrong length %d\n", len);
}
- mutex_unlock(&ioapic->kvm->irq_lock);
return 0;
}
@@ -291,15 +302,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
(void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
- mutex_lock(&ioapic->kvm->irq_lock);
if (len == 4 || len == 8)
data = *(u32 *) val;
else {
printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
- goto unlock;
+ return 0;
}
addr &= 0xff;
+ spin_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
ioapic->ioregsel = data;
@@ -310,15 +321,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
break;
#ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
- kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
+ __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
break;
#endif
default:
break;
}
-unlock:
- mutex_unlock(&ioapic->kvm->irq_lock);
+ spin_unlock(&ioapic->lock);
return 0;
}
@@ -347,6 +357,7 @@ int kvm_ioapic_init(struct kvm *kvm)
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
if (!ioapic)
return -ENOMEM;
+ spin_lock_init(&ioapic->lock);
kvm->arch.vioapic = ioapic;
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b71..557107e 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -44,6 +44,7 @@ struct kvm_ioapic {
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
+ spinlock_t lock;
};
#ifdef DEBUG
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 7/8 v2] Drop kvm->irq_lock lock from irq injection path.
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
` (5 preceding siblings ...)
2009-08-11 12:31 ` [PATCH 6/8 v2] Move IO APIC to its own lock Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-11 12:31 ` [PATCH 8/8 v2] Change irq_lock from mutex to spinlock Gleb Natapov
7 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
The only thing it protects now is interrupt injection into the lapic, and
this can work locklessly. Even now, with kvm->irq_lock in place, access
to lapic is not entirely serialized since vcpu access doesn't take
kvm->irq_lock.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/ia64/kvm/kvm-ia64.c | 2 --
arch/x86/kvm/i8254.c | 2 --
arch/x86/kvm/lapic.c | 2 --
arch/x86/kvm/x86.c | 2 --
virt/kvm/eventfd.c | 2 --
virt/kvm/irq_comm.c | 6 +-----
virt/kvm/kvm_main.c | 2 --
7 files changed, 1 insertions(+), 17 deletions(-)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index dd7ef2d..8f1fc3a 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -998,10 +998,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
- mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
- mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, &irq_event,
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523..b857ca3 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
- mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
- mutex_unlock(&kvm->irq_lock);
/*
* Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f24d4d0..e41e948 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -501,9 +501,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector);
- mutex_lock(&apic->vcpu->kvm->irq_lock);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
- mutex_unlock(&apic->vcpu->kvm->irq_lock);
}
static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b0906a0..13dc33a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2284,10 +2284,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
- mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
- mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, &irq_event,
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 99017e8..95954ad 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work)
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd->kvm;
- mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
- mutex_unlock(&kvm->irq_lock);
}
/*
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index ba880e8..4b36917 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -63,8 +63,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
- WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
kvm_is_dm_lowest_prio(irq))
printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -116,7 +114,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
}
-/* This should be called with the kvm->irq_lock mutex held
+/*
* Return value:
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
* = 0 Interrupt was coalesced (previous irq is still pending)
@@ -132,8 +130,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
trace_kvm_set_irq(irq, level, irq_source_id);
- WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
if (irq < KVM_IOAPIC_NUM_PINS) {
irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fcc0c44..ef96c04 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -137,7 +137,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
interrupt_work);
kvm = assigned_dev->kvm;
- mutex_lock(&kvm->irq_lock);
spin_lock_irq(&assigned_dev->assigned_dev_lock);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
struct kvm_guest_msix_entry *guest_entries =
@@ -156,7 +155,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
assigned_dev->guest_irq, 1);
spin_unlock_irq(&assigned_dev->assigned_dev_lock);
- mutex_unlock(&assigned_dev->kvm->irq_lock);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 8/8 v2] Change irq_lock from mutex to spinlock.
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
` (6 preceding siblings ...)
2009-08-11 12:31 ` [PATCH 7/8 v2] Drop kvm->irq_lock lock from irq injection path Gleb Natapov
@ 2009-08-11 12:31 ` Gleb Natapov
2009-08-12 8:29 ` Avi Kivity
7 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-11 12:31 UTC (permalink / raw)
To: avi; +Cc: kvm
Change irq_lock from mutex to spinlock. We do not sleep while holding
it.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
include/linux/kvm_host.h | 2 +-
virt/kvm/irq_comm.c | 28 ++++++++++++++--------------
virt/kvm/kvm_main.c | 2 +-
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 56b2a12..ccc054a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -176,7 +176,7 @@ struct kvm {
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
- struct mutex irq_lock;
+ spinlock_t irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
struct kvm_irq_routing_table *irq_routing;
struct hlist_head mask_notifier_list;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 4b36917..15eab15 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -184,17 +184,17 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
- mutex_lock(&kvm->irq_lock);
+ spin_lock(&kvm->irq_lock);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
- mutex_unlock(&kvm->irq_lock);
+ spin_unlock(&kvm->irq_lock);
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
- mutex_lock(&kvm->irq_lock);
+ spin_lock(&kvm->irq_lock);
hlist_del_init_rcu(&kian->link);
- mutex_unlock(&kvm->irq_lock);
+ spin_unlock(&kvm->irq_lock);
synchronize_rcu();
}
@@ -203,7 +203,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
int irq_source_id;
- mutex_lock(&kvm->irq_lock);
+ spin_lock(&kvm->irq_lock);
irq_source_id = find_first_zero_bit(bitmap,
sizeof(kvm->arch.irq_sources_bitmap));
@@ -214,7 +214,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
set_bit(irq_source_id, bitmap);
- mutex_unlock(&kvm->irq_lock);
+ spin_unlock(&kvm->irq_lock);
return irq_source_id;
}
@@ -225,7 +225,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
- mutex_lock(&kvm->irq_lock);
+ spin_lock(&kvm->irq_lock);
if (irq_source_id < 0 ||
irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -234,24 +234,24 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
- mutex_unlock(&kvm->irq_lock);
+ spin_unlock(&kvm->irq_lock);
}
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
- mutex_lock(&kvm->irq_lock);
+ spin_lock(&kvm->irq_lock);
kimn->irq = irq;
hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
- mutex_unlock(&kvm->irq_lock);
+ spin_unlock(&kvm->irq_lock);
}
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
- mutex_lock(&kvm->irq_lock);
+ spin_lock(&kvm->irq_lock);
hlist_del_rcu(&kimn->link);
- mutex_unlock(&kvm->irq_lock);
+ spin_unlock(&kvm->irq_lock);
synchronize_rcu();
}
@@ -364,10 +364,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
++ue;
}
- mutex_lock(&kvm->irq_lock);
+ spin_lock(&kvm->irq_lock);
old = kvm->irq_routing;
rcu_assign_pointer(kvm->irq_routing, new);
- mutex_unlock(&kvm->irq_lock);
+ spin_unlock(&kvm->irq_lock);
synchronize_rcu();
new = old;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ef96c04..8b60039 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -978,7 +978,7 @@ static struct kvm *kvm_create_vm(void)
kvm_io_bus_init(&kvm->pio_bus);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
- mutex_init(&kvm->irq_lock);
+ spin_lock_init(&kvm->irq_lock);
kvm_io_bus_init(&kvm->mmio_bus);
init_rwsem(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
--
1.6.3.3
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 1/8 v2] Change irq routing table to use gsi indexed array.
2009-08-11 12:31 ` [PATCH 1/8 v2] Change irq routing table to use gsi indexed array Gleb Natapov
@ 2009-08-12 8:05 ` Avi Kivity
2009-08-12 8:41 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 8:05 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/11/2009 03:31 PM, Gleb Natapov wrote:
> Use gsi indexed array instead of scanning all entries on each interrupt
> injection.
>
>
> @@ -163,20 +166,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>
> void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> {
> - struct kvm_kernel_irq_routing_entry *e;
> struct kvm_irq_ack_notifier *kian;
> struct hlist_node *n;
> unsigned gsi = pin;
> + int i;
>
> trace_kvm_ack_irq(irqchip, pin);
>
> - list_for_each_entry(e,&kvm->irq_routing, link)
> + for (i = 0; i< kvm->irq_routing->nr_rt_entries; i++) {
> + struct kvm_kernel_irq_routing_entry *e;
> + e =&kvm->irq_routing->rt_entries[i];
> if (e->type == KVM_IRQ_ROUTING_IRQCHIP&&
> e->irqchip.irqchip == irqchip&&
> e->irqchip.pin == pin) {
> gsi = e->gsi;
> break;
> }
> + }
>
Don't you need to iterate over all the entries for a gsi?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi.
2009-08-11 12:31 ` [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi Gleb Natapov
@ 2009-08-12 8:09 ` Avi Kivity
2009-08-12 8:42 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 8:09 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/11/2009 03:31 PM, Gleb Natapov wrote:
> Maintain back mapping from irqchip/pin to gsi to speedup
> interrupt acknowledgment notifications.
>
> Signed-off-by: Gleb Natapov<gleb@redhat.com>
> ---
> include/linux/kvm_host.h | 1 +
> virt/kvm/irq_comm.c | 24 +++++++++++-------------
> 2 files changed, 12 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 09df31d..d2b8eb3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -133,6 +133,7 @@ struct kvm_kernel_irq_routing_entry {
> };
>
> struct kvm_irq_routing_table {
> + int chip[3][KVM_IOAPIC_NUM_PINS];
>
KVM_NR_IRQCHIPS
An alternative implementation is to embed a lookup table in each
irqchip. I don't think it's really necessary.
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -168,21 +168,13 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> {
> struct kvm_irq_ack_notifier *kian;
> struct hlist_node *n;
> - unsigned gsi = pin;
> - int i;
> + unsigned gsi;
>
> trace_kvm_ack_irq(irqchip, pin);
>
> - for (i = 0; i< kvm->irq_routing->nr_rt_entries; i++) {
> - struct kvm_kernel_irq_routing_entry *e;
> - e =&kvm->irq_routing->rt_entries[i];
> - if (e->type == KVM_IRQ_ROUTING_IRQCHIP&&
> - e->irqchip.irqchip == irqchip&&
> - e->irqchip.pin == pin) {
> - gsi = e->gsi;
> - break;
> - }
> - }
> + gsi = kvm->irq_routing->chip[irqchip][pin];
> + if (gsi == -1)
> + gsi = pin;
>
What's this?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-11 12:31 ` [PATCH 6/8 v2] Move IO APIC to its own lock Gleb Natapov
@ 2009-08-12 8:27 ` Avi Kivity
2009-08-12 9:04 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 8:27 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/11/2009 03:31 PM, Gleb Natapov wrote:
What is the motivation for this change?
Why a spinlock and not a mutex?
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 0ad09f0..dd7ef2d 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
>
> r = 0;
> switch (chip->chip_id) {
> - case KVM_IRQCHIP_IOAPIC:
> - memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
> - sizeof(struct kvm_ioapic_state));
> + case KVM_IRQCHIP_IOAPIC: {
> + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> + if (ioapic) {
> + spin_lock(&ioapic->lock);
> + memcpy(&chip->chip.ioapic, ioapic,
> + sizeof(struct kvm_ioapic_state));
> + spin_unlock(&ioapic->lock);
>
Better to add an accessor than to reach into internals like this.
> + } else
> + r = -EINVAL;
> + }
> break;
> default:
> r = -EINVAL;
> @@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
>
> r = 0;
> switch (chip->chip_id) {
> - case KVM_IRQCHIP_IOAPIC:
> - memcpy(ioapic_irqchip(kvm),
> - &chip->chip.ioapic,
> - sizeof(struct kvm_ioapic_state));
> + case KVM_IRQCHIP_IOAPIC: {
> + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> + if (ioapic) {
> + spin_lock(&ioapic->lock);
> + memcpy(ioapic,&chip->chip.ioapic,
> + sizeof(struct kvm_ioapic_state));
> + spin_unlock(&ioapic->lock);
> + } else
> + r = -EINVAL;
> + }
>
... and better to deduplicate the code too.
> break;
> default:
> r = -EINVAL;
> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
> index 01f1516..a988c0e 100644
> --- a/arch/x86/kvm/i8259.c
> +++ b/arch/x86/kvm/i8259.c
> @@ -38,7 +38,9 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
> s->isr_ack |= (1<< irq);
> if (s !=&s->pics_state->pics[0])
> irq += 8;
> + spin_unlock(&s->pics_state->lock);
> kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
> + spin_lock(&s->pics_state->lock);
> }
>
Need to explain why this is safe. I'm not sure it is, because we touch
state afterwards in pic_intack(). We need to do all vcpu-synchronous
operations before dropping the lock.
> void kvm_pic_clear_isr_ack(struct kvm *kvm)
> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
> if (vcpu0&& kvm_apic_accept_pic_intr(vcpu0))
> if (s->irr& (1<< irq) || s->isr& (1<< irq)) {
> n = irq + irqbase;
> + spin_unlock(&s->pics_state->lock);
> kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
> + spin_lock(&s->pics_state->lock);
>
Ditto here, needs to be moved until after done changing state.
>
> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
> - int trigger_mode)
> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
> + int trigger_mode)
> {
> - union kvm_ioapic_redirect_entry *ent;
> + int i;
> +
> + for (i = 0; i< IOAPIC_NUM_PINS; i++) {
> + union kvm_ioapic_redirect_entry *ent =&ioapic->redirtbl[i];
> +
> + if (ent->fields.vector != vector)
> + continue;
>
> - ent =&ioapic->redirtbl[pin];
> + spin_unlock(&ioapic->lock);
> + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
> + spin_lock(&ioapic->lock);
>
>
I *think* we need to clear remote_irr before dropping the lock. I
*know* there's a missing comment here.
> - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
> + if (trigger_mode != IOAPIC_LEVEL_TRIG)
> + continue;
>
> - if (trigger_mode == IOAPIC_LEVEL_TRIG) {
> ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
> ent->fields.remote_irr = 0;
> - if (!ent->fields.mask&& (ioapic->irr& (1<< pin)))
> - ioapic_service(ioapic, pin);
> + if (!ent->fields.mask&& (ioapic->irr& (1<< i)))
> + ioapic_service(ioapic, i);
> }
> }
>
To make the patch easier to read, suggest keeping the loop in the other
function.
>
> void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
> {
> struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> - int i;
>
> - for (i = 0; i< IOAPIC_NUM_PINS; i++)
> - if (ioapic->redirtbl[i].fields.vector == vector)
> - __kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
> + spin_lock(&ioapic->lock);
> + __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
> + spin_unlock(&ioapic->lock);
> }
>
>
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 8/8 v2] Change irq_lock from mutex to spinlock.
2009-08-11 12:31 ` [PATCH 8/8 v2] Change irq_lock from mutex to spinlock Gleb Natapov
@ 2009-08-12 8:29 ` Avi Kivity
2009-08-12 9:11 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 8:29 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/11/2009 03:31 PM, Gleb Natapov wrote:
> Change irq_lock from mutex to spinlock. We do not sleep while holding
> it.
>
But why change?
The only motivation I can see is to allow injection from irqfd and
interrupt contexts without requiring a tasklet/work. But that needs
spin_lock_irqsave(), not spin_lock().
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/8 v2] Change irq routing table to use gsi indexed array.
2009-08-12 8:05 ` Avi Kivity
@ 2009-08-12 8:41 ` Gleb Natapov
2009-08-12 9:06 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 8:41 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 11:05:06AM +0300, Avi Kivity wrote:
> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>> Use gsi indexed array instead of scanning all entries on each interrupt
>> injection.
>>
>>
>> @@ -163,20 +166,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>
>> void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>> {
>> - struct kvm_kernel_irq_routing_entry *e;
>> struct kvm_irq_ack_notifier *kian;
>> struct hlist_node *n;
>> unsigned gsi = pin;
>> + int i;
>>
>> trace_kvm_ack_irq(irqchip, pin);
>>
>> - list_for_each_entry(e,&kvm->irq_routing, link)
>> + for (i = 0; i< kvm->irq_routing->nr_rt_entries; i++) {
>> + struct kvm_kernel_irq_routing_entry *e;
>> + e =&kvm->irq_routing->rt_entries[i];
>> if (e->type == KVM_IRQ_ROUTING_IRQCHIP&&
>> e->irqchip.irqchip == irqchip&&
>> e->irqchip.pin == pin) {
>> gsi = e->gsi;
>> break;
>> }
>> + }
>>
>
> Don't you need to iterate over all the entries for a gsi?
>
One pin/irqchip cannot be mapped to more than one GSI. Besides
this is what the current code does.
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi.
2009-08-12 8:09 ` Avi Kivity
@ 2009-08-12 8:42 ` Gleb Natapov
2009-08-12 9:06 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 8:42 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 11:09:58AM +0300, Avi Kivity wrote:
> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>> Maintain back mapping from irqchip/pin to gsi to speedup
>> interrupt acknowledgment notifications.
>>
>> Signed-off-by: Gleb Natapov<gleb@redhat.com>
>> ---
>> include/linux/kvm_host.h | 1 +
>> virt/kvm/irq_comm.c | 24 +++++++++++-------------
>> 2 files changed, 12 insertions(+), 13 deletions(-)
>>
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 09df31d..d2b8eb3 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -133,6 +133,7 @@ struct kvm_kernel_irq_routing_entry {
>> };
>>
>> struct kvm_irq_routing_table {
>> + int chip[3][KVM_IOAPIC_NUM_PINS];
>>
>
> KVM_NR_IRQCHIPS
>
> An alternative implementation is to embed a lookup table in each
> irqchip. I don't think it's really necessary.
>
>> --- a/virt/kvm/irq_comm.c
>> +++ b/virt/kvm/irq_comm.c
>> @@ -168,21 +168,13 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>> {
>> struct kvm_irq_ack_notifier *kian;
>> struct hlist_node *n;
>> - unsigned gsi = pin;
>> - int i;
>> + unsigned gsi;
>>
>> trace_kvm_ack_irq(irqchip, pin);
>>
>> - for (i = 0; i< kvm->irq_routing->nr_rt_entries; i++) {
>> - struct kvm_kernel_irq_routing_entry *e;
>> - e =&kvm->irq_routing->rt_entries[i];
>> - if (e->type == KVM_IRQ_ROUTING_IRQCHIP&&
>> - e->irqchip.irqchip == irqchip&&
>> - e->irqchip.pin == pin) {
>> - gsi = e->gsi;
>> - break;
>> - }
>> - }
>> + gsi = kvm->irq_routing->chip[irqchip][pin];
>> + if (gsi == -1)
>> + gsi = pin;
>>
>
> What's this?
>
If there is no GSI mapping, use pin as gsi. This is what the current code does.
We can BUG() here I think.
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-12 8:27 ` Avi Kivity
@ 2009-08-12 9:04 ` Gleb Natapov
2009-08-12 9:18 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 9:04 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 11:27:13AM +0300, Avi Kivity wrote:
> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>
>
> What is the motivation for this change?
>
The motivation was explained in 0/8. I want to get rid of the lock on the
general irq injection path, so the lock has to be pushed into the ioapic
since multiple cpus can access it concurrently. PIC has such a lock
already.
> Why a spinlock and not a mutex?
>
Protected sections are small and we do not sleep there.
>> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
>> index 0ad09f0..dd7ef2d 100644
>> --- a/arch/ia64/kvm/kvm-ia64.c
>> +++ b/arch/ia64/kvm/kvm-ia64.c
>> @@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
>>
>> r = 0;
>> switch (chip->chip_id) {
>> - case KVM_IRQCHIP_IOAPIC:
>> - memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
>> - sizeof(struct kvm_ioapic_state));
>> + case KVM_IRQCHIP_IOAPIC: {
>> + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
>> + if (ioapic) {
>> + spin_lock(&ioapic->lock);
>> + memcpy(&chip->chip.ioapic, ioapic,
>> + sizeof(struct kvm_ioapic_state));
>> + spin_unlock(&ioapic->lock);
>>
>
> Better to add an accessor than to reach into internals like this.
>
Agree.
>> + } else
>> + r = -EINVAL;
>> + }
>> break;
>> default:
>> r = -EINVAL;
>> @@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
>>
>> r = 0;
>> switch (chip->chip_id) {
>> - case KVM_IRQCHIP_IOAPIC:
>> - memcpy(ioapic_irqchip(kvm),
>> - &chip->chip.ioapic,
>> - sizeof(struct kvm_ioapic_state));
>> + case KVM_IRQCHIP_IOAPIC: {
>> + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
>> + if (ioapic) {
>> + spin_lock(&ioapic->lock);
>> + memcpy(ioapic,&chip->chip.ioapic,
>> + sizeof(struct kvm_ioapic_state));
>> + spin_unlock(&ioapic->lock);
>> + } else
>> + r = -EINVAL;
>> + }
>>
>
> ... and better to deduplicate the code too.
>
>> break;
>> default:
>> r = -EINVAL;
>> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
>> index 01f1516..a988c0e 100644
>> --- a/arch/x86/kvm/i8259.c
>> +++ b/arch/x86/kvm/i8259.c
>> @@ -38,7 +38,9 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
>> s->isr_ack |= (1<< irq);
>> if (s !=&s->pics_state->pics[0])
>> irq += 8;
>> + spin_unlock(&s->pics_state->lock);
>> kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
>> + spin_lock(&s->pics_state->lock);
>> }
>>
>
> Need to explain why this is safe. I'm not sure it is, because we touch
> state afterwards in pic_intack(). We need to do all vcpu-synchronous
> operations before dropping the lock.
First, pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
is already broken for assigned devices. Second for level triggered
interrupts pic_intack() does nothing after calling pic_clear_isr() and
third I can move pic_clear_isr() call to the end of pic_intack().
>
>> void kvm_pic_clear_isr_ack(struct kvm *kvm)
>> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
>> if (vcpu0&& kvm_apic_accept_pic_intr(vcpu0))
>> if (s->irr& (1<< irq) || s->isr& (1<< irq)) {
>> n = irq + irqbase;
>> + spin_unlock(&s->pics_state->lock);
>> kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
>> + spin_lock(&s->pics_state->lock);
>>
>
> Ditto here, needs to be moved until after done changing state.
>
I am not sure this code is even needed. IOAPIC doesn't call notifiers on
reset.
>>
>> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
>> - int trigger_mode)
>> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
>> + int trigger_mode)
>> {
>> - union kvm_ioapic_redirect_entry *ent;
>> + int i;
>> +
>> + for (i = 0; i< IOAPIC_NUM_PINS; i++) {
>> + union kvm_ioapic_redirect_entry *ent =&ioapic->redirtbl[i];
>> +
>> + if (ent->fields.vector != vector)
>> + continue;
>>
>> - ent =&ioapic->redirtbl[pin];
>> + spin_unlock(&ioapic->lock);
>> + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>> + spin_lock(&ioapic->lock);
>>
>>
>
> I *think* we need to clear remote_irr before dropping the lock. I
> *know* there's a missing comment here.
I don't see why we need to clear remote_irr before dropping the lock. If, while
lock was dropped, interrupt was delivered to this entry it will be
injected when ack notifier returns.
>
>> - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
>> + if (trigger_mode != IOAPIC_LEVEL_TRIG)
>> + continue;
>>
>> - if (trigger_mode == IOAPIC_LEVEL_TRIG) {
>> ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>> ent->fields.remote_irr = 0;
>> - if (!ent->fields.mask&& (ioapic->irr& (1<< pin)))
>> - ioapic_service(ioapic, pin);
>> + if (!ent->fields.mask&& (ioapic->irr& (1<< i)))
>> + ioapic_service(ioapic, i);
>> }
>> }
>>
>
> To make the patch easier to read, suggest keeping the loop in the other
> function.
>
I don't follow. All __kvm_ioapic_update_eoi() contains is the loop, so
the loop is already in its own function. Do you mean move the context of
the loop into the other function and leave only for(;;) fun(); in
__kvm_ioapic_update_eoi()?
>>
>> void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
>> {
>> struct kvm_ioapic *ioapic = kvm->arch.vioapic;
>> - int i;
>>
>> - for (i = 0; i< IOAPIC_NUM_PINS; i++)
>> - if (ioapic->redirtbl[i].fields.vector == vector)
>> - __kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
>> + spin_lock(&ioapic->lock);
>> + __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
>> + spin_unlock(&ioapic->lock);
>> }
>>
>>
>
> --
> error compiling committee.c: too many arguments to function
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/8 v2] Change irq routing table to use gsi indexed array.
2009-08-12 8:41 ` Gleb Natapov
@ 2009-08-12 9:06 ` Avi Kivity
0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 9:06 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/12/2009 11:41 AM, Gleb Natapov wrote:
> On Wed, Aug 12, 2009 at 11:05:06AM +0300, Avi Kivity wrote:
>
>> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>>
>>> Use gsi indexed array instead of scanning all entries on each interrupt
>>> injection.
>>>
>>>
>>> @@ -163,20 +166,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>>
>>> void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>> {
>>> - struct kvm_kernel_irq_routing_entry *e;
>>> struct kvm_irq_ack_notifier *kian;
>>> struct hlist_node *n;
>>> unsigned gsi = pin;
>>> + int i;
>>>
>>> trace_kvm_ack_irq(irqchip, pin);
>>>
>>> - list_for_each_entry(e,&kvm->irq_routing, link)
>>> + for (i = 0; i< kvm->irq_routing->nr_rt_entries; i++) {
>>> + struct kvm_kernel_irq_routing_entry *e;
>>> + e =&kvm->irq_routing->rt_entries[i];
>>> if (e->type == KVM_IRQ_ROUTING_IRQCHIP&&
>>> e->irqchip.irqchip == irqchip&&
>>> e->irqchip.pin == pin) {
>>> gsi = e->gsi;
>>> break;
>>> }
>>> + }
>>>
>>>
>> Don't you need to iterate over all the entries for a gsi?
>>
>>
> One pin/irqchip cannot be mapped to more than one GSI.
But one gsi can be mapped to multiple irqchip/pin combinations.
> Besides
> this is what the current code does.
>
The current code stops on first match; the new code doesn't consider
some entries at all.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi.
2009-08-12 8:42 ` Gleb Natapov
@ 2009-08-12 9:06 ` Avi Kivity
2009-08-12 10:17 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 9:06 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/12/2009 11:42 AM, Gleb Natapov wrote:
>
>
>> What's this?
>>
>>
> If there is not GSI mapping use pin as gsi. This what current code does.
> We can BUG() here I think.
>
>
We can't BUG(); userspace can change the mapping before the ack is received.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 8/8 v2] Change irq_lock from mutex to spinlock.
2009-08-12 8:29 ` Avi Kivity
@ 2009-08-12 9:11 ` Gleb Natapov
2009-08-12 9:22 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 9:11 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 11:29:00AM +0300, Avi Kivity wrote:
> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>> Change irq_lock from mutex to spinlock. We do not sleep while holding
>> it.
>>
>
> But why change?
>
Isn't it more lightweight? For the remaining use of the lock it doesn't
really matter, but if I see a mutex used somewhere I assume there are
users that sleep.
> The only motivation I can see is to allow injection from irqfd and
> interrupt contexts without requiring a tasklet/work. But that needs
> spin_lock_irqsave(), not spin_lock().
>
After this series the lock is used only to protect modification of irq
table, add/delete of ack notifiers and irq source id allocator. None of
this affects injection from irqfd.
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-12 9:04 ` Gleb Natapov
@ 2009-08-12 9:18 ` Avi Kivity
2009-08-12 9:47 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 9:18 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/12/2009 12:04 PM, Gleb Natapov wrote:
> On Wed, Aug 12, 2009 at 11:27:13AM +0300, Avi Kivity wrote:
>
>> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>>
>>
>> What is the motivation for this change?
>>
>>
> The motivation was explained in 0/0. I want to get rid of lock on
> general irq injection path so the lock have to be pushed into ioapic
> since multiple cpus can access it concurrently. PIC has such lock
> already.
>
Ah, the real motivation is msi. Pushing locks down doesn't help if we
keep locking them. But for msi we avoid the lock entirely.
>> Why a spinlock and not a mutex?
>>
>>
> Protected sections are small and we do not sleep there.
>
So what? A mutex is better since it allows preemption (and still has
spinlock performance if it isn't preempted).
>> Need to explain why this is safe. I'm not sure it is, because we touch
>> state afterwards in pic_intack(). We need to do all vcpu-synchronous
>> operations before dropping the lock.
>>
> Forst pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
> is already broken for assigned devices. Second for level triggered
> interrupts pic_intack() does nothing after calling pic_clear_isr() and
> third I can move pic_clear_isr() call to the end of pic_intack().
>
I meant, in a comment.
>>> void kvm_pic_clear_isr_ack(struct kvm *kvm)
>>> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
>>> if (vcpu0&& kvm_apic_accept_pic_intr(vcpu0))
>>> if (s->irr& (1<< irq) || s->isr& (1<< irq)) {
>>> n = irq + irqbase;
>>> + spin_unlock(&s->pics_state->lock);
>>> kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
>>> + spin_lock(&s->pics_state->lock);
>>>
>>>
>> Ditto here, needs to be moved until after done changing state.
>>
>>
> I am not sure this code is even needed. IOAPIC don't call notifiers on
> reset.
>
It should. What if there's a reset with an assigned device? We need to
release the device interrupt (after doing FLR?).
>
>>> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
>>> - int trigger_mode)
>>> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
>>> + int trigger_mode)
>>> {
>>> - union kvm_ioapic_redirect_entry *ent;
>>> + int i;
>>> +
>>> + for (i = 0; i< IOAPIC_NUM_PINS; i++) {
>>> + union kvm_ioapic_redirect_entry *ent =&ioapic->redirtbl[i];
>>> +
>>> + if (ent->fields.vector != vector)
>>> + continue;
>>>
>>> - ent =&ioapic->redirtbl[pin];
>>> + spin_unlock(&ioapic->lock);
>>> + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>>> + spin_lock(&ioapic->lock);
>>>
>>>
>>>
>> I *think* we need to clear remote_irr before dropping the lock. I
>> *know* there's a missing comment here.
>>
> I don't see why we clear remote_irr before dropping the lock. If, while
> lock was dropped, interrupt was delivered to this entry it will be
> injected when ack notifier returns.
>
But we'll clear remote_irr after the redelivery, and we should do
that only after the new interrupt is acked.
>>> - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
>>> + if (trigger_mode != IOAPIC_LEVEL_TRIG)
>>> + continue;
>>>
>>> - if (trigger_mode == IOAPIC_LEVEL_TRIG) {
>>> ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>>> ent->fields.remote_irr = 0;
>>> - if (!ent->fields.mask&& (ioapic->irr& (1<< pin)))
>>> - ioapic_service(ioapic, pin);
>>> + if (!ent->fields.mask&& (ioapic->irr& (1<< i)))
>>> + ioapic_service(ioapic, i);
>>> }
>>> }
>>>
>>>
>> To make the patch easier to read, suggest keeping the loop in the other
>> function.
>>
>>
> I don't follow. All __kvm_ioapic_update_eoi() contains is the loop, so
> the loop is already in its own function. Do you mean move the context of
> the loop into the other function and leave only for(;;) fun(); in
> __kvm_ioapic_update_eoi()?
>
No, I mean keep the for loop in kvm_ioapic_update_eoi().
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 8/8 v2] Change irq_lock from mutex to spinlock.
2009-08-12 9:11 ` Gleb Natapov
@ 2009-08-12 9:22 ` Avi Kivity
2009-08-12 9:47 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 9:22 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/12/2009 12:11 PM, Gleb Natapov wrote:
> On Wed, Aug 12, 2009 at 11:29:00AM +0300, Avi Kivity wrote:
>
>> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>>
>>> Change irq_lock from mutex to spinlock. We do not sleep while holding
>>> it.
>>>
>>>
>> But why change?
>>
>>
> Isn't it more lightweight? For the remaining use of the lock it doesn't
> really matter, but if I see a mutex used somewhere I assume there are
> users that sleep.
>
Before the recent change, a mutex was more expensive if there was
contention (waiter would schedule out). Recently the mutex code was
changed to spin while the holder was running.
>> The only motivation I can see is to allow injection from irqfd and
>> interrupt contexts without requiring a tasklet/work. But that needs
>> spin_lock_irqsave(), not spin_lock().
>>
>>
> After this series the lock is used only to protect modification of irq
> table, add/delete of ack notifiers and irq source id allocator. None of
> this affects injection from irqfd.
>
>
Then it can be definitely left as mutex.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-12 9:18 ` Avi Kivity
@ 2009-08-12 9:47 ` Gleb Natapov
2009-08-12 9:57 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 9:47 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 12:18:10PM +0300, Avi Kivity wrote:
> On 08/12/2009 12:04 PM, Gleb Natapov wrote:
>> On Wed, Aug 12, 2009 at 11:27:13AM +0300, Avi Kivity wrote:
>>
>>> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>>>
>>>
>>> What is the motivation for this change?
>>>
>>>
>> The motivation was explained in 0/8. I want to get rid of the lock on
>> the general irq injection path so the lock has to be pushed into the ioapic
>> since multiple cpus can access it concurrently. PIC has such a lock
>> already.
>>
>
> Ah, the real motivation is msi. Pushing locks down doesn't help if we
> keep locking them. But for msi we avoid the lock entirely.
>
Yes. MSI is one. Multiple IOAPICs may inject interrupt in parallel too
(if we will choose to implement multiple IOAPICs sometime).
>>> Why a spinlock and not a mutex?
>>>
>>>
>> Protected sections are small and we do not sleep there.
>>
>
> So what? A mutex is better since it allows preemption (and still has
> spinlock performance if it isn't preempted).
>
This lock will be taken during irq injection from irqfd, so may be leave
it as spinlock and take it _irqsave()? Do we want to allow irq injection
from interrupt context? Otherwise if you say that performance is the
same I don't care one way or the other.
>
>
>>> Need to explain why this is safe. I'm not sure it is, because we touch
>>> state afterwards in pic_intack(). We need to do all vcpu-synchronous
>>> operations before dropping the lock.
>>>
>> First pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
>> is already broken for assigned devices. Second for level triggered
>> interrupts pic_intack() does nothing after calling pic_clear_isr() and
>> third I can move pic_clear_isr() call to the end of pic_intack().
>>
>
> I meant, in a comment.
If you agree with the above I'll add it as a comment.
>
>>>> void kvm_pic_clear_isr_ack(struct kvm *kvm)
>>>> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
>>>> if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
>>>> if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
>>>> n = irq + irqbase;
>>>> + spin_unlock(&s->pics_state->lock);
>>>> kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
>>>> + spin_lock(&s->pics_state->lock);
>>>>
>>>>
>>> Ditto here, needs to be moved until after done changing state.
>>>
>>>
>> I am not sure this code is even needed. IOAPIC doesn't call notifiers on
>> reset.
>>
>
> It should. What if there's a reset with an assigned device? We need to
> release the device interrupt (after doing FLR?).
Doing this will just re-enable host interrupt while irq condition is not
cleared in the device. The host will hang. So I think we really shouldn't.
>
>>
>>>> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
>>>> - int trigger_mode)
>>>> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
>>>> + int trigger_mode)
>>>> {
>>>> - union kvm_ioapic_redirect_entry *ent;
>>>> + int i;
>>>> +
>>>> + for (i = 0; i < IOAPIC_NUM_PINS; i++) {
>>>> + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
>>>> +
>>>> + if (ent->fields.vector != vector)
>>>> + continue;
>>>>
>>>> - ent = &ioapic->redirtbl[pin];
>>>> + spin_unlock(&ioapic->lock);
>>>> + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>>>> + spin_lock(&ioapic->lock);
>>>>
>>>>
>>>>
>>> I *think* we need to clear remote_irr before dropping the lock. I
>>> *know* there's a missing comment here.
>>>
>> I don't see why we clear remote_irr before dropping the lock. If, while
>> lock was dropped, interrupt was delivered to this entry it will be
>> injected when ack notifier returns.
>>
>
> But we'll clear remote_irr after the redelivery, and we should do
> that only after the new interrupt is acked.
It depends on whether you consider calling ack notifiers a part of
the interrupt acknowledgement process. If you do, then remote_irr should not
be cleared before the ack notifiers since the ack process is not completed yet.
With current users functionally it shouldn't matter when we clear
remote_irr. I prefer doing it like we do it now since this how it was
before my patches and since code is simpler this way.
>
>>>> - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
>>>> + if (trigger_mode != IOAPIC_LEVEL_TRIG)
>>>> + continue;
>>>>
>>>> - if (trigger_mode == IOAPIC_LEVEL_TRIG) {
>>>> ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>>>> ent->fields.remote_irr = 0;
>>>> - if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
>>>> - ioapic_service(ioapic, pin);
>>>> + if (!ent->fields.mask && (ioapic->irr & (1 << i)))
>>>> + ioapic_service(ioapic, i);
>>>> }
>>>> }
>>>>
>>>>
>>> To make the patch easier to read, suggest keeping the loop in the other
>>> function.
>>>
>>>
>> I don't follow. All __kvm_ioapic_update_eoi() contains is the loop, so
>> the loop is already in its own function. Do you mean move the contents of
>> the loop into the other function and leave only for(;;) fun(); in
>> __kvm_ioapic_update_eoi()?
>>
>
> No, I mean keep the for loop in kvm_ioapic_update_eoi().
>
Can't do that. __kvm_ioapic_update_eoi() is called from another place
with the lock already held.
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 8/8 v2] Change irq_lock from mutex to spinlock.
2009-08-12 9:22 ` Avi Kivity
@ 2009-08-12 9:47 ` Gleb Natapov
0 siblings, 0 replies; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 9:47 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 12:22:34PM +0300, Avi Kivity wrote:
> On 08/12/2009 12:11 PM, Gleb Natapov wrote:
>> On Wed, Aug 12, 2009 at 11:29:00AM +0300, Avi Kivity wrote:
>>
>>> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>>>
>>>> Change irq_lock from mutex to spinlock. We do not sleep while holding
>>>> it.
>>>>
>>>>
>>> But why change?
>>>
>>>
>> Isn't it more lightweight? For the remaining use of the lock it doesn't
>> really matter, but if I see a mutex used somewhere I assume there are
>> users that sleep.
>>
>
> Before the recent change, a mutex was more expensive if there was
> contention (waiter would schedule out). Recently the mutex code was
> changed to spin while the holder was running.
>
>>> The only motivation I can see is to allow injection from irqfd and
>>> interrupt contexts without requiring a tasklet/work. But that needs
>>> spin_lock_irqsave(), not spin_lock().
>>>
>>>
>> After this series the lock is used only to protect modification of irq
>> table, add/delete of ack notifiers and irq source id allocator. None of
>> this affects injection from irqfd.
>>
>>
>
> Then it can be definitely left as mutex.
>
OK.
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-12 9:47 ` Gleb Natapov
@ 2009-08-12 9:57 ` Avi Kivity
2009-08-12 10:05 ` Gleb Natapov
0 siblings, 1 reply; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 9:57 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/12/2009 12:47 PM, Gleb Natapov wrote:
>> Ah, the real motivation is msi. Pushing locks down doesn't help if we
>> keep locking them. But for msi we avoid the lock entirely.
>>
>>
> Yes. MSI is one. Multiple IOAPICs may inject interrupt in parallel too
> (if we will choose to implement multiple IOAPICs sometime).
>
Right. Given msi, I don't think we'll have multiple ioapics though.
>>>> Why a spinlock and not a mutex?
>>>>
>>>>
>>>>
>>> Protected sections are small and we do not sleep there.
>>>
>>>
>> So what? A mutex is better since it allows preemption (and still has
>> spinlock performance if it isn't preempted).
>>
>>
> This lock will be taken during irq injection from irqfd, so may be leave
> it as spinlock and take it _irqsave()? Do we want to allow irq injection
> from interrupt context? Otherwise if you say that performance is the
> same I don't care one way or the other.
>
Let's leave _irqsave() until later since that has other implications.
>>>> Need to explain why this is safe. I'm not sure it is, because we touch
>>>> state afterwards in pic_intack(). We need to do all vcpu-synchronous
>>>> operations before dropping the lock.
>>>>
>>>>
>>> First pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
>>> is already broken for assigned devices. Second for level triggered
>>> interrupts pic_intack() does nothing after calling pic_clear_isr() and
>>> third I can move pic_clear_isr() call to the end of pic_intack().
>>>
>>>
>> I meant, in a comment.
>>
> If you agree with the above I'll add it as a comment.
>
Sure.
>> It should. What if there's a reset with an assigned device? We need to
>> release the device interrupt (after doing FLR?).
>>
> Doing this will just re-enable host interrupt while irq condition is not
> cleared in the device. The host will hang. So I think we really shouldn't.
>
Ok. What about timer acks?
>>> I don't see why we clear remote_irr before dropping the lock. If, while
>>> lock was dropped, interrupt was delivered to this entry it will be
>>> injected when ack notifier returns.
>>>
>>>
>> But we'll clear remote_irr after the redelivery, and we should do
>> that only after the new interrupt is acked.
>>
> It depends on whether you consider calling ack notifiers a part of
> the interrupt acknowledgement process.
I don't really care, but the ack process has to be atomic. Since we
need to drop the lock, it means the notifier is not part of the process.
> If you do then remote_irr should not
> be cleared before ack notifiers since ack process is not completed yet.
> With current users functionally it shouldn't matter when we clear
> remote_irr. I prefer doing it like we do it now since this how it was
> before my patches and since code is simpler this way.
>
No, I think it introduces a race if an interrupt is raised while the ack
notifier is running.
>> No, I mean keep the for loop in kvm_ioapic_update_eoi().
>>
>>
> Can't do that. __kvm_ioapic_update_eoi() is called from other place with
> lock held already.
>
Ok.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-12 9:57 ` Avi Kivity
@ 2009-08-12 10:05 ` Gleb Natapov
2009-08-12 10:07 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 10:05 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 12:57:07PM +0300, Avi Kivity wrote:
> On 08/12/2009 12:47 PM, Gleb Natapov wrote:
>>> Ah, the real motivation is msi. Pushing locks down doesn't help if we
>>> keep locking them. But for msi we avoid the lock entirely.
>>>
>>>
>> Yes. MSI is one. Multiple IOAPICs may inject interrupt in parallel too
>> (if we will choose to implement multiple IOAPICs sometime).
>>
>
> Right. Given msi, I don't think we'll have multiple ioapics though.
>
>>>>> Why a spinlock and not a mutex?
>>>>>
>>>>>
>>>>>
>>>> Protected sections are small and we do not sleep there.
>>>>
>>>>
>>> So what? A mutex is better since it allows preemption (and still has
>>> spinlock performance if it isn't preempted).
>>>
>>>
>> This lock will be taken during irq injection from irqfd, so may be leave
>> it as spinlock and take it _irqsave()? Do we want to allow irq injection
>> from interrupt context? Otherwise if you say that performance is the
>> same I don't care one way or the other.
>>
>
> Let's leave _irqsave() until later since that has other implications.
>
So change it to mutex then?
>>>>> Need to explain why this is safe. I'm not sure it is, because we touch
>>>>> state afterwards in pic_intack(). We need to do all vcpu-synchronous
>>>>> operations before dropping the lock.
>>>>>
>>>>>
>>>> First pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
>>>> is already broken for assigned devices. Second for level triggered
>>>> interrupts pic_intack() does nothing after calling pic_clear_isr() and
>>>> third I can move pic_clear_isr() call to the end of pic_intack().
>>>>
>>>>
>>> I meant, in a comment.
>>>
>> If you agree with the above I'll add it as a comment.
>>
>
> Sure.
>
>>> It should. What if there's a reset with an assigned device? We need to
>>> release the device interrupt (after doing FLR?).
>>>
>> Doing this will just re-enable host interrupt while irq condition is not
>> cleared in the device. The host will hang. So I think we really shouldn't.
>>
>
> Ok. What about timer acks?
>
Interrupt wasn't processed by a guest. Why should it be accounted as
such?
>
>
>
>>>> I don't see why we clear remote_irr before dropping the lock. If, while
>>>> lock was dropped, interrupt was delivered to this entry it will be
>>>> injected when ack notifier returns.
>>>>
>>>>
>>> But we'll clear remote_irr after the redelivery, and we should do
>>> that only after the new interrupt is acked.
>>>
>> It depends on whether you consider calling ack notifiers a part of
>> the interrupt acknowledgement process.
>
> I don't really care, but the ack process has to be atomic. Since we
> need to drop the lock, it means the notifier is not part of the process.
>
>> If you do then remote_irr should not
>> be cleared before ack notifiers since ack process is not completed yet.
>> With current users functionally it shouldn't matter when we clear
>> remote_irr. I prefer doing it like we do it now since this how it was
>> before my patches and since code is simpler this way.
>>
>
> No, I think it introduces a race if an interrupt is raised while the ack
> notifier is running.
>
>
>
>>> No, I mean keep the for loop in kvm_ioapic_update_eoi().
>>>
>>>
>> Can't do that. __kvm_ioapic_update_eoi() is called from another place
>> with the lock already held.
>>
>
> Ok.
>
> --
> error compiling committee.c: too many arguments to function
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 6/8 v2] Move IO APIC to its own lock.
2009-08-12 10:05 ` Gleb Natapov
@ 2009-08-12 10:07 ` Avi Kivity
0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 10:07 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/12/2009 01:05 PM, Gleb Natapov wrote:
>
>
>> Let's leave _irqsave() until later since that has other implications.
>>
>>
> So change it to mutex then?
>
>
Yes please.
>>> Doing this will just re-enable host interrupt while irq condition is not
>>> cleared in the device. The host will hang. So I think we really shouldn't.
>>>
>>>
>> Ok. What about timer acks?
>>
>>
> Interrupt wasn't processed by a guest. Why should it be accounted as
> such?
>
Good question. I guess it shouldn't.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi.
2009-08-12 9:06 ` Avi Kivity
@ 2009-08-12 10:17 ` Gleb Natapov
2009-08-12 10:19 ` Avi Kivity
0 siblings, 1 reply; 28+ messages in thread
From: Gleb Natapov @ 2009-08-12 10:17 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Wed, Aug 12, 2009 at 12:06:59PM +0300, Avi Kivity wrote:
> On 08/12/2009 11:42 AM, Gleb Natapov wrote:
>>
>>
>>> What's this?
>>>
>>>
>> If there is no GSI mapping, use pin as gsi. This is what the current code does.
>> We can BUG() here I think.
>>
>>
>
> We can't BUG() userspace can change the mapping before the ack is received.
>
Yes. Malicious userspace can remove the GSI mapping for a GSI with an ack
notifier registered, so we can't BUG(). But making GSI=pin doesn't look
correct either. Should we just skip calling the ack notifier?
--
Gleb.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi.
2009-08-12 10:17 ` Gleb Natapov
@ 2009-08-12 10:19 ` Avi Kivity
0 siblings, 0 replies; 28+ messages in thread
From: Avi Kivity @ 2009-08-12 10:19 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm
On 08/12/2009 01:17 PM, Gleb Natapov wrote:
> Yes. Malicious userspace can remove the GSI mapping for a GSI with an ack
> notifier registered, so we can't BUG(). But making GSI=pin doesn't look
> correct either. Should we just skip calling the ack notifier?
>
I think so.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 28+ messages in thread
end of thread, other threads:[~2009-08-12 10:19 UTC | newest]
Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-08-11 12:31 [PATCH 0/8 v2] make interrupt injection lockless (almost) Gleb Natapov
2009-08-11 12:31 ` [PATCH 1/8 v2] Change irq routing table to use gsi indexed array Gleb Natapov
2009-08-12 8:05 ` Avi Kivity
2009-08-12 8:41 ` Gleb Natapov
2009-08-12 9:06 ` Avi Kivity
2009-08-11 12:31 ` [PATCH 2/8 v2] Maintain back mapping from irqchip/pin to gsi Gleb Natapov
2009-08-12 8:09 ` Avi Kivity
2009-08-12 8:42 ` Gleb Natapov
2009-08-12 9:06 ` Avi Kivity
2009-08-12 10:17 ` Gleb Natapov
2009-08-12 10:19 ` Avi Kivity
2009-08-11 12:31 ` [PATCH 3/8 v2] Move irq routing data structure to rcu locking Gleb Natapov
2009-08-11 12:31 ` [PATCH 4/8 v2] Move irq ack notifier list to arch independent code Gleb Natapov
2009-08-11 12:31 ` [PATCH 5/8 v2] Convert irq notifiers lists to RCU locking Gleb Natapov
2009-08-11 12:31 ` [PATCH 6/8 v2] Move IO APIC to its own lock Gleb Natapov
2009-08-12 8:27 ` Avi Kivity
2009-08-12 9:04 ` Gleb Natapov
2009-08-12 9:18 ` Avi Kivity
2009-08-12 9:47 ` Gleb Natapov
2009-08-12 9:57 ` Avi Kivity
2009-08-12 10:05 ` Gleb Natapov
2009-08-12 10:07 ` Avi Kivity
2009-08-11 12:31 ` [PATCH 7/8 v2] Drop kvm->irq_lock lock from irq injection path Gleb Natapov
2009-08-11 12:31 ` [PATCH 8/8 v2] Change irq_lock from mutex to spinlock Gleb Natapov
2009-08-12 8:29 ` Avi Kivity
2009-08-12 9:11 ` Gleb Natapov
2009-08-12 9:22 ` Avi Kivity
2009-08-12 9:47 ` Gleb Natapov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).