public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH] Add irqdevice interface + userint implementation
@ 2007-04-09 20:27 Gregory Haskins
       [not found] ` <461A5B6C.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Gregory Haskins @ 2007-04-09 20:27 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Here is the next pass of the patch with changes based on feedback.

I still have not converted over to kerneldoc format as I cannot find an example anywhere yet, and the documentation under Documentation/kernel-doc-nano-HOWTO was a little vague.  Pointers appreciated.

-Greg

---
KVM: Add irqdevice object

The current code is geared towards using a user-mode (A)PIC.  This patch adds
an "irqdevice" abstraction, and implements a "userint" model to handle the
duties of the original code.  Later, we can develop other irqdevice models 
to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/Makefile    |    2 
 drivers/kvm/irqdevice.h |  202 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/kvm.h       |    9 +-
 drivers/kvm/kvm_main.c  |   57 +++++++++----
 drivers/kvm/svm.c       |   33 ++++---
 drivers/kvm/userint.c   |  210 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c       |   29 +++---
 7 files changed, 489 insertions(+), 53 deletions(-)

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..540afbc 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h
new file mode 100644
index 0000000..e4377d4
--- /dev/null
+++ b/drivers/kvm/irqdevice.h
@@ -0,0 +1,202 @@
+/*
+ * Defines an interface for an abstract interrupt controller.  The model 
+ * consists of a unit with an arbitrary number of input lines (IRQ0-N), an
+ * output line (INTR), and methods for completing an interrupt-acknowledge
+ * cycle (INTA).  A particular implementation of this model will define
+ * various policies, such as irq-to-vector translation, INTA/auto-EOI policy,
+ * etc.  
+ * 
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an 
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another 
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __IRQDEVICE_H
+#define __IRQDEVICE_H
+
+#define KVM_IRQFLAGS_NMI  (1 << 0)
+#define KVM_IRQFLAGS_PEEK (1 << 1)
+
+struct kvm_irqdevice;
+
+struct kvm_irqsink {
+	void (*raise_intr)(struct kvm_irqsink *this, 
+			   struct kvm_irqdevice *dev);
+
+	void *private;
+};
+
+struct kvm_irqdevice {
+	int  (*pending)(struct kvm_irqdevice *this, int flags);
+	int  (*read_vector)(struct kvm_irqdevice *this, int flags); 
+	int  (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
+	int  (*summary)(struct kvm_irqdevice *this, void *data);
+	void (*destructor)(struct kvm_irqdevice *this);
+
+	void               *private;
+	struct kvm_irqsink  sink;
+};
+
+/*
+ * kvm_irqdevice_init()
+ *
+ * Description:
+ *    Initialized the kvm_irqdevice for use.  Should be called before calling
+ *    any derived implementation init functions
+ * 
+ * Parameters:
+ *    struct kvm_irqdevice *dev: This device
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev)
+{
+	memset(dev, 0, sizeof(*dev));
+}
+
+/*
+ * kvm_irqdevice_pending()
+ *
+ * Description:
+ *    Efficiently determines if an interrupt is pending on an irqdevice
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    int flags: Modifies the behavior as follows:
+ * 
+ *                   KVM_IRQFLAGS_NMI: Mask everything but NMIs
+ *
+ * Returns: (int)
+ *    0 = no iterrupts pending (per "flags" criteria)
+ *    1 = one or more interrupts are pending
+ */
+static inline int kvm_irqdevice_pending(struct kvm_irqdevice *dev, int flags)
+{
+	return dev->pending(dev, flags);
+}
+
+/*
+ * kvm_irqdevice_read_vector()
+ *
+ * Description:
+ *    Read the highest priority pending vector from the device, potentially
+ *    invoking auto-EOI depending on device policy
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    int flags: Modifies the behavior as follows:
+ *
+ *                   KVM_IRQFLAGS_NMI: Mask everything but NMIs
+ *                   KVM_IRQFLAGS_PEEK: Do not auto-acknowledge interrupt
+ *
+ * Returns: (int)
+ *   -1 = no interrupts pending (per "flags" criteria)
+ *  >=0 = the highest pending vector
+ */
+static inline int kvm_irqdevice_read_vector(struct kvm_irqdevice *dev, 
+					    int flags)
+{
+	return dev->read_vector(dev, flags);
+}
+
+/*
+ * kvm_irqdevice_set_pin()
+ *
+ * Description:
+ *    Allows the caller to assert/deassert an IRQ input pin to the device 
+ *    according to device policy.
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    int pin: The input pin to alter
+ *    int level: The value to set (1 = assert, 0 = deassert)
+ *
+ * Returns: (int)
+ *   -1 = failure
+ *    0 = success
+ */
+static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin,
+				  int level)
+{
+	return dev->set_pin(dev, pin, level);
+}
+
+/*
+ * kvm_irqdevice_summary()
+ *
+ * Description:
+ *    Loads a summary bitmask of all pending vectors (0-255)
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    void *data: A pointer to a region capable of holding a 256 bit bitmap
+ *
+ * Returns: (int)
+ *   -1 = failure
+ *    0 = success
+ */
+static inline int kvm_irqdevice_summary(struct kvm_irqdevice *dev, void *data)
+{
+	return dev->summary(dev, data);
+}
+
+/*
+ * kvm_irqdevice_register_sink()
+ *
+ * Description:
+ *    Registers an kvm_irqsink object as an INTR callback
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    struct kvm_irqsink *sink: The sink to register.  Data will be copied
+ *                              so building object from transient storage is 
+ *                              ok.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev, 
+					       const struct kvm_irqsink *sink)
+{
+	dev->sink = *sink;
+}
+
+/*
+ * kvm_irqdevice_raise_intr()
+ *
+ * Description:
+ *    Invokes a registered INTR callback (if present).  This function is
+ *    meant to be used privately by a irqdevice implementation. 
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_raise_intr(struct kvm_irqdevice *dev)
+{
+	struct kvm_irqsink *sink = &dev->sink;
+	if (sink->raise_intr)
+		sink->raise_intr(sink, dev);
+}
+
+#endif /*  __IRQDEVICE_H */
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 181099f..58966d9 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 
 #include "vmx.h"
+#include "irqdevice.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -157,6 +158,8 @@ struct vmcs {
 
 struct kvm_vcpu;
 
+int kvm_userint_init(struct kvm_irqdevice *dev);
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -266,6 +269,8 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus, 
 			     struct kvm_io_device *dev);
 
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -278,9 +283,7 @@ struct kvm_vcpu {
 	u64 host_tsc;
 	struct kvm_run *run;
 	int interrupt_window_open;
-	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
-	unsigned long irq_pending[NR_IRQ_WORDS];
+	struct kvm_irqdevice irq_dev;
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c3c0059..7e00412 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1989,8 +1989,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->shadow_efer;
 	sregs->apic_base = vcpu->apic_base;
 
-	memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
-	       sizeof sregs->interrupt_bitmap);
+	kvm_irqdevice_summary(&vcpu->irq_dev, &sregs->interrupt_bitmap);
 
 	vcpu_put(vcpu);
 
@@ -2044,13 +2043,11 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
-	       sizeof vcpu->irq_pending);
-	vcpu->irq_summary = 0;
-	for (i = 0; i < NR_IRQ_WORDS; ++i)
-		if (vcpu->irq_pending[i])
-			__set_bit(i, &vcpu->irq_summary);
-
+	/* walk the interrupt-bitmap and inject an IRQ for each bit found */
+	for (i = 0; i < 256; ++i)
+		if (test_bit(i, &sregs->interrupt_bitmap[0]))
+			kvm_irqdevice_set_pin(&vcpu->irq_dev, i, 1);
+ 
 	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
 	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
 	set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -2210,14 +2207,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq < 0 || irq->irq >= 256)
 		return -EINVAL;
-	vcpu_load(vcpu);
-
-	set_bit(irq->irq, vcpu->irq_pending);
-	set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
 
-	vcpu_put(vcpu);
-
-	return 0;
+	return kvm_irqdevice_set_pin(&vcpu->irq_dev, irq->irq, 1);
 }
 
 static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2319,6 +2310,36 @@ out1:
 }
 
 /*
+ * This function will be invoked whenever the vcpu->irq_dev raises its INTR 
+ * line
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this, 
+			  struct kvm_irqdevice *dev)
+{
+	/*
+	 * Our irq device is requesting to interrupt the vcpu.  If it is
+	 * currently running, we should inject a host IPI to force a VMEXIT 
+	 */
+	
+	/*
+	 * FIXME: Implement this or the CPU wont notice the interrupt until
+	 * the next natural VMEXIT.  Note that this is how the system
+	 * has always worked, so nothing is broken here.  This is a future
+	 * enhancement
+	 */
+}
+
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_irqsink sink = {
+		.raise_intr = kvm_vcpu_intr,
+		.private    = vcpu
+	};
+	
+	kvm_irqdevice_register_sink(&vcpu->irq_dev, &sink);
+}
+
+/*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
@@ -2364,6 +2385,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r < 0)
 		goto out_free_vcpus;
 
+	kvm_irqdevice_init(&vcpu->irq_dev);
+	kvm_vcpu_irqsink_init(vcpu);
+	kvm_userint_init(&vcpu->irq_dev);
+
 	kvm_arch_ops->vcpu_load(vcpu);
 	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b7e1410..e59a548 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -108,20 +108,16 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
 
 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
-	return irq;
+	return kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
 }
 
 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 {
-	set_bit(irq, vcpu->irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+	/* FIXME: We probably want to reserve the "set_pin" verb for
+	 * actual interrupt requests, not for putting back something
+	 * previously pending.  Lets revisit this
+	 */
+	kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1);
 }
 
 static inline void clgi(void)
@@ -1092,7 +1088,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1263,7 +1259,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
 		++kvm_stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -1399,7 +1395,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 		 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
 
-	if (vcpu->interrupt_window_open && vcpu->irq_summary)
+	if (vcpu->interrupt_window_open && 
+	    kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@@ -1409,7 +1406,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	 * Interrupts blocked.  Wait for unblock.
 	 */
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+	    (kvm_irqdevice_pending(&vcpu->irq_dev, 0) || 
+	     kvm_run->request_interrupt_window)) {
 		control->intercept |= 1ULL << INTERCEPT_VINTR;
 	} else
 		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
@@ -1418,8 +1416,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
 	kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
@@ -1434,7 +1433,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c
new file mode 100644
index 0000000..42b6fb3
--- /dev/null
+++ b/drivers/kvm/userint.c
@@ -0,0 +1,210 @@
+/*
+ * User Interrupts IRQ device 
+ *
+ * This acts as an extention of an interrupt controller that exists elsewhere 
+ * (typically in userspace/QEMU).  Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has 
+ * already occured.  Therefore, this PIC has the following unusal properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (i.e. no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ *    upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped) 
+ *    because its an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code, 
+ *     Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include "kvm.h"
+
+/*
+ *----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses 
+ * a summary field to accelerate lookups.  Assumes external locking 
+ *---------------------------------------------------------------------
+ */
+
+struct bitarray {
+	unsigned long summary; /* 1 per word in pending */
+	unsigned long pending[NR_IRQ_WORDS];
+};
+
+static inline int bitarray_pending(struct bitarray *this)
+{
+	return this->summary ? 1 : 0;	
+}
+
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+	if (!this->summary)
+		return -1;
+	else {
+		int word_index = __fls(this->summary);
+		int bit_index  = __fls(this->pending[word_index]);
+		
+		return word_index * BITS_PER_LONG + bit_index;	
+	}
+}
+
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+	__set_bit(nr, &this->pending);
+	__set_bit(nr / BITS_PER_LONG, &this->summary); 
+} 
+
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+	int word = nr / BITS_PER_LONG;
+
+	__clear_bit(nr, &this->pending);
+	if (!this->pending[word])
+		__clear_bit(word, &this->summary);
+}
+
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+	return test_bit(nr, &this->pending);
+}
+
+/*
+ *----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------
+ */
+
+struct kvm_userint {
+	spinlock_t      lock;
+	struct bitarray irq_pending;
+	int             nmi_pending;
+};
+
+static int userint_pending(struct kvm_irqdevice *this, int flags)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+	int ret;
+
+	spin_lock_irq(&s->lock);
+
+	if (flags & KVM_IRQFLAGS_NMI)
+		ret = s->nmi_pending;
+	else
+		ret = bitarray_pending(&s->irq_pending);
+
+	spin_unlock_irq(&s->lock);
+
+	return ret;
+}
+
+static int userint_read_vector(struct kvm_irqdevice *this, int flags)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+	int          irq;
+
+	spin_lock_irq(&s->lock);
+
+	/*
+	 * NMIs take priority, so if there is an NMI pending, or
+	 * if we are filtering out NMIs, only consider them 
+	 */
+	if (s->nmi_pending || (flags & KVM_IRQFLAGS_NMI))
+		irq = s->nmi_pending ? 2 : -1;
+	else
+		irq = bitarray_findhighest(&s->irq_pending);
+	
+	if ((irq > -1) && !(flags & KVM_IRQFLAGS_PEEK)) {
+		/*
+		 * If the "peek" flag is not set, automatically clear the 
+		 * interrupt as the EOI mechanism (if any) will take place 
+		 * in userspace 
+		 */
+		bitarray_clear(&s->irq_pending, irq);
+		if (irq == 2)
+			s->nmi_pending = 0;
+	}
+
+	spin_unlock_irq(&s->lock);
+
+	return irq;
+}
+
+static int userint_set_pin(struct kvm_irqdevice* this, int irq, int level)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+	if (!level)
+		return 0; /* We dont care about deasserts */
+
+	spin_lock_irq(&s->lock);
+
+	/*
+	 * Update the local state 
+	 */
+	bitarray_set(&s->irq_pending, irq);
+	if (irq == 2)
+		s->nmi_pending = 1;
+
+	spin_unlock_irq(&s->lock);
+
+	/*
+	 * And then alert the higher layer software we have changes 
+	 */
+	kvm_irqdevice_raise_intr(this);
+
+	return 0;
+}
+
+static int userint_summary(struct kvm_irqdevice* this, void *data)
+{	
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+	spin_lock_irq(&s->lock);
+	memcpy(data, s->irq_pending.pending, sizeof s->irq_pending.pending);
+	spin_unlock_irq(&s->lock);
+
+	return 0;
+}
+
+static void userint_destructor(struct kvm_irqdevice *this)
+{
+	kfree(this->private);
+}
+
+int kvm_userint_init(struct kvm_irqdevice *dev)
+{
+	struct kvm_userint *s;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+     
+	spin_lock_init(&s->lock);
+
+	dev->pending     = userint_pending;
+	dev->read_vector = userint_read_vector;
+	dev->set_pin     = userint_set_pin;
+	dev->summary     = userint_summary;
+	dev->destructor  = userint_destructor;
+
+	dev->private = s;
+
+	return 0;
+}
+
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 61a6116..a0fdf02 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
 
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	int irq = kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
+	BUG_ON(irq < 0);
 
 	if (vcpu->rmode.active) {
 		inject_rmode_irq(vcpu, irq);
@@ -1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
 
 	if (vcpu->interrupt_window_open &&
-	    vcpu->irq_summary &&
+	    kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
@@ -1255,7 +1250,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window))
+	    (kvm_irqdevice_pending(&vcpu->irq_dev, 0) ||
+	     kvm_run->request_interrupt_window))
 		/*
 		 * Interrupts blocked.  Wait for unblock.
 		 */
@@ -1314,8 +1310,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (is_external_interrupt(vect_info)) {
 		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-		set_bit(irq, vcpu->irq_pending);
-		set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+		/* FIXME: Is this right? */
+		kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1); 
 	}
 
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1619,8 +1615,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 	kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
 }
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1631,7 +1628,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		++kvm_stat.irq_window_exits;
 		return 0;
@@ -1642,7 +1639,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1713,7 +1710,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));


-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV

^ permalink raw reply related	[flat|nested] 17+ messages in thread
* [PATCH] Add irqdevice interface + userint implementation
@ 2007-04-05 18:53 Gregory Haskins
       [not found] ` <4614FF5C.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Gregory Haskins @ 2007-04-05 18:53 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Hi All,
  The following is another patch that I broke out of the in-kernel-apic patch that I sent earlier.  This focuses purely on the irqdevice abstraction without introducing the concept of the in-kernel LAPIC.  It also provides an implementation of a "userint" model, which should support a system with a QEMU based emulation of the (A)PIC (e.g. current behavior).  I have cleaned up quite a few things based on all of your comments as well, so hopefully its a little more polished than last time.

As far as testing, I have confirmed that I can boot a 64-bit linux guest with no discernable difference in behavior.

Note that this patch applies after the in-kernel-mmio.patch that I sent earlier today which has not yet been accepted/commited.  This patch should not depend on it.  However, if you do have problems applying it let me know and I can generate another one that is not after in-kernel-mmio in the series.

Comments please!

-Greg 


--
    KVM: Add irqdevice interface + userint implementation
    
    The current code is geared towards using a user-mode (A)PIC.  This patch 
    adds an "irqdevice" abstraction, and implements a "userint" model to handle the
    duties of the original code.  Later, we can develop other irqdevice models
    to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate
    
    Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>

---
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..beb6f51 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o kvm_userint.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index c1923df..6caf60b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 
 #include "vmx.h"
+#include "kvm_irqdevice.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -157,6 +158,8 @@ struct vmcs {
 
 struct kvm_vcpu;
 
+int kvm_userint_init(struct kvm_irqdevice *dev);
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -286,6 +289,8 @@ static inline void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io
 	bus->devs[bus->dev_count++] = dev;
 }
 
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -298,9 +303,7 @@ struct kvm_vcpu {
 	u64 host_tsc;
 	struct kvm_run *run;
 	int interrupt_window_open;
-	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
-	unsigned long irq_pending[NR_IRQ_WORDS];
+	struct kvm_irqdevice irq_dev;
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_irqdevice.h b/drivers/kvm/kvm_irqdevice.h
new file mode 100644
index 0000000..9c91d15
--- /dev/null
+++ b/drivers/kvm/kvm_irqdevice.h
@@ -0,0 +1,206 @@
+/*
+ * kvm_irqdevice.h
+ *
+ * Defines an interface for an abstract interrupt controller.  The model 
+ * consists of a unit with an arbitrary number of input lines (IRQ0-N), an
+ * output line (INTR), and methods for completing an interrupt-acknowledge
+ * cycle (INTA).  A particular implementation of this model will define
+ * various policies, such as irq-to-vector translation, INTA/auto-EOI policy,
+ * etc.  
+ * 
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an 
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another 
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __KVM_IRQDEVICE_H
+#define __KVM_IRQDEVICE_H
+
+#define KVM_IRQFLAGS_NMI  (1 << 0)
+#define KVM_IRQFLAGS_PEEK (1 << 1)
+
+struct kvm_irqdevice;
+
+struct kvm_irqsink {
+	void (*raise_intr)(struct kvm_irqsink *this, 
+			   struct kvm_irqdevice *dev);
+
+	void *private;
+};
+
+struct kvm_irqdevice {
+	int  (*pending)(struct kvm_irqdevice *this, int flags);
+	int  (*read_vector)(struct kvm_irqdevice *this, int flags); 
+	int  (*set_pin)(struct kvm_irqdevice *this, int pin, int level, 
+			int flags);
+	int  (*summary)(struct kvm_irqdevice *this, void *data);
+	void (*destructor)(struct kvm_irqdevice *this);
+
+	void               *private;
+	struct kvm_irqsink  sink;
+};
+
+/*
+ * kvm_irq_init()
+ *
+ * Description:
+ *    Initialized the kvm_irqdevice for use.  Should be called before calling
+ *    any derived implementation init functions
+ * 
+ * Parameters:
+ *    struct kvm_irqdevice *dev: This device
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irq_init(struct kvm_irqdevice *dev)
+{
+	memset(dev, 0, sizeof(*dev));
+}
+
+/*
+ * kvm_irq_pending()
+ *
+ * Description:
+ *    Efficiently determines if an interrupt is pending on an irqdevice
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    int flags: Modifies the behavior as follows:
+ * 
+ *                   KVM_IRQFLAGS_NMI: Mask everything but NMIs
+ *
+ * Returns: (int)
+ *   -1 = failure
+ *    0 = no iterrupts pending (per "flags" criteria)
+ *   >0 = one or more interrupts are pending
+ */
+static inline int kvm_irq_pending(struct kvm_irqdevice *dev, int flags)
+{
+	return dev->pending(dev, flags);
+}
+
+/*
+ * kvm_irq_read_vector()
+ *
+ * Description:
+ *    Read the highest priority pending vector from the device, potentially
+ *    invoking auto-EOI depending on device policy
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    int flags: Modifies the behavior as follows:
+ *
+ *                   KVM_IRQFLAGS_NMI: Mask everything but NMIs
+ *                   KVM_IRQFLAGS_PEEK: Do not auto-acknowledge interrupt
+ *
+ * Returns: (int)
+ *   -1 = no interrupts pending (per "flags" criteria)
+ *  >=0 = the highest pending vector
+ */
+static inline int kvm_irq_read_vector(struct kvm_irqdevice *dev, int flags)
+{
+	return dev->read_vector(dev, flags);
+}
+
+/*
+ * kvm_irq_set_pin()
+ *
+ * Description:
+ *    Allows the caller to assert/deassert an IRQ input pin to the device 
+ *    according to device policy.
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    int pin: The input pin to alter
+ *    int level: The value to set (1 = assert, 0 = deassert)
+ *    int flags: Reserved, must be 0
+ *
+ * Returns: (int)
+ *   -1 = failure
+ *    0 = success
+ */
+static inline int kvm_irq_set_pin(struct kvm_irqdevice *dev, int pin, 
+				  int level, int flags)
+{
+	return dev->set_pin(dev, pin, level, flags);
+}
+
+/*
+ * kvm_irq_summary()
+ *
+ * Description:
+ *    Loads a summary bitmask of all pending vectors (0-255)
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    void *data: A pointer to a region capable of holding a 256 bit bitmap
+ *
+ * Returns: (int)
+ *   -1 = failure
+ *    0 = success
+ */
+static inline int kvm_irq_summary(struct kvm_irqdevice *dev, void *data)
+{
+	return dev->summary(dev, data);
+}
+
+/*
+ * kvm_irq_register_sink()
+ *
+ * Description:
+ *    Registers an kvm_irqsink object as an INTR callback
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *    struct kvm_irqsink *sink: The sink to register.  Data will be copied
+ *                              so building object from transient storage is 
+ *                              ok.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irq_register_sink(struct kvm_irqdevice *dev, 
+					 const struct kvm_irqsink *sink)
+{
+	dev->sink = *sink;
+}
+
+/*
+ * kvm_irq_raise_intr()
+ *
+ * Description:
+ *    Invokes a registered INTR callback (if present).  This function is
+ *    meant to be used privately by a irqdevice implementation. 
+ *
+ * Parameters:
+ *    struct kvm_irqdevice *dev: The device
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irq_raise_intr(struct kvm_irqdevice *dev)
+{
+	struct kvm_irqsink *sink = &dev->sink;
+	if (sink->raise_intr)
+		sink->raise_intr(sink, dev);
+}
+
+#endif /*  __KVM_IRQDEVICE_H */
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 9ca0ad3..ac8d4f4 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1989,8 +1989,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->shadow_efer;
 	sregs->apic_base = vcpu->apic_base;
 
-	memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
-	       sizeof sregs->interrupt_bitmap);
+	kvm_irq_summary(&vcpu->irq_dev, &sregs->interrupt_bitmap);
 
 	vcpu_put(vcpu);
 
@@ -2044,12 +2043,18 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
-	       sizeof vcpu->irq_pending);
-	vcpu->irq_summary = 0;
-	for (i = 0; i < NR_IRQ_WORDS; ++i)
-		if (vcpu->irq_pending[i])
-			__set_bit(i, &vcpu->irq_summary);
+	/* walk the interrupt-bitmap and inject an IRQ for each bit found */
+	for (i = 0; i < NR_IRQ_WORDS; ++i) {
+		unsigned long word = sregs->interrupt_bitmap[i];
+		while(word) {
+			int bit_index = __ffs(word);
+			int irq = i * BITS_PER_LONG + bit_index;
+			
+			kvm_irq_set_pin(&vcpu->irq_dev, irq, 1, 0);
+
+			clear_bit(bit_index, &word);
+		}
+	}
 
 	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
 	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -2210,14 +2215,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq < 0 || irq->irq >= 256)
 		return -EINVAL;
-	vcpu_load(vcpu);
-
-	set_bit(irq->irq, vcpu->irq_pending);
-	set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
 
-	vcpu_put(vcpu);
-
-	return 0;
+	return kvm_irq_set_pin(&vcpu->irq_dev, irq->irq, 1, 0);
 }
 
 static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2318,6 +2317,33 @@ out1:
 	return r;
 }
 
+/* This function will be invoked whenever the vcpu->irq_dev raises its INTR 
+ * line
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this, 
+			  struct kvm_irqdevice *dev)
+{
+	/* Our irq device is requesting to interrupt the vcpu.  If it is
+	 * currently running, we should inject a host IPI to force a VMEXIT 
+	 */
+	
+	/* FIXME: Implement this or the CPU wont notice the interrupt until
+	 * the next natural VMEXIT.  Note that this is how the system
+	 * has always worked, so nothing is broken here.  This is a future
+	 * enhancement
+	 */
+}
+
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_irqsink sink = {
+		.raise_intr = kvm_vcpu_intr,
+		.private    = vcpu
+	};
+	
+	kvm_irq_register_sink(&vcpu->irq_dev, &sink);
+}
+
 /*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
@@ -2364,6 +2390,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r < 0)
 		goto out_free_vcpus;
 
+	kvm_irq_init(&vcpu->irq_dev);
+	kvm_vcpu_irqsink_init(vcpu);
+	kvm_userint_init(&vcpu->irq_dev);
+
 	kvm_arch_ops->vcpu_load(vcpu);
 	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
diff --git a/drivers/kvm/kvm_userint.c b/drivers/kvm/kvm_userint.c
new file mode 100644
index 0000000..ded3098
--- /dev/null
+++ b/drivers/kvm/kvm_userint.c
@@ -0,0 +1,201 @@
+/*
+ * kvm_userint.c: User Interrupts IRQ device 
+ *
+ * This acts as an extention of an interrupt controller that exists elsewhere 
+ * (typically in userspace/QEMU).  Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has 
+ * already occured.  Therefore, this PIC has the following unusal properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (e.g.no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ *    upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped) 
+ *    because its an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code, 
+ *     Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include "kvm.h"
+
+/*----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses 
+ * a summary field to accelerate lookups.  Assumes external locking 
+ *---------------------------------------------------------------------*/
+
+struct bitarray {
+	unsigned long summary; /* 1 per word in pending */
+	unsigned long pending[NR_IRQ_WORDS];
+};
+
+static inline int bitarray_pending(struct bitarray *this)
+{
+	return this->summary ? 1 : 0;	
+}
+
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+	if (!this->summary)
+		return -1;
+	else {
+	    int word_index = __ffs(this->summary);
+	    int bit_index  = __ffs(this->pending[word_index]);
+
+	    return word_index * BITS_PER_LONG + bit_index;	
+	}
+}
+
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+	__set_bit(nr, &this->pending);
+	__set_bit(nr / BITS_PER_LONG, &this->summary); 
+} 
+
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+	int word = nr / BITS_PER_LONG;
+
+	__clear_bit(nr, &this->pending);
+	if (!this->pending[word])
+		__clear_bit(word, &this->summary);
+}
+
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+	return test_bit(nr, &this->pending);
+}
+
+/*----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------*/
+
+typedef struct {
+	spinlock_t      lock;
+	struct bitarray irq_pending;
+	int             nmi_pending;
+}kvm_userint;
+
+static int userint_pending(struct kvm_irqdevice *this, int flags)
+{
+	kvm_userint *s = (kvm_userint*)this->private;
+	int ret;
+
+	spin_lock_irq(&s->lock);
+
+	if (flags & KVM_IRQFLAGS_NMI)
+		ret = s->nmi_pending;
+	else
+		ret = bitarray_pending(&s->irq_pending);
+
+	spin_unlock_irq(&s->lock);
+
+	return ret;
+}
+
+static int userint_read_vector(struct kvm_irqdevice *this, int flags)
+{
+	kvm_userint *s = (kvm_userint*)this->private;
+	int          irq;
+
+	spin_lock_irq(&s->lock);
+
+	/* NMIs take priority, so if there is an NMI pending, or
+	 * if we are filtering out NMIs, only consider them 
+	 */
+	if (s->nmi_pending || (flags & KVM_IRQFLAGS_NMI))
+		irq = s->nmi_pending ? 2 : -1;
+	else
+		irq = bitarray_findhighest(&s->irq_pending);
+	
+	if (!(flags & KVM_IRQFLAGS_PEEK)) {
+		/* If the "peek" flag is not set, automatically clear the 
+		 * interrupt as the EOI mechanism (if any) will take place 
+		 * in userspace 
+		 */
+		bitarray_clear(&s->irq_pending, irq);
+		if (irq == 2)
+			s->nmi_pending = 0;
+	}
+
+	spin_unlock_irq(&s->lock);
+
+	return irq;
+}
+
+static int userint_set_pin(struct kvm_irqdevice* this, int irq, int level, 
+			   int flags)
+{
+	kvm_userint *s = (kvm_userint*)this->private;
+
+	if(!level)
+	    return 0; /* We dont care about deasserts */
+
+	spin_lock_irq(&s->lock);
+
+	/* Update the local state */
+	bitarray_set(&s->irq_pending, irq);
+	if (irq == 2)
+		s->nmi_pending = 1;
+
+	spin_unlock_irq(&s->lock);
+
+	/* And then alert the higher layer software we have changes */
+	kvm_irq_raise_intr(this);
+
+	return 0;
+}
+
+static int userint_summary(struct kvm_irqdevice* this, void *data)
+{	
+	kvm_userint *s = (kvm_userint*)this->private;
+
+	spin_lock_irq(&s->lock);
+	memcpy(data, s->irq_pending.pending, sizeof s->irq_pending.pending);
+	spin_unlock_irq(&s->lock);
+
+	return 0;
+}
+
+static void userint_destructor(struct kvm_irqdevice *this)
+{
+	kfree(this->private);
+}
+
+int kvm_userint_init(struct kvm_irqdevice *dev)
+{
+	kvm_userint *s;
+
+	s = (kvm_userint *)kzalloc(sizeof(kvm_userint), GFP_KERNEL);
+     
+	spin_lock_init(&s->lock);
+
+	dev->pending     = userint_pending;
+	dev->read_vector = userint_read_vector;
+	dev->set_pin     = userint_set_pin;
+	dev->summary     = userint_summary;
+	dev->destructor  = userint_destructor;
+
+	dev->private = s;
+
+	return 0;
+}
+
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b7e1410..04189ad 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -108,20 +108,16 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
 
 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
-	return irq;
+	return kvm_irq_read_vector(&vcpu->irq_dev, 0);
 }
 
 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 {
-	set_bit(irq, vcpu->irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+	/* FIXME: We probably want to reserve the "set_pin" verb for
+	 * actual interrupt requests, not for putting back something
+	 * previously pending.  Lets revisit this
+	 */
+	kvm_irq_set_pin(&vcpu->irq_dev, irq, 1, 0);
 }
 
 static inline void clgi(void)
@@ -1092,7 +1088,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irq_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1263,7 +1259,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irq_pending(&vcpu->irq_dev, 0)) {
 		++kvm_stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -1399,7 +1395,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 		 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
 
-	if (vcpu->interrupt_window_open && vcpu->irq_summary)
+	if (vcpu->interrupt_window_open && kvm_irq_pending(&vcpu->irq_dev, 0))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@@ -1409,7 +1405,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	 * Interrupts blocked.  Wait for unblock.
 	 */
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+	    (kvm_irq_pending(&vcpu->irq_dev, 0) || 
+	     kvm_run->request_interrupt_window)) {
 		control->intercept |= 1ULL << INTERCEPT_VINTR;
 	} else
 		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
@@ -1418,8 +1415,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irq_pending(&vcpu->irq_dev, 0));
 	kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
@@ -1434,7 +1432,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irq_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 61a6116..0d285eb 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
 
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	int irq = kvm_irq_read_vector(&vcpu->irq_dev, 0);
+	BUG_ON(irq < 0);
 
 	if (vcpu->rmode.active) {
 		inject_rmode_irq(vcpu, irq);
@@ -1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
 
 	if (vcpu->interrupt_window_open &&
-	    vcpu->irq_summary &&
+	    kvm_irq_pending(&vcpu->irq_dev, 0) &&
 	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
@@ -1255,7 +1250,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window))
+	    (kvm_irq_pending(&vcpu->irq_dev, 0) ||
+	     kvm_run->request_interrupt_window))
 		/*
 		 * Interrupts blocked.  Wait for unblock.
 		 */
@@ -1314,8 +1310,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (is_external_interrupt(vect_info)) {
 		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-		set_bit(irq, vcpu->irq_pending);
-		set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+		/* FIXME: Is this right? */
+		kvm_irq_set_pin(&vcpu->irq_dev, irq, 1, 0); 
 	}
 
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1619,8 +1615,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 	kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irq_pending(&vcpu->irq_dev, 0));
 }
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1631,7 +1628,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irq_pending(&vcpu->irq_dev, 0)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		++kvm_stat.irq_window_exits;
 		return 0;
@@ -1642,7 +1639,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irq_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1713,7 +1710,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irq_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));


-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV

^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2007-04-10 16:08 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-09 20:27 [PATCH] Add irqdevice interface + userint implementation Gregory Haskins
     [not found] ` <461A5B6C.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-10  7:33   ` Avi Kivity
2007-04-10  8:23   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2007-04-05 18:53 Gregory Haskins
     [not found] ` <4614FF5C.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-08  6:47   ` Avi Kivity
     [not found]     ` <46189005.2090609-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-09 18:36       ` Gregory Haskins
     [not found]         ` <461A415C.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-10  7:52           ` Avi Kivity
     [not found]             ` <461B4241.5030101-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-10 11:36               ` Gregory Haskins
     [not found]                 ` <461B3E61.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-10 12:01                   ` Avi Kivity
     [not found]                     ` <461B7C81.7040102-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-10 14:20                       ` Gregory Haskins
     [not found]                         ` <461B64E4.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-10 14:46                           ` Avi Kivity
     [not found]                             ` <461BA363.5080308-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-10 15:39                               ` Gregory Haskins
     [not found]                                 ` <461B7755.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-10 16:08                                   ` Avi Kivity
2007-04-08  9:58   ` Christoph Hellwig
     [not found]     ` <20070408095853.GA31284-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2007-04-08 10:23       ` Avi Kivity
     [not found]         ` <4618C293.8030902-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-08 10:33           ` Christoph Hellwig
2007-04-09 14:53       ` Gregory Haskins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox