public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* irqdevice INTR example
@ 2007-04-12  4:02 Gregory Haskins
       [not found] ` <461D7702.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Gregory Haskins @ 2007-04-12  4:02 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

[-- Attachment #1: Type: text/plain, Size: 571 bytes --]


Hi All,
  Attached are the first three patches in my queue.  The first two you are likely familiar with at this point (though I have made some more of the requested changes to 02-irqdevice.patch).  The last item (03-preemptible-cpu.patch) adds an implementation to the previously unused kvm_vcpu_intr() callback.  This acts as a functional example of the INTR callback mechanism as Avi requested.  Note that the work related to IF/NMI/TPR classification of interrupts happens later in my queue and is not mature enough to share yet, but hopefully soon.

Thoughts?
-Greg

[-- Attachment #2: 01-mmio_handler.patch --]
[-- Type: text/plain, Size: 5730 bytes --]

KVM: Adds support for in-kernel mmio handlers

From:  <>

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/kvm.h      |   31 ++++++++++++++++++
 drivers/kvm/kvm_main.c |   82 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fceeb84..181099f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -236,6 +236,36 @@ struct kvm_pio_request {
 	int rep;
 };
 
+struct kvm_io_device {
+	unsigned long (*read)(struct kvm_io_device *this,
+			      gpa_t addr,
+			      int length);
+	void (*write)(struct kvm_io_device *this,
+		      gpa_t addr,
+		      int length,
+		      unsigned long val);
+	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+
+	void             *private;
+};
+
+/*
+ * It would be nice to use something smarter than a linear search, TBD...
+ * Thankfully we don't expect many devices to register (famous last words :),
+ * so until then it will suffice.  At least it's abstracted so we can change
+ * in one place.
+ */
+struct kvm_io_bus {
+	int                   dev_count;
+#define NR_IOBUS_DEVS 6
+	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus);
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, 
+			     struct kvm_io_device *dev);
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -345,6 +375,7 @@ struct kvm {
 	unsigned long rmap_overflow;
 	struct list_head vm_list;
 	struct file *filp;
+	struct kvm_io_bus mmio_bus;
 };
 
 struct kvm_stat {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4473174..c3c0059 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -294,6 +294,7 @@ static struct kvm *kvm_create_vm(void)
 
 	spin_lock_init(&kvm->lock);
 	INIT_LIST_HEAD(&kvm->active_mmu_pages);
+	kvm_io_bus_init(&kvm->mmio_bus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
@@ -1015,12 +1016,25 @@ static int emulator_write_std(unsigned long addr,
 	return X86EMUL_UNHANDLEABLE;
 }
 
+static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, 
+						gpa_t addr)
+{
+	/*
+	 * Note that it's important to have this wrapper function because 
+	 * in the very near future we will be checking for MMIOs against 
+	 * the LAPIC as well as the general MMIO bus 
+	 */
+	return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+}
+
 static int emulator_read_emulated(unsigned long addr,
 				  unsigned long *val,
 				  unsigned int bytes,
 				  struct x86_emulate_ctxt *ctxt)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_vcpu      *vcpu = ctxt->vcpu;
+	struct kvm_io_device *mmio_dev;
+	gpa_t                 gpa;
 
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
@@ -1029,18 +1043,26 @@ static int emulator_read_emulated(unsigned long addr,
 	} else if (emulator_read_std(addr, val, bytes, ctxt)
 		   == X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
-	else {
-		gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
-		if (gpa == UNMAPPED_GVA)
-			return X86EMUL_PROPAGATE_FAULT;
-		vcpu->mmio_needed = 1;
-		vcpu->mmio_phys_addr = gpa;
-		vcpu->mmio_size = bytes;
-		vcpu->mmio_is_write = 0;
+	gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+	if (gpa == UNMAPPED_GVA)
+		return X86EMUL_PROPAGATE_FAULT;
 
-		return X86EMUL_UNHANDLEABLE;
+	/*
+	 * Is this MMIO handled locally? 
+	 */
+	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+	if (mmio_dev) {
+		*val = mmio_dev->read(mmio_dev, gpa, bytes);
+		return X86EMUL_CONTINUE;
 	}
+
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_phys_addr = gpa;
+	vcpu->mmio_size = bytes;
+	vcpu->mmio_is_write = 0;
+	
+	return X86EMUL_UNHANDLEABLE;
 }
 
 static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1068,8 +1090,9 @@ static int emulator_write_emulated(unsigned long addr,
 				   unsigned int bytes,
 				   struct x86_emulate_ctxt *ctxt)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
-	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+	struct kvm_vcpu      *vcpu = ctxt->vcpu;
+	struct kvm_io_device *mmio_dev;
+	gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
 	if (gpa == UNMAPPED_GVA)
 		return X86EMUL_PROPAGATE_FAULT;
@@ -1077,6 +1100,15 @@ static int emulator_write_emulated(unsigned long addr,
 	if (emulator_write_phys(vcpu, gpa, val, bytes))
 		return X86EMUL_CONTINUE;
 
+	/*
+	 * Is this MMIO handled locally?
+	 */
+	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+	if (mmio_dev) {
+		mmio_dev->write(mmio_dev, gpa, bytes, val);
+		return X86EMUL_CONTINUE;
+	}
+
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
 	vcpu->mmio_size = bytes;
@@ -2911,6 +2943,32 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 	return NOTIFY_OK;
 }
 
+void kvm_io_bus_init(struct kvm_io_bus *bus)
+{
+	memset(bus, 0, sizeof(*bus));
+}
+
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+{
+	int i;
+
+	for (i = 0; i < bus->dev_count; i++) {
+		struct kvm_io_device *pos = bus->devs[i];
+
+		if (pos->in_range(pos, addr))
+			return pos;
+	}
+
+	return NULL;
+}
+
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+{
+	BUG_ON(bus->dev_count >= (NR_IOBUS_DEVS-1));
+
+	bus->devs[bus->dev_count++] = dev;
+}
+
 static struct notifier_block kvm_cpu_notifier = {
 	.notifier_call = kvm_cpu_hotplug,
 	.priority = 20, /* must be > scheduler priority */

[-- Attachment #3: 02-irqdevice.patch --]
[-- Type: text/plain, Size: 23734 bytes --]

KVM: Add irqdevice object

From:  <>

The current code is geared towards using a user-mode (A)PIC.  This patch adds
an "irqdevice" abstraction, and implements a "userint" model to handle the
duties of the original code.  Later, we can develop other irqdevice models 
to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/Makefile    |    2 
 drivers/kvm/irqdevice.h |  170 ++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/kvm.h       |    9 +-
 drivers/kvm/kvm_main.c  |   57 ++++++++++---
 drivers/kvm/svm.c       |   33 ++++----
 drivers/kvm/userint.c   |  202 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c       |   29 +++----
 7 files changed, 449 insertions(+), 53 deletions(-)

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..540afbc 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h
new file mode 100644
index 0000000..fe284bc
--- /dev/null
+++ b/drivers/kvm/irqdevice.h
@@ -0,0 +1,170 @@
+/*
+ * Defines an interface for an abstract interrupt controller.  The model 
+ * consists of a unit with an arbitrary number of input lines (IRQ0-N), an
+ * output line (INTR), and methods for completing an interrupt-acknowledge
+ * cycle (INTA).  A particular implementation of this model will define
+ * various policies, such as irq-to-vector translation, INTA/auto-EOI policy,
+ * etc.  
+ * 
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an 
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another 
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __IRQDEVICE_H
+#define __IRQDEVICE_H
+
+#define KVM_IRQFLAGS_NMI  (1 << 0)
+#define KVM_IRQFLAGS_PEEK (1 << 1)
+
+struct kvm_irqdevice;
+
+struct kvm_irqsink {
+	void (*raise_intr)(struct kvm_irqsink *this, 
+			   struct kvm_irqdevice *dev);
+
+	void *private;
+};
+
+struct kvm_irqdevice {
+	int  (*pending)(struct kvm_irqdevice *this, int flags);
+	int  (*read_vector)(struct kvm_irqdevice *this, int flags); 
+	int  (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
+	int  (*summary)(struct kvm_irqdevice *this, void *data);
+	void (*destructor)(struct kvm_irqdevice *this);
+
+	void               *private;
+	struct kvm_irqsink  sink;
+};
+
+/**
+ * kvm_irqdevice_init - initialize the kvm_irqdevice for use
+ * @dev: The device
+ *
+ * Description: Initialize the kvm_irqdevice for use.  Should be called before 
+ *              calling any derived implementation init functions
+ * 
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev)
+{
+	memset(dev, 0, sizeof(*dev));
+}
+
+/**
+ * kvm_irqdevice_pending - efficiently determines if an interrupt is pending
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ *            [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ * 
+ * Description: Efficiently determines if an interrupt is pending on an 
+ *              irqdevice
+ *
+ * Returns: (int)
+ *    [0 = no interrupts pending (per "flags" criteria)]
+ *    [1 = one or more interrupts are pending]
+ */
+static inline int kvm_irqdevice_pending(struct kvm_irqdevice *dev, int flags)
+{
+	return dev->pending(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_read_vector - read the highest priority vector from the device
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ *            [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ *            [+ KVM_IRQFLAGS_PEEK: Do not auto-acknowledge interrupt]
+ *
+ * Description: Read the highest priority pending vector from the device, 
+ *              potentially invoking auto-EOI depending on device policy
+ *
+ * Returns: (int)
+ *   [ -1 = no interrupts pending (per "flags" criteria)]
+ *   [>=0 = the highest pending vector]
+ */
+static inline int kvm_irqdevice_read_vector(struct kvm_irqdevice *dev, 
+					    int flags)
+{
+	return dev->read_vector(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_set_pin - allows the caller to assert/deassert an IRQ
+ * @dev: The device
+ * @pin: The input pin to alter
+ * @level: The value to set (1 = assert, 0 = deassert)
+ *
+ * Description: Allows the caller to assert/deassert an IRQ input pin to the 
+ *              device according to device policy.
+ *
+ * Returns: (int)
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin,
+				  int level)
+{
+	return dev->set_pin(dev, pin, level);
+}
+
+/**
+ * kvm_irqdevice_summary - loads a summary bitmask
+ * @dev: The device
+ * @data: A pointer to a region capable of holding a 256 bit bitmap
+ *
+ * Description: Loads a summary bitmask of all pending vectors (0-255)
+ *
+ * Returns: (int)
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_summary(struct kvm_irqdevice *dev, void *data)
+{
+	return dev->summary(dev, data);
+}
+
+/**
+ * kvm_irqdevice_register_sink - registers a kvm_irqsink object
+ * @dev: The device
+ * @sink: The sink to register.  Data will be copied so building object from 
+ *        transient storage is ok.
+ *
+ * Description: Registers a kvm_irqsink object as an INTR callback
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev, 
+					       const struct kvm_irqsink *sink)
+{
+	dev->sink = *sink;
+}
+
+/*
+ * kvm_irqdevice_raise_intr - invokes a registered INTR callback
+ * @dev: The device
+ *
+ * Description: Invokes a registered INTR callback (if present).  This
+ *              function is meant to be used privately by an irqdevice 
+ *              implementation. 
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_raise_intr(struct kvm_irqdevice *dev)
+{
+	struct kvm_irqsink *sink = &dev->sink;
+	if (sink->raise_intr)
+		sink->raise_intr(sink, dev);
+}
+
+#endif /*  __IRQDEVICE_H */
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 181099f..58966d9 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 
 #include "vmx.h"
+#include "irqdevice.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -157,6 +158,8 @@ struct vmcs {
 
 struct kvm_vcpu;
 
+int kvm_userint_init(struct kvm_irqdevice *dev);
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -266,6 +269,8 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus, 
 			     struct kvm_io_device *dev);
 
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -278,9 +283,7 @@ struct kvm_vcpu {
 	u64 host_tsc;
 	struct kvm_run *run;
 	int interrupt_window_open;
-	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
-	unsigned long irq_pending[NR_IRQ_WORDS];
+	struct kvm_irqdevice irq_dev;
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c3c0059..7e00412 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1989,8 +1989,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->shadow_efer;
 	sregs->apic_base = vcpu->apic_base;
 
-	memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
-	       sizeof sregs->interrupt_bitmap);
+	kvm_irqdevice_summary(&vcpu->irq_dev, &sregs->interrupt_bitmap);
 
 	vcpu_put(vcpu);
 
@@ -2044,13 +2043,11 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
-	       sizeof vcpu->irq_pending);
-	vcpu->irq_summary = 0;
-	for (i = 0; i < NR_IRQ_WORDS; ++i)
-		if (vcpu->irq_pending[i])
-			__set_bit(i, &vcpu->irq_summary);
-
+	/* walk the interrupt-bitmap and inject an IRQ for each bit found */
+	for (i = 0; i < 256; ++i)
+		if (test_bit(i, &sregs->interrupt_bitmap[0]))
+			kvm_irqdevice_set_pin(&vcpu->irq_dev, i, 1);
+ 
 	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
 	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
 	set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -2210,14 +2207,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq < 0 || irq->irq >= 256)
 		return -EINVAL;
-	vcpu_load(vcpu);
-
-	set_bit(irq->irq, vcpu->irq_pending);
-	set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
 
-	vcpu_put(vcpu);
-
-	return 0;
+	return kvm_irqdevice_set_pin(&vcpu->irq_dev, irq->irq, 1);
 }
 
 static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2319,6 +2310,36 @@ out1:
 }
 
 /*
+ * This function will be invoked whenever the vcpu->irq_dev raises its INTR 
+ * line
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this, 
+			  struct kvm_irqdevice *dev)
+{
+	/*
+	 * Our irq device is requesting to interrupt the vcpu.  If it is
+	 * currently running, we should inject a host IPI to force a VMEXIT 
+	 */
+	
+	/*
+	 * FIXME: Implement this or the CPU won't notice the interrupt until
+	 * the next natural VMEXIT.  Note that this is how the system
+	 * has always worked, so nothing is broken here.  This is a future
+	 * enhancement
+	 */
+}
+
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_irqsink sink = {
+		.raise_intr = kvm_vcpu_intr,
+		.private    = vcpu
+	};
+	
+	kvm_irqdevice_register_sink(&vcpu->irq_dev, &sink);
+}
+
+/*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
@@ -2364,6 +2385,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r < 0)
 		goto out_free_vcpus;
 
+	kvm_irqdevice_init(&vcpu->irq_dev);
+	kvm_vcpu_irqsink_init(vcpu);
+	kvm_userint_init(&vcpu->irq_dev);
+
 	kvm_arch_ops->vcpu_load(vcpu);
 	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b7e1410..e59a548 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -108,20 +108,16 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
 
 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
-	return irq;
+	return kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
 }
 
 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 {
-	set_bit(irq, vcpu->irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+	/* FIXME: We probably want to reserve the "set_pin" verb for
+	 * actual interrupt requests, not for putting back something
+	 * previously pending.  Let's revisit this
+	 */
+	kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1);
 }
 
 static inline void clgi(void)
@@ -1092,7 +1088,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1263,7 +1259,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
 		++kvm_stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -1399,7 +1395,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 		 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
 
-	if (vcpu->interrupt_window_open && vcpu->irq_summary)
+	if (vcpu->interrupt_window_open && 
+	    kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@@ -1409,7 +1406,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	 * Interrupts blocked.  Wait for unblock.
 	 */
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+	    (kvm_irqdevice_pending(&vcpu->irq_dev, 0) || 
+	     kvm_run->request_interrupt_window)) {
 		control->intercept |= 1ULL << INTERCEPT_VINTR;
 	} else
 		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
@@ -1418,8 +1416,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
 	kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
@@ -1434,7 +1433,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c
new file mode 100644
index 0000000..8363060
--- /dev/null
+++ b/drivers/kvm/userint.c
@@ -0,0 +1,202 @@
+/*
+ * User Interrupts IRQ device 
+ *
+ * This acts as an extension of an interrupt controller that exists elsewhere 
+ * (typically in userspace/QEMU).  Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has 
+ * already occurred.  Therefore, this PIC has the following unusual properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (i.e. no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ *    upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped) 
+ *    because it's an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code, 
+ *     Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "kvm.h"
+
+/*
+ *----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses 
+ * a summary field to accelerate lookups.  Assumes external locking 
+ *---------------------------------------------------------------------
+ */
+
+struct bitarray {
+	unsigned long summary; /* 1 per word in pending */
+	unsigned long pending[NR_IRQ_WORDS];
+};
+
+static inline int bitarray_pending(struct bitarray *this)
+{
+	return this->summary ? 1 : 0;	
+}
+
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+	if (!this->summary)
+		return -1;
+	else {
+		int word_index = __fls(this->summary);
+		int bit_index  = __fls(this->pending[word_index]);
+		
+		return word_index * BITS_PER_LONG + bit_index;	
+	}
+}
+
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+	__set_bit(nr, &this->pending);
+	__set_bit(nr / BITS_PER_LONG, &this->summary); 
+} 
+
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+	int word = nr / BITS_PER_LONG;
+
+	__clear_bit(nr, &this->pending);
+	if (!this->pending[word])
+		__clear_bit(word, &this->summary);
+}
+
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+	return test_bit(nr, &this->pending);
+}
+
+/*
+ *----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------
+ */
+
+struct kvm_userint {
+	spinlock_t      lock;
+	struct bitarray irq_pending;
+	int             nmi_pending;
+};
+
+static int userint_pending(struct kvm_irqdevice *this, int flags)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+	int ret;
+
+	spin_lock_irq(&s->lock);
+
+	if (flags & KVM_IRQFLAGS_NMI)
+		ret = s->nmi_pending;
+	else
+		ret = bitarray_pending(&s->irq_pending);
+
+	spin_unlock_irq(&s->lock);
+
+	return ret;
+}
+
+static int userint_read_vector(struct kvm_irqdevice *this, int flags)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+	int          irq;
+
+	spin_lock_irq(&s->lock);
+
+	/*
+	 * NMIs take priority, so if there is an NMI pending, or
+	 * if we are filtering out NMIs, only consider them 
+	 */
+	if (s->nmi_pending || (flags & KVM_IRQFLAGS_NMI))
+		irq = s->nmi_pending ? 2 : -1;
+	else
+		irq = bitarray_findhighest(&s->irq_pending);
+	
+	if ((irq > -1) && !(flags & KVM_IRQFLAGS_PEEK)) {
+		/*
+		 * If the "peek" flag is not set, automatically clear the 
+		 * interrupt as the EOI mechanism (if any) will take place 
+		 * in userspace 
+		 */
+		bitarray_clear(&s->irq_pending, irq);
+		if (irq == 2)
+			s->nmi_pending = 0;
+	}
+
+	spin_unlock_irq(&s->lock);
+
+	return irq;
+}
+
+static int userint_set_pin(struct kvm_irqdevice* this, int irq, int level)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+	if (!level)
+		return 0; /* We don't care about deasserts */
+
+	spin_lock_irq(&s->lock);
+
+	/*
+	 * Update the local state 
+	 */
+	bitarray_set(&s->irq_pending, irq);
+	if (irq == 2)
+		s->nmi_pending = 1;
+
+	spin_unlock_irq(&s->lock);
+
+	/*
+	 * And then alert the higher layer software we have changes 
+	 */
+	kvm_irqdevice_raise_intr(this);
+
+	return 0;
+}
+
+static int userint_summary(struct kvm_irqdevice* this, void *data)
+{	
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+	spin_lock_irq(&s->lock);
+	memcpy(data, s->irq_pending.pending, sizeof s->irq_pending.pending);
+	spin_unlock_irq(&s->lock);
+
+	return 0;
+}
+
+static void userint_destructor(struct kvm_irqdevice *this)
+{
+	kfree(this->private);
+}
+
+int kvm_userint_init(struct kvm_irqdevice *dev)
+{
+	struct kvm_userint *s;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+	    return -ENOMEM;
+
+	spin_lock_init(&s->lock);
+
+	dev->pending     = userint_pending;
+	dev->read_vector = userint_read_vector;
+	dev->set_pin     = userint_set_pin;
+	dev->summary     = userint_summary;
+	dev->destructor  = userint_destructor;
+
+	dev->private = s;
+
+	return 0;
+}
+
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 61a6116..a0fdf02 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
 
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	int irq = kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
+	BUG_ON(irq < 0);
 
 	if (vcpu->rmode.active) {
 		inject_rmode_irq(vcpu, irq);
@@ -1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
 
 	if (vcpu->interrupt_window_open &&
-	    vcpu->irq_summary &&
+	    kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
@@ -1255,7 +1250,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window))
+	    (kvm_irqdevice_pending(&vcpu->irq_dev, 0) ||
+	     kvm_run->request_interrupt_window))
 		/*
 		 * Interrupts blocked.  Wait for unblock.
 		 */
@@ -1314,8 +1310,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (is_external_interrupt(vect_info)) {
 		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-		set_bit(irq, vcpu->irq_pending);
-		set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+		/* FIXME: Is this right? */
+		kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1); 
 	}
 
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1619,8 +1615,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 	kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
 }
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1631,7 +1628,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		++kvm_stat.irq_window_exits;
 		return 0;
@@ -1642,7 +1639,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1713,7 +1710,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));

[-- Attachment #4: 03-preemptible-cpu.patch --]
[-- Type: text/plain, Size: 10834 bytes --]

KVM: Preemptible VCPU

From:  <>

This adds support for interrupting an executing CPU

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/Makefile   |    2 -
 drivers/kvm/condvar.c  |  109 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/condvar.h  |   36 ++++++++++++++++
 drivers/kvm/kvm.h      |   12 +++++
 drivers/kvm/kvm_main.c |   47 ++++++++++++++++++---
 drivers/kvm/svm.c      |   35 +++++++++++++++
 drivers/kvm/vmx.c      |   35 +++++++++++++++
 7 files changed, 270 insertions(+), 6 deletions(-)

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 540afbc..b3bef0e 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o condvar.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/condvar.c b/drivers/kvm/condvar.c
new file mode 100644
index 0000000..87e464a
--- /dev/null
+++ b/drivers/kvm/condvar.c
@@ -0,0 +1,109 @@
+/*
+ * Condition Variable
+ *
+ * Copyright (C) 2007, Novell
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "condvar.h"
+
+void condvar_init(struct condvar *cv)
+{
+	wait_queue_head_t __head = __WAIT_QUEUE_HEAD_INITIALIZER(cv->queue);
+
+	memset(cv, 0, sizeof(*cv));
+	cv->queue = __head;
+}
+EXPORT_SYMBOL_GPL(condvar_init);
+
+/*
+ * Assumes the lock is already held
+ */
+int condvar_wait(struct condvar *cv, void *l, long timeout)
+{
+	DEFINE_WAIT(__wait);	
+	int _ret = 0;
+	
+	BUG_ON(!cv->lock_ops);
+
+	/*
+	 * first place ourselves on the waitqueue before releasing the lock 
+	 */
+	prepare_to_wait(&cv->queue, &__wait, TASK_UNINTERRUPTIBLE);
+	
+	/*
+	 * now actually release the lock to unblock any potential signalers 
+	 */
+	cv->lock_ops->unlock(l);
+	
+	/*
+	 * finally, reschedule until we are re-awoken 
+	 */ 
+	if (timeout > -1)
+		schedule_timeout(timeout);
+	else
+		schedule();
+	finish_wait(&cv->queue, &__wait);
+	
+	/*
+	 * if we get here, it's because someone signaled us.
+	 * reacquire the lock
+	 */
+	cv->lock_ops->lock(l);
+	
+	return _ret;
+}
+EXPORT_SYMBOL_GPL(condvar_wait);
+
+/*
+ * Assumes the lock is already held
+ */
+int condvar_signal(struct condvar *cv)
+{
+	wake_up(&cv->queue);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(condvar_signal);
+
+/*
+ *------------------------------------------------------------------------
+ * spinlock_condvar
+ *
+ * spinlock_lock/unlock can sometimes be implemented as macros, so 
+ * assigning them as function pointers directly is probably not going to 
+ * work.  Therefore we need these lightweight wrappers
+ *------------------------------------------------------------------------
+ */
+
+static void spinlock_condvar_lock(void *l)
+{
+	spinlock_t *lock = (spinlock_t*)l;
+
+	spin_lock(lock);
+} 
+
+static void spinlock_condvar_unlock(void *l)
+{
+	spinlock_t *lock = (spinlock_t*)l;
+
+	spin_unlock(lock);
+} 
+
+static struct cv_lock_ops spinlock_ops = {
+    .lock   = spinlock_condvar_lock,
+    .unlock = spinlock_condvar_unlock
+};
+
+void spinlock_condvar_init(struct condvar *cv)
+{
+	condvar_init(cv);
+
+	cv->lock_ops = &spinlock_ops;
+}
+
diff --git a/drivers/kvm/condvar.h b/drivers/kvm/condvar.h
new file mode 100644
index 0000000..58ed523
--- /dev/null
+++ b/drivers/kvm/condvar.h
@@ -0,0 +1,36 @@
+/*
+ * Condition Variable
+ *
+ * Copyright (C) 2007, Novell
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+struct cv_lock_ops {
+	void (*lock)(void *);
+	void (*unlock)(void *);
+};
+
+struct condvar {
+	wait_queue_head_t   queue;
+	struct cv_lock_ops *lock_ops;
+};
+
+void condvar_init(struct condvar *cv);
+int condvar_wait(struct condvar *cv, void *l, long timeout);
+int condvar_signal(struct condvar *cv);
+
+/*
+ *------------------------------------------------------------------------
+ * spinlock_condvar
+ *------------------------------------------------------------------------
+ */
+
+void spinlock_condvar_init(struct condvar *cv);
+
+
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 58966d9..703ffe0 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -14,6 +14,7 @@
 
 #include "vmx.h"
 #include "irqdevice.h"
+#include "condvar.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -271,6 +272,16 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
 
+/*
+ * structure for maintaining info for interrupting an executing VCPU
+ */
+struct kvm_vcpu_irq {
+	spinlock_t          lock;
+	struct condvar      cv;
+	struct task_struct *task;
+	int                 pending;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -284,6 +295,7 @@ struct kvm_vcpu {
 	struct kvm_run *run;
 	int interrupt_window_open;
 	struct kvm_irqdevice irq_dev;
+	struct kvm_vcpu_irq irq;
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7e00412..ea3609e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -299,6 +299,11 @@ static struct kvm *kvm_create_vm(void)
 		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
 		mutex_init(&vcpu->mutex);
+
+		memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+		spin_lock_init(&vcpu->irq.lock);
+		spinlock_condvar_init(&vcpu->irq.cv);
+
 		vcpu->cpu = -1;
 		vcpu->kvm = kvm;
 		vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -2320,13 +2325,45 @@ static void kvm_vcpu_intr(struct kvm_irqsink *this,
 	 * Our irq device is requesting to interrupt the vcpu.  If it is
 	 * currently running, we should inject a host IPI to force a VMEXIT 
 	 */
-	
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
+
 	/*
-	 * FIXME: Implement this or the CPU wont notice the interrupt until
-	 * the next natural VMEXIT.  Note that this is how the system
-	 * has always worked, so nothing is broken here.  This is a future
-	 * enhancement
+	 * HACK ALERT!
+	 *
+	 * We want to send a virtual interrupt signal to the task that owns
+	 * the guest.  However, the signal will only force a VMEXIT (via
+	 * a reschedule IPI) if the task is currently in GUEST mode.  There
+	 * is a race condition between the time that we mark the vcpu as
+	 * running and the time the system actually enters guest mode.  Since
+	 * there doesn't appear to be any way to help with this situation from
+	 * the VT hardware, we are forced to wait to make sure the guest 
+	 * actually gets interrupted in a reasonable amount of time.  If it
+	 * does not, we assume that the IPI failed because it was too early
+	 * and must try again until it does.
+	 *
+	 * This condvar/spinlock/timeout/retry eliminates the race in a safe
+	 * manner, at the expense of making the INTR delivery synchronous
 	 */
+	spin_lock(&vcpu->irq.lock);
+	
+	if (vcpu->irq.task) {
+		struct timespec tmo = {
+			.tv_sec  = 0,
+			.tv_nsec = 100000 /* 100us */
+		};
+
+		BUG_ON(vcpu->irq.task == current);
+			
+		while (vcpu->irq.task) {
+			send_sig(SIGSTOP, vcpu->irq.task, 0);
+			condvar_wait(&vcpu->irq.cv, &vcpu->irq.lock,
+				     timespec_to_jiffies(&tmo));
+		}
+		
+		vcpu->irq.pending = 1;
+	}
+	
+	spin_unlock(&vcpu->irq.lock);
 }
 
 static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index e59a548..6bc2fb1 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1463,9 +1463,25 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 
 again:
+	spin_lock(&vcpu->irq.lock);
+
+	/*
+	 * Setting vcpu->task signals to outsiders that the VMCS is 
+	 * effectively in GUEST mode, and therefore must be signalled
+	 * to transition the task back to HOST mode if any new interrupts
+	 * arrive.
+	 */
+	vcpu->irq.task = current;
+
+	/*
+	 * We also must inject interrupts (if any) while the irq_lock
+	 * is held
+	 */
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
+	spin_unlock(&vcpu->irq.lock);
+
 	clgi();
 
 	pre_svm_run(vcpu);
@@ -1617,6 +1633,25 @@ again:
 	reload_tss(vcpu);
 
 	/*
+	 * Signal that we have transitioned back to host mode 
+	 */
+	spin_lock(&vcpu->irq.lock);
+
+	vcpu->irq.task = NULL;
+	condvar_signal(&vcpu->irq.cv);
+
+	/*
+	 * If irq.pending is asserted, someone undoubtedly has sent us a SIGSTOP
+	 * signal.  Counter it with a SIGCONT
+	 */
+	if(vcpu->irq.pending) {
+	    send_sig(SIGCONT, current, 0);
+	    vcpu->irq.pending = 0;
+	}
+
+	spin_unlock(&vcpu->irq.lock);
+
+	/*
 	 * Profile KVM exit RIPs:
 	 */
 	if (unlikely(prof_on == KVM_PROFILING))
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index a0fdf02..f7b716b 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1748,9 +1748,25 @@ again:
 	vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
 #endif
 
+	spin_lock(&vcpu->irq.lock);
+
+	/*
+	 * Setting vcpu->task signals to outsiders that the VMCS is 
+	 * effectively in GUEST mode, and therefore must be signalled
+	 * to transition the task back to HOST mode if any new interrupts
+	 * arrive.
+	 */
+	vcpu->irq.task = current;
+
+	/*
+	 * We also must inject interrupts (if any) while the irq_lock
+	 * is held
+	 */
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
+	spin_unlock(&vcpu->irq.lock);
+
 	if (vcpu->guest_debug.enabled)
 		kvm_guest_debug_pre(vcpu);
 
@@ -1911,6 +1927,25 @@ again:
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
+	/*
+	 * Signal that we have transitioned back to host mode 
+	 */
+	spin_lock(&vcpu->irq.lock);
+
+	vcpu->irq.task = NULL;
+	condvar_signal(&vcpu->irq.cv);
+
+	/*
+	 * If irq.pending is asserted, someone undoubtedly has sent us a SIGSTOP
+	 * signal.  Counter it with a SIGCONT
+	 */
+	if(vcpu->irq.pending) {
+	    send_sig(SIGCONT, current, 0);
+	    vcpu->irq.pending = 0;
+	}
+
+	spin_unlock(&vcpu->irq.lock);
+
 	if (fail) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason

[-- Attachment #5: series --]
[-- Type: application/octet-stream, Size: 143 bytes --]

# This series applies on GIT commit 0ea6eecef44923d66409a49d71e4fa87fa0f5bed
01-mmio_handler.patch
02-irqdevice.patch
03-preemptible-cpu.patch

[-- Attachment #6: Type: text/plain, Size: 345 bytes --]

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV

[-- Attachment #7: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2007-04-16  5:46 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-12  4:02 irqdevice INTR example Gregory Haskins
     [not found] ` <461D7702.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-12  8:02   ` Avi Kivity
     [not found]     ` <461DE791.1040707-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-12  8:18       ` Christoph Hellwig
2007-04-12 11:55       ` Gregory Haskins
     [not found]         ` <461DE5C9.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-12 12:49           ` Avi Kivity
     [not found]             ` <461E2AD5.7070905-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-12 13:43               ` Gregory Haskins
     [not found]                 ` <461DFF1C.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-12 14:14                   ` Avi Kivity
     [not found]                     ` <461E3EDB.3080002-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-12 16:01                       ` Gregory Haskins
2007-04-13 13:05                         ` Fwd: " Gregory Haskins
     [not found]                         ` <461E1F73.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-14 14:30                           ` Avi Kivity
     [not found]                             ` <4620E56A.7040207-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-15 22:32                               ` Gregory Haskins
2007-04-15 23:32                                 ` Gregory Haskins
     [not found]                                 ` <46226FBC.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-16  5:46                                   ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox