public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Current patch series for review of in-kernel APIC work
@ 2007-04-16 12:19 Gregory Haskins
       [not found] ` <20070416121832.9824.40317.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Gregory Haskins @ 2007-04-16 12:19 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

As AVI requested, here is the entire series.


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/3] Adds support for in-kernel mmio handlers
       [not found] ` <20070416121832.9824.40317.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
@ 2007-04-16 12:19   ` Gregory Haskins
  2007-04-16 12:19   ` [PATCH 2/3] KVM: Add irqdevice object Gregory Haskins
  2007-04-16 12:19   ` [PATCH 3/3] KVM: Preemptible VCPU Gregory Haskins
  2 siblings, 0 replies; 8+ messages in thread
From: Gregory Haskins @ 2007-04-16 12:19 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

From: None <None>


---

 drivers/kvm/kvm.h      |   31 ++++++++++++++++++
 drivers/kvm/kvm_main.c |   82 +++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fceeb84..181099f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -236,6 +236,36 @@ struct kvm_pio_request {
 	int rep;
 };
 
+struct kvm_io_device {
+	unsigned long (*read)(struct kvm_io_device *this,
+			      gpa_t addr,
+			      int length);
+	void (*write)(struct kvm_io_device *this,
+		      gpa_t addr,
+		      int length,
+		      unsigned long val);
+	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+
+	void             *private;
+};
+
+/*
+ * It would be nice to use something smarter than a linear search, TBD...
+ * Thankfully we dont expect many devices to register (famous last words :),
+ * so until then it will suffice.  At least its abstracted so we can change
+ * in one place.
+ */
+struct kvm_io_bus {
+	int                   dev_count;
+#define NR_IOBUS_DEVS 6
+	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus);
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, 
+			     struct kvm_io_device *dev);
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -345,6 +375,7 @@ struct kvm {
 	unsigned long rmap_overflow;
 	struct list_head vm_list;
 	struct file *filp;
+	struct kvm_io_bus mmio_bus;
 };
 
 struct kvm_stat {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4473174..c3c0059 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -294,6 +294,7 @@ static struct kvm *kvm_create_vm(void)
 
 	spin_lock_init(&kvm->lock);
 	INIT_LIST_HEAD(&kvm->active_mmu_pages);
+	kvm_io_bus_init(&kvm->mmio_bus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
@@ -1015,12 +1016,25 @@ static int emulator_write_std(unsigned long addr,
 	return X86EMUL_UNHANDLEABLE;
 }
 
+static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, 
+						gpa_t addr)
+{
+	/*
+	 * Note that its important to have this wrapper function because 
+	 * in the very near future we will be checking for MMIOs against 
+	 * the LAPIC as well as the general MMIO bus 
+	 */
+	return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+}
+
 static int emulator_read_emulated(unsigned long addr,
 				  unsigned long *val,
 				  unsigned int bytes,
 				  struct x86_emulate_ctxt *ctxt)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_vcpu      *vcpu = ctxt->vcpu;
+	struct kvm_io_device *mmio_dev;
+	gpa_t                 gpa;
 
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
@@ -1029,18 +1043,26 @@ static int emulator_read_emulated(unsigned long addr,
 	} else if (emulator_read_std(addr, val, bytes, ctxt)
 		   == X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
-	else {
-		gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
-		if (gpa == UNMAPPED_GVA)
-			return X86EMUL_PROPAGATE_FAULT;
-		vcpu->mmio_needed = 1;
-		vcpu->mmio_phys_addr = gpa;
-		vcpu->mmio_size = bytes;
-		vcpu->mmio_is_write = 0;
+	gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+	if (gpa == UNMAPPED_GVA)
+		return X86EMUL_PROPAGATE_FAULT;
 
-		return X86EMUL_UNHANDLEABLE;
+	/*
+	 * Is this MMIO handled locally? 
+	 */
+	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+	if (mmio_dev) {
+		*val = mmio_dev->read(mmio_dev, gpa, bytes);
+		return X86EMUL_CONTINUE;
 	}
+
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_phys_addr = gpa;
+	vcpu->mmio_size = bytes;
+	vcpu->mmio_is_write = 0;
+	
+	return X86EMUL_UNHANDLEABLE;
 }
 
 static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1068,8 +1090,9 @@ static int emulator_write_emulated(unsigned long addr,
 				   unsigned int bytes,
 				   struct x86_emulate_ctxt *ctxt)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
-	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+	struct kvm_vcpu      *vcpu = ctxt->vcpu;
+	struct kvm_io_device *mmio_dev;
+	gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
 	if (gpa == UNMAPPED_GVA)
 		return X86EMUL_PROPAGATE_FAULT;
@@ -1077,6 +1100,15 @@ static int emulator_write_emulated(unsigned long addr,
 	if (emulator_write_phys(vcpu, gpa, val, bytes))
 		return X86EMUL_CONTINUE;
 
+	/*
+	 * Is this MMIO handled locally?
+	 */
+	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+	if (mmio_dev) {
+		mmio_dev->write(mmio_dev, gpa, bytes, val);
+		return X86EMUL_CONTINUE;
+	}
+
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_phys_addr = gpa;
 	vcpu->mmio_size = bytes;
@@ -2911,6 +2943,32 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 	return NOTIFY_OK;
 }
 
+void kvm_io_bus_init(struct kvm_io_bus *bus)
+{
+	memset(bus, 0, sizeof(*bus));
+}
+
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+{
+	int i;
+
+	for (i = 0; i < bus->dev_count; i++) {
+		struct kvm_io_device *pos = bus->devs[i];
+
+		if (pos->in_range(pos, addr))
+			return pos;
+	}
+
+	return NULL;
+}
+
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+{
+	BUG_ON(bus->dev_count >= (NR_IOBUS_DEVS-1));
+
+	bus->devs[bus->dev_count++] = dev;
+}
+
 static struct notifier_block kvm_cpu_notifier = {
 	.notifier_call = kvm_cpu_hotplug,
 	.priority = 20, /* must be > scheduler priority */


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/3] KVM: Add irqdevice object
       [not found] ` <20070416121832.9824.40317.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
  2007-04-16 12:19   ` [PATCH 1/3] Adds support for in-kernel mmio handlers Gregory Haskins
@ 2007-04-16 12:19   ` Gregory Haskins
       [not found]     ` <20070416121936.9824.8889.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
  2007-04-16 12:19   ` [PATCH 3/3] KVM: Preemptible VCPU Gregory Haskins
  2 siblings, 1 reply; 8+ messages in thread
From: Gregory Haskins @ 2007-04-16 12:19 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

From: None <None>

The current code is geared towards using a user-mode (A)PIC.  This patch adds
an "irqdevice" abstraction, and implements a "userint" model to handle the
duties of the original code.  Later, we can develop other irqdevice models 
to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/Makefile    |    2 
 drivers/kvm/irqdevice.h |  168 +++++++++++++++++++++++++++++++++++++++
 drivers/kvm/kvm.h       |    9 +-
 drivers/kvm/kvm_main.c  |   57 ++++++++++---
 drivers/kvm/svm.c       |   33 ++++----
 drivers/kvm/userint.c   |  201 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c       |   29 +++----
 7 files changed, 446 insertions(+), 53 deletions(-)

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..540afbc 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h
new file mode 100644
index 0000000..706ea2c
--- /dev/null
+++ b/drivers/kvm/irqdevice.h
@@ -0,0 +1,168 @@
+/*
+ * Defines an interface for an abstract interrupt controller.  The model 
+ * consists of a unit with an arbitrary number of input lines (IRQ0-N), an
+ * output line (INTR), and methods for completing an interrupt-acknowledge
+ * cycle (INTA).  A particular implementation of this model will define
+ * various policies, such as irq-to-vector translation, INTA/auto-EOI policy,
+ * etc.  
+ * 
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an 
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another 
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __IRQDEVICE_H
+#define __IRQDEVICE_H
+
+#define KVM_IRQFLAGS_NMI  (1 << 0)
+
+struct kvm_irqdevice;
+
+struct kvm_irqsink {
+	void (*raise_intr)(struct kvm_irqsink *this, 
+			   struct kvm_irqdevice *dev);
+
+	void *private;
+};
+
+struct kvm_irqdevice {
+	int  (*pending)(struct kvm_irqdevice *this, int flags);
+	int  (*read_vector)(struct kvm_irqdevice *this, int flags);
+	int  (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
+	int  (*summary)(struct kvm_irqdevice *this, void *data);
+	void (*destructor)(struct kvm_irqdevice *this);
+
+	void               *private;
+	struct kvm_irqsink  sink;
+};
+
+/**
+ * kvm_irqdevice_init - initialize the kvm_irqdevice for use
+ * @dev: The device
+ *
+ * Description: Initialize the kvm_irqdevice for use.  Should be called before 
+ *              calling any derived implementation init functions
+ * 
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev)
+{
+	memset(dev, 0, sizeof(*dev));
+}
+
+/**
+ * kvm_irqdevice_pending - efficiently determines if an interrupt is pending
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ *            [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ * 
+ * Description: Efficiently determines if an interrupt is pending on an 
+ *              irqdevice
+ *
+ * Returns: (int)
+ *    [0 = no iterrupts pending (per "flags" criteria)]
+ *    [1 = one or more interrupts are pending]
+ */
+static inline int kvm_irqdevice_pending(struct kvm_irqdevice *dev, int flags)
+{
+	return dev->pending(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_read_vector - read the highest priority vector from the device
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ *            [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ *
+ * Description: Read the highest priority pending vector from the device, 
+ *              potentially invoking auto-EOI depending on device policy
+ *
+ * Returns: (int)
+ *   [ -1 = no interrupts pending (per "flags" criteria)]
+ *   [>=0 = the highest pending vector]
+ */
+static inline int kvm_irqdevice_read_vector(struct kvm_irqdevice *dev, 
+					    int flags)
+{
+	return dev->read_vector(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_set_pin - allows the caller to assert/deassert an IRQ
+ * @dev: The device
+ * @pin: The input pin to alter
+ * @level: The value to set (1 = assert, 0 = deassert)
+ *
+ * Description: Allows the caller to assert/deassert an IRQ input pin to the 
+ *              device according to device policy.
+ *
+ * Returns: (int)
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin,
+				  int level)
+{
+	return dev->set_pin(dev, pin, level);
+}
+
+/**
+ * kvm_irqdevice_summary - loads a summary bitmask
+ * @dev: The device
+ * @data: A pointer to a region capable of holding a 256 bit bitmap
+ *
+ * Description: Loads a summary bitmask of all pending vectors (0-255)
+ *
+ * Returns: (int)
+ *   [-1 = failure]
+ *   [ 0 = success]
+ */
+static inline int kvm_irqdevice_summary(struct kvm_irqdevice *dev, void *data)
+{
+	return dev->summary(dev, data);
+}
+
+/**
+ * kvm_irqdevice_register_sink - registers an kvm_irqsink object
+ * @dev: The device
+ * @sink: The sink to register.  Data will be copied so building object from 
+ *        transient storage is ok.
+ *
+ * Description: Registers an kvm_irqsink object as an INTR callback
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev, 
+					       const struct kvm_irqsink *sink)
+{
+	dev->sink = *sink;
+}
+
+/*
+ * kvm_irqdevice_raise_intr - invokes a registered INTR callback
+ * @dev: The device
+ *
+ * Description: Invokes a registered INTR callback (if present).  This
+ *              function is meant to be used privately by a irqdevice 
+ *              implementation. 
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_raise_intr(struct kvm_irqdevice *dev)
+{
+	struct kvm_irqsink *sink = &dev->sink;
+	if (sink->raise_intr)
+		sink->raise_intr(sink, dev);
+}
+
+#endif /*  __IRQDEVICE_H */
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 181099f..58966d9 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 
 #include "vmx.h"
+#include "irqdevice.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
@@ -157,6 +158,8 @@ struct vmcs {
 
 struct kvm_vcpu;
 
+int kvm_userint_init(struct kvm_irqdevice *dev);
+
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
  * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
@@ -266,6 +269,8 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus, 
 			     struct kvm_io_device *dev);
 
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -278,9 +283,7 @@ struct kvm_vcpu {
 	u64 host_tsc;
 	struct kvm_run *run;
 	int interrupt_window_open;
-	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
-	unsigned long irq_pending[NR_IRQ_WORDS];
+	struct kvm_irqdevice irq_dev;
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c3c0059..7e00412 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1989,8 +1989,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->shadow_efer;
 	sregs->apic_base = vcpu->apic_base;
 
-	memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
-	       sizeof sregs->interrupt_bitmap);
+	kvm_irqdevice_summary(&vcpu->irq_dev, &sregs->interrupt_bitmap);
 
 	vcpu_put(vcpu);
 
@@ -2044,13 +2043,11 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
-	       sizeof vcpu->irq_pending);
-	vcpu->irq_summary = 0;
-	for (i = 0; i < NR_IRQ_WORDS; ++i)
-		if (vcpu->irq_pending[i])
-			__set_bit(i, &vcpu->irq_summary);
-
+	/* walk the interrupt-bitmap and inject an IRQ for each bit found */
+	for (i = 0; i < 256; ++i)
+		if (test_bit(i, &sregs->interrupt_bitmap[0]))
+			kvm_irqdevice_set_pin(&vcpu->irq_dev, i, 1);
+ 
 	set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
 	set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
 	set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -2210,14 +2207,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq < 0 || irq->irq >= 256)
 		return -EINVAL;
-	vcpu_load(vcpu);
-
-	set_bit(irq->irq, vcpu->irq_pending);
-	set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
 
-	vcpu_put(vcpu);
-
-	return 0;
+	return kvm_irqdevice_set_pin(&vcpu->irq_dev, irq->irq, 1);
 }
 
 static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2319,6 +2310,36 @@ out1:
 }
 
 /*
+ * This function will be invoked whenever the vcpu->irq_dev raises its INTR 
+ * line
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this, 
+			  struct kvm_irqdevice *dev)
+{
+	/*
+	 * Our irq device is requesting to interrupt the vcpu.  If it is
+	 * currently running, we should inject a host IPI to force a VMEXIT 
+	 */
+	
+	/*
+	 * FIXME: Implement this or the CPU wont notice the interrupt until
+	 * the next natural VMEXIT.  Note that this is how the system
+	 * has always worked, so nothing is broken here.  This is a future
+	 * enhancement
+	 */
+}
+
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_irqsink sink = {
+		.raise_intr = kvm_vcpu_intr,
+		.private    = vcpu
+	};
+	
+	kvm_irqdevice_register_sink(&vcpu->irq_dev, &sink);
+}
+
+/*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
@@ -2364,6 +2385,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r < 0)
 		goto out_free_vcpus;
 
+	kvm_irqdevice_init(&vcpu->irq_dev);
+	kvm_vcpu_irqsink_init(vcpu);
+	kvm_userint_init(&vcpu->irq_dev);
+
 	kvm_arch_ops->vcpu_load(vcpu);
 	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b7e1410..e59a548 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -108,20 +108,16 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
 
 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
-	return irq;
+	return kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
 }
 
 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 {
-	set_bit(irq, vcpu->irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+	/* FIXME: We probably want to reserve the "set_pin" verb for
+	 * actual interrupt requests, not for putting back something
+	 * previously pending.  Lets revisit this
+	 */
+	kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1);
 }
 
 static inline void clgi(void)
@@ -1092,7 +1088,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1263,7 +1259,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
 		++kvm_stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -1399,7 +1395,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 		 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
 
-	if (vcpu->interrupt_window_open && vcpu->irq_summary)
+	if (vcpu->interrupt_window_open && 
+	    kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@@ -1409,7 +1406,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	 * Interrupts blocked.  Wait for unblock.
 	 */
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+	    (kvm_irqdevice_pending(&vcpu->irq_dev, 0) || 
+	     kvm_run->request_interrupt_window)) {
 		control->intercept |= 1ULL << INTERCEPT_VINTR;
 	} else
 		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
@@ -1418,8 +1416,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
 	kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
@@ -1434,7 +1433,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c
new file mode 100644
index 0000000..594ee81
--- /dev/null
+++ b/drivers/kvm/userint.c
@@ -0,0 +1,201 @@
+/*
+ * User Interrupts IRQ device 
+ *
+ * This acts as an extention of an interrupt controller that exists elsewhere 
+ * (typically in userspace/QEMU).  Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has 
+ * already occured.  Therefore, this PIC has the following unusal properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (i.e. no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ *    upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped) 
+ *    because its an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code, 
+ *     Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ *   Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "kvm.h"
+
+/*
+ *----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses 
+ * a summary field to accelerate lookups.  Assumes external locking 
+ *---------------------------------------------------------------------
+ */
+
+struct bitarray {
+	unsigned long summary; /* 1 per word in pending */
+	unsigned long pending[NR_IRQ_WORDS];
+};
+
+static inline int bitarray_pending(struct bitarray *this)
+{
+	return this->summary ? 1 : 0;	
+}
+
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+	if (!this->summary)
+		return -1;
+	else {
+		int word_index = __fls(this->summary);
+		int bit_index  = __fls(this->pending[word_index]);
+		
+		return word_index * BITS_PER_LONG + bit_index;	
+	}
+}
+
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+	__set_bit(nr, &this->pending);
+	__set_bit(nr / BITS_PER_LONG, &this->summary); 
+} 
+
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+	int word = nr / BITS_PER_LONG;
+
+	__clear_bit(nr, &this->pending);
+	if (!this->pending[word])
+		__clear_bit(word, &this->summary);
+}
+
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+	return test_bit(nr, &this->pending);
+}
+
+/*
+ *----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------
+ */
+
+struct kvm_userint {
+	spinlock_t      lock;
+	struct bitarray irq_pending;
+	int             nmi_pending;
+};
+
+static int userint_pending(struct kvm_irqdevice *this, int flags)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+	int ret;
+
+	spin_lock_irq(&s->lock);
+
+	if (flags & KVM_IRQFLAGS_NMI)
+		ret = s->nmi_pending;
+	else
+		ret = bitarray_pending(&s->irq_pending);
+
+	spin_unlock_irq(&s->lock);
+
+	return ret;
+}
+
+static int userint_read_vector(struct kvm_irqdevice *this, int flags)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+	int          irq;
+
+	spin_lock_irq(&s->lock);
+
+	/*
+	 * NMIs take priority, so if there is an NMI pending, or
+	 * if we are filtering out NMIs, only consider them 
+	 */
+	if (s->nmi_pending || (flags & KVM_IRQFLAGS_NMI))
+		irq = s->nmi_pending ? 2 : -1;
+	else
+		irq = bitarray_findhighest(&s->irq_pending);
+	
+	if (irq > -1) {
+		/*
+		 * Automatically clear the interrupt as the EOI mechanism 
+		 * (if any) will take place in userspace 
+		 */
+		bitarray_clear(&s->irq_pending, irq);
+		if (irq == 2)
+			s->nmi_pending = 0;
+	}
+
+	spin_unlock_irq(&s->lock);
+
+	return irq;
+}
+
+static int userint_set_pin(struct kvm_irqdevice* this, int irq, int level)
+{
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+	if (!level)
+		return 0; /* We dont care about deasserts */
+
+	spin_lock_irq(&s->lock);
+
+	/*
+	 * Update the local state 
+	 */
+	bitarray_set(&s->irq_pending, irq);
+	if (irq == 2)
+		s->nmi_pending = 1;
+
+	spin_unlock_irq(&s->lock);
+
+	/*
+	 * And then alert the higher layer software we have changes 
+	 */
+	kvm_irqdevice_raise_intr(this);
+
+	return 0;
+}
+
+static int userint_summary(struct kvm_irqdevice* this, void *data)
+{	
+	struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+	spin_lock_irq(&s->lock);
+	memcpy(data, s->irq_pending.pending, sizeof s->irq_pending.pending);
+	spin_unlock_irq(&s->lock);
+
+	return 0;
+}
+
+static void userint_destructor(struct kvm_irqdevice *this)
+{
+	kfree(this->private);
+}
+
+int kvm_userint_init(struct kvm_irqdevice *dev)
+{
+	struct kvm_userint *s;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+	    return -ENOMEM;
+
+	spin_lock_init(&s->lock);
+
+	dev->pending     = userint_pending;
+	dev->read_vector = userint_read_vector;
+	dev->set_pin     = userint_set_pin;
+	dev->summary     = userint_summary;
+	dev->destructor  = userint_destructor;
+
+	dev->private = s;
+
+	return 0;
+}
+
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 61a6116..a0fdf02 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
 
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
-	int irq = word_index * BITS_PER_LONG + bit_index;
-
-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	int irq = kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
+	BUG_ON(irq < 0);
 
 	if (vcpu->rmode.active) {
 		inject_rmode_irq(vcpu, irq);
@@ -1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
 
 	if (vcpu->interrupt_window_open &&
-	    vcpu->irq_summary &&
+	    kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
@@ -1255,7 +1250,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	if (!vcpu->interrupt_window_open &&
-	    (vcpu->irq_summary || kvm_run->request_interrupt_window))
+	    (kvm_irqdevice_pending(&vcpu->irq_dev, 0) ||
+	     kvm_run->request_interrupt_window))
 		/*
 		 * Interrupts blocked.  Wait for unblock.
 		 */
@@ -1314,8 +1310,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (is_external_interrupt(vect_info)) {
 		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-		set_bit(irq, vcpu->irq_pending);
-		set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+		/* FIXME: Is this right? */
+		kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1); 
 	}
 
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1619,8 +1615,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 	kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = vcpu->cr8;
 	kvm_run->apic_base = vcpu->apic_base;
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	kvm_run->ready_for_interrupt_injection = 
+		(vcpu->interrupt_window_open && 
+		 !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
 }
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1631,7 +1628,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !vcpu->irq_summary) {
+	    !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		++kvm_stat.irq_window_exits;
 		return 0;
@@ -1642,7 +1639,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	skip_emulated_instruction(vcpu);
-	if (vcpu->irq_summary)
+	if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
 		return 1;
 
 	kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1713,7 +1710,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->irq_summary &&
+	return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->interrupt_window_open &&
 		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/3] KVM: Preemptible VCPU
       [not found] ` <20070416121832.9824.40317.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
  2007-04-16 12:19   ` [PATCH 1/3] Adds support for in-kernel mmio handlers Gregory Haskins
  2007-04-16 12:19   ` [PATCH 2/3] KVM: Add irqdevice object Gregory Haskins
@ 2007-04-16 12:19   ` Gregory Haskins
  2 siblings, 0 replies; 8+ messages in thread
From: Gregory Haskins @ 2007-04-16 12:19 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

From: None <None>

This adds support for interrupting an executing CPU

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/kvm.h      |   13 +++++++++
 drivers/kvm/kvm_main.c |   68 ++++++++++++++++++++++++++++++++++++++++++++----
 drivers/kvm/svm.c      |   51 ++++++++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c      |   55 +++++++++++++++++++++++++++++++++++++--
 4 files changed, 179 insertions(+), 8 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 58966d9..87357cc 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -271,6 +271,18 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
 
+#define KVM_SIGNAL_VIRTUAL_INTERRUPT 33 /* Hardcoded for now */
+
+/*
+ * structure for maintaining info for interrupting an executing VCPU
+ */
+struct kvm_vcpu_irq {
+	spinlock_t          lock;
+	struct task_struct *task;
+	int                 signo;
+	int                 guest_mode;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -284,6 +296,7 @@ struct kvm_vcpu {
 	struct kvm_run *run;
 	int interrupt_window_open;
 	struct kvm_irqdevice irq_dev;
+	struct kvm_vcpu_irq irq;
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7e00412..6d7178b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -299,6 +299,14 @@ static struct kvm *kvm_create_vm(void)
 		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
 		mutex_init(&vcpu->mutex);
+
+		memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+		spin_lock_init(&vcpu->irq.lock);
+		/*
+		 * This should be settable by userspace someday
+		 */
+		vcpu->irq.signo = KVM_SIGNAL_VIRTUAL_INTERRUPT;
+
 		vcpu->cpu = -1;
 		vcpu->kvm = kvm;
 		vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -1838,6 +1846,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
 	sigset_t sigsaved;
+	unsigned long irqsaved;
 
 	vcpu_load(vcpu);
 
@@ -1866,6 +1875,10 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		kvm_arch_ops->decache_regs(vcpu);
 	}
 
+	spin_lock_irqsave(&vcpu->irq.lock, irqsaved);
+	vcpu->irq.task = current;
+	spin_unlock_irqrestore(&vcpu->irq.lock, irqsaved);
+	
 	r = kvm_arch_ops->run(vcpu, kvm_run);
 
 out:
@@ -2310,6 +2323,20 @@ out1:
 }
 
 /*
+ * This function is invoked whenever we want to interrupt a vcpu that is
+ * currently executing in guest-mode.  It currently is a no-op because
+ * the simple delivery of the IPI to execute this function accomplishes our
+ * goal: To cause a VMEXIT.  We pass the vcpu (which contains the 
+ * vcpu->irq.task, etc) for future use
+ */
+static void kvm_vcpu_guest_intr(void *info)
+{
+#ifdef NOT_YET
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu*)info;
+#endif
+}
+
+/*
  * This function will be invoked whenever the vcpu->irq_dev raises its INTR 
  * line
  */
@@ -2320,13 +2347,42 @@ static void kvm_vcpu_intr(struct kvm_irqsink *this,
 	 * Our irq device is requesting to interrupt the vcpu.  If it is
 	 * currently running, we should inject a host IPI to force a VMEXIT 
 	 */
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vcpu->irq.lock, flags);
 	
-	/*
-	 * FIXME: Implement this or the CPU wont notice the interrupt until
-	 * the next natural VMEXIT.  Note that this is how the system
-	 * has always worked, so nothing is broken here.  This is a future
-	 * enhancement
-	 */
+	if (vcpu->irq.task && (vcpu->irq.task != current)) {
+		if (vcpu->irq.guest_mode) {
+			/*
+			 * If we are in guest mode, we can optimize the IPI
+			 * by executing a function on the owning processor.
+			 */
+			int cpu;
+
+			/*
+			 * Not sure if disabling preemption is needed
+			 * since we are in a spin-lock anyway?  The
+			 * kick_process() code does this so I copied it
+			 */
+			preempt_disable();
+			cpu = task_cpu(vcpu->irq.task);
+			BUG_ON(cpu == smp_processor_id());
+			smp_call_function_single(cpu, 
+						 kvm_vcpu_guest_intr,
+						 vcpu, 0, 0); 
+			preempt_enable();
+		} else
+			/*
+			 * If we are not in guest mode, we must assume that
+			 * we could be blocked anywhere, including userspace.
+			 * Send a signal to give everyone a chance to get
+			 * notification
+			 */
+			send_sig(vcpu->irq.signo, vcpu->irq.task, 0);
+	}
+	
+	spin_unlock_irqrestore(&vcpu->irq.lock, flags);
 }
 
 static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index e59a548..f5cdffe 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1461,11 +1461,48 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	u16 gs_selector;
 	u16 ldt_selector;
 	int r;
+	unsigned long irq_flags;
 
 again:
+	/*
+	 * We disable interrupts until the next VMEXIT to eliminate a race 
+	 * condition for delivery of virtual interrutps.  Note that this is
+	 * probably not as bad as it sounds, as interrupts will still invoke
+	 * a VMEXIT once transitioned to GUEST mode (and thus exit this lock 
+	 * scope) even if they are disabled.
+	 *
+	 * FIXME: Do we need to do anything additional to mask IPI/NMIs? 
+	 */
+	local_irq_save(irq_flags);
+
+	spin_lock(&vcpu->irq.lock);
+
+	/*
+	 * If there are any signals pending (virtual interrupt related or 
+	 * otherwise), don't even bother trying to enter guest mode...
+	 */
+	if (signal_pending(current)) {
+	    kvm_run->exit_reason = KVM_EXIT_INTR;
+	    spin_unlock(&vcpu->irq.lock);
+	    local_irq_restore(irq_flags);
+	    return -EINTR;
+	}
+
+	/*
+	 * There are optimizations we can make when signaling interrupts
+	 * if we know the VCPU is in GUEST mode, so mark that here
+	 */
+	vcpu->irq.guest_mode = 1;
+
+	/*
+	 * We also must inject interrupts (if any) while the irq_lock
+	 * is held
+	 */
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
+	spin_unlock(&vcpu->irq.lock);
+
 	clgi();
 
 	pre_svm_run(vcpu);
@@ -1597,6 +1634,13 @@ again:
 #endif
 		: "cc", "memory" );
 
+	/*
+	 * FIXME: We'd like to turn on interrupts ASAP, but is this so early
+	 * that we will mess up the state of the CPU before we fully 
+	 * transition from guest to host?
+	 */
+	local_irq_restore(irq_flags);
+
 	fx_save(vcpu->guest_fx_image);
 	fx_restore(vcpu->host_fx_image);
 
@@ -1617,6 +1661,13 @@ again:
 	reload_tss(vcpu);
 
 	/*
+	 * Signal that we have transitioned back to host mode 
+	 */
+	spin_lock_irqsave(&vcpu->irq.lock, irq_flags);
+	vcpu->irq.guest_mode = 0;
+	spin_unlock_irqrestore(&vcpu->irq.lock, irq_flags);
+
+	/*
 	 * Profile KVM exit RIPs:
 	 */
 	if (unlikely(prof_on == KVM_PROFILING))
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index a0fdf02..654c4d6 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1722,6 +1722,7 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	u16 fs_sel, gs_sel, ldt_sel;
 	int fs_gs_ldt_reload_needed;
 	int r;
+	unsigned long irq_flags;
 
 again:
 	/*
@@ -1748,11 +1749,47 @@ again:
 	vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
 #endif
 
+	if (vcpu->guest_debug.enabled)
+		kvm_guest_debug_pre(vcpu);
+
+	/*
+	 * We disable interrupts until the next VMEXIT to eliminate a race 
+	 * condition for delivery of virtual interrutps.  Note that this is
+	 * probably not as bad as it sounds, as interrupts will still invoke
+	 * a VMEXIT once transitioned to GUEST mode (and thus exit this lock 
+	 * scope) even if they are disabled.
+	 *
+	 * FIXME: Do we need to do anything additional to mask IPI/NMIs? 
+	 */
+	local_irq_save(irq_flags);
+
+	spin_lock(&vcpu->irq.lock);
+	
+	/*
+	 * If there are any signals pending (virtual interrupt related or 
+	 * otherwise), don't even bother trying to enter guest mode...
+	 */
+	if (signal_pending(current)) {
+	    kvm_run->exit_reason = KVM_EXIT_INTR;
+	    spin_unlock(&vcpu->irq.lock);
+	    local_irq_restore(irq_flags);
+	    return -EINTR;
+	}
+
+	/*
+	 * There are optimizations we can make when signaling interrupts
+	 * if we know the VCPU is in GUEST mode, so mark that here
+	 */
+	vcpu->irq.guest_mode = 1;
+	
+	/*
+	 * We also must inject interrupts (if any) while the irq.lock
+	 * is held
+	 */
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
-	if (vcpu->guest_debug.enabled)
-		kvm_guest_debug_pre(vcpu);
+	spin_unlock(&vcpu->irq.lock);
 
 	fx_save(vcpu->host_fx_image);
 	fx_restore(vcpu->guest_fx_image);
@@ -1880,6 +1917,13 @@ again:
 	      : "cc", "memory" );
 
 	/*
+	 * FIXME: We'd like to turn on interrupts ASAP, but is this so early
+	 * that we will mess up the state of the CPU before we fully 
+	 * transition from guest to host?
+	 */
+	local_irq_restore(irq_flags);
+
+	/*
 	 * Reload segment selectors ASAP. (it's needed for a functional
 	 * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64
 	 * relies on having 0 in %gs for the CPU PDA to work.)
@@ -1911,6 +1955,13 @@ again:
 
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
+	/*
+	 * Signal that we have transitioned back to host mode 
+	 */
+	spin_lock_irqsave(&vcpu->irq.lock, irq_flags);
+	vcpu->irq.guest_mode = 0;
+	spin_unlock_irqrestore(&vcpu->irq.lock, irq_flags);
+
 	if (fail) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] KVM: Add irqdevice object
       [not found]     ` <20070416121936.9824.8889.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
@ 2007-04-16 14:07       ` Avi Kivity
       [not found]         ` <46238338.8070503-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2007-04-16 14:07 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Gregory Haskins wrote:
> From: None <None>
>
> The current code is geared towards using a user-mode (A)PIC.  This patch adds
> an "irqdevice" abstraction, and implements a "userint" model to handle the
> duties of the original code.  Later, we can develop other irqdevice models 
> to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate
>
>   

Viewed in light of 3/3, various races are exposed.


> @@ -2044,13 +2043,11 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
>  	if (mmu_reset_needed)
>  		kvm_mmu_reset_context(vcpu);
>  
> -	memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
> -	       sizeof vcpu->irq_pending);
> -	vcpu->irq_summary = 0;
> -	for (i = 0; i < NR_IRQ_WORDS; ++i)
> -		if (vcpu->irq_pending[i])
> -			__set_bit(i, &vcpu->irq_summary);
> -
> +	/* walk the interrupt-bitmap and inject an IRQ for each bit found */
> +	for (i = 0; i < 256; ++i)
> +		if (test_bit(i, &sregs->interrupt_bitmap[0]))
> +			kvm_irqdevice_set_pin(&vcpu->irq_dev, i, 1);
> + 
>   

You need to lower a pin here if it was previously set.


> diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
> index 61a6116..a0fdf02 100644
> --- a/drivers/kvm/vmx.c
> +++ b/drivers/kvm/vmx.c
> @@ -1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
>  
>  static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
>  {
> -	int word_index = __ffs(vcpu->irq_summary);
> -	int bit_index = __ffs(vcpu->irq_pending[word_index]);
> -	int irq = word_index * BITS_PER_LONG + bit_index;
> -
> -	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
> -	if (!vcpu->irq_pending[word_index])
> -		clear_bit(word_index, &vcpu->irq_summary);
> +	int irq = kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
> +	BUG_ON(irq < 0);
>   

This BUG can trigger.  A level-triggered irq was asserted, then deasserted.

>  
>  	if (vcpu->rmode.active) {
>  		inject_rmode_irq(vcpu, irq);
> @@ -1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
>  		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
>  
>  	if (vcpu->interrupt_window_open &&
> -	    vcpu->irq_summary &&
> +	    kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
>  	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
>   

What if an irq is made pending here?

>  		/*
>  		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
> @@ -1255,7 +1250,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
>  
>  	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
>  	if (!vcpu->interrupt_window_open &&
> -	    (vcpu->irq_summary || kvm_run->request_interrupt_window))
> +	    (kvm_irqdevice_pending(&vcpu->irq_dev, 0) ||
> +	     kvm_run->request_interrupt_window))
>  		/*
>  		 * Interrupts blocked.  Wait for unblock.
>  		 */
>   

or here?

> @@ -1619,8 +1615,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
>  	kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
>  	kvm_run->cr8 = vcpu->cr8;
>  	kvm_run->apic_base = vcpu->apic_base;
> -	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
> -						  vcpu->irq_summary == 0);
> +	kvm_run->ready_for_interrupt_injection = 
> +		(vcpu->interrupt_window_open && 
> +		 !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
>  }
>   

or here? possibly a good answer is "don't rely on r_f_i_i if not using 
userint".

>  
>  static int handle_interrupt_window(struct kvm_vcpu *vcpu,
> @@ -1631,7 +1628,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
>  	 * possible
>  	 */
>  	if (kvm_run->request_interrupt_window &&
> -	    !vcpu->irq_summary) {
> +	    !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
>  		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
>  		++kvm_stat.irq_window_exits;
>  		return 0;
>   

ditto.

> @@ -1713,7 +1710,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
>  					  struct kvm_run *kvm_run)
>  {
> -	return (!vcpu->irq_summary &&
> +	return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
>  		kvm_run->request_interrupt_window &&
>  		vcpu->interrupt_window_open &&
>  		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
>
>
>   

ditto.


-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] KVM: Add irqdevice object
       [not found]         ` <46238338.8070503-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-04-16 14:42           ` Gregory Haskins
       [not found]             ` <462352F1.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Gregory Haskins @ 2007-04-16 14:42 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

>>> On Mon, Apr 16, 2007 at 10:07 AM, in message <46238338.8070503-atKUWr5tajBWk0Htik3J/w@public.gmane.org>,
Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote: 
> Gregory Haskins wrote:
>> From: None <None>
>>
>> The current code is geared towards using a user- mode (A)PIC.  This patch adds
>> an "irqdevice" abstraction, and implements a "userint" model to handle the
>> duties of the original code.  Later, we can develop other irqdevice models 
>> to handle objects like LAPIC, IOAPIC, i8259, etc, as appropriate
>>
>>   
> 
> Viewed in light of 3/3, various races are exposed.
> 
> 
>> @@ - 2044,13 +2043,11 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu 
> *vcpu,
>>  	if (mmu_reset_needed)
>>  		kvm_mmu_reset_context(vcpu);
>>  
>> -	memcpy(vcpu- >irq_pending, sregs- >interrupt_bitmap,
>> -	       sizeof vcpu- >irq_pending);
>> -	vcpu- >irq_summary = 0;
>> -	for (i = 0; i < NR_IRQ_WORDS; ++i)
>> -		if (vcpu- >irq_pending[i])
>> -			__set_bit(i, &vcpu- >irq_summary);
>> -
>> +	/* walk the interrupt- bitmap and inject an IRQ for each bit found */
>> +	for (i = 0; i < 256; ++i)
>> +		if (test_bit(i, &sregs- >interrupt_bitmap[0]))
>> +			kvm_irqdevice_set_pin(&vcpu- >irq_dev, i, 1);
>> + 
>>   
> 
> You need to lower a pin here if it was previously set.

Hmm.  Good find.  It also means my assumption about being able to ignore de-asserts in the userint model is false.  I will fix this.

> 
> 
>> diff -- git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
>> index 61a6116..a0fdf02 100644
>> ---  a/drivers/kvm/vmx.c
>> +++ b/drivers/kvm/vmx.c
>> @@ - 1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, 
> int irq)
>>  
>>  static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
>>  {
>> -	int word_index = __ffs(vcpu- >irq_summary);
>> -	int bit_index = __ffs(vcpu- >irq_pending[word_index]);
>> -	int irq = word_index * BITS_PER_LONG + bit_index;
>> -
>> -	clear_bit(bit_index, &vcpu- >irq_pending[word_index]);
>> -	if (!vcpu- >irq_pending[word_index])
>> -		clear_bit(word_index, &vcpu- >irq_summary);
>> +	int irq = kvm_irqdevice_read_vector(&vcpu- >irq_dev, 0);
>> +	BUG_ON(irq < 0);
>>   
> 
> This BUG can trigger.  A level- triggered irq was asserted, then deasserted.

Ditto for this.

> 
>>  
>>  	if (vcpu- >rmode.active) {
>>  		inject_rmode_irq(vcpu, irq);
>> @@ - 1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu 
> *vcpu,
>>  		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
>>  
>>  	if (vcpu- >interrupt_window_open &&
>> -	    vcpu- >irq_summary &&
>> +	    kvm_irqdevice_pending(&vcpu- >irq_dev, 0) &&
>>  	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
>>   
> 
> What if an irq is made pending here?

The only race I see is related to what you pointed out previously:  A level-sensitive interrupt could be asserted when pending() is read, and deasserted when read_vector() is read.  Handling the irq == -1 from read_vector() should fix the race.  Or are you pointing out something else?



> or here? possibly a good answer is "don't rely on r_f_i_i if not using 
> userint".

Yeah, exactly.  Once the user turns on in-kernel interrupts, some of the kvm-run interface (TBD) will be disabled/ignored due to being redundant/overlapping.

Since userspace will have to ack the in-kernel interrupt feature, it will obviously know when to stop using those features as well.

-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] KVM: Add irqdevice object
       [not found]             ` <462352F1.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
@ 2007-04-16 14:55               ` Avi Kivity
       [not found]                 ` <46238E55.80401-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2007-04-16 14:55 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Gregory Haskins wrote:
>   
>>>  
>>>  	if (vcpu- >rmode.active) {
>>>  		inject_rmode_irq(vcpu, irq);
>>> @@ - 1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu 
>>>       
>> *vcpu,
>>     
>>>  		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
>>>  
>>>  	if (vcpu- >interrupt_window_open &&
>>> -	    vcpu- >irq_summary &&
>>> +	    kvm_irqdevice_pending(&vcpu- >irq_dev, 0) &&
>>>  	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
>>>   
>>>       
>> What if an irq is made pending here?
>>     
>
> The only race I see is related to what you pointed out previously:  A level-sensitive interrupt could be asserted when pending() is read, and deasserted when read_vector() is read.  Handling the irq == -1 from read_vector() should fix the race.  Or are you pointing out something else?
>   

That one.

I think there are probably a few more hiding in there.  To reduce the 
number of combinations, I'd suggest putting the irq and the inter-vcpu 
communication things under the same lock, and to make sure ->pending() 
and ->read_vector() are always called in the same critical section (or 
even better, to unify them into one function).

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/3] KVM: Add irqdevice object
       [not found]                 ` <46238E55.80401-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-04-16 15:17                   ` Gregory Haskins
  0 siblings, 0 replies; 8+ messages in thread
From: Gregory Haskins @ 2007-04-16 15:17 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

>>> On Mon, Apr 16, 2007 at 10:55 AM, in message <46238E55.80401-atKUWr5tajBWk0Htik3J/w@public.gmane.org>, Avi
Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote: 
> Gregory Haskins wrote:
>>   
>>>>  
>>>>  	if (vcpu-  >rmode.active) {
>>>>  		inject_rmode_irq(vcpu, irq);
>>>> @@ -  1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu 
>>>>       
>>> *vcpu,
>>>     
>>>>  		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
>>>>  
>>>>  	if (vcpu-  >interrupt_window_open &&
>>>> -	    vcpu-  >irq_summary &&
>>>> +	    kvm_irqdevice_pending(&vcpu-  >irq_dev, 0) &&
>>>>  	    !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
>>>>   
>>>>       
>>> What if an irq is made pending here?
>>>     
>>
>> The only race I see is related to what you pointed out previously:  A 
> level- sensitive interrupt could be asserted when pending() is read, and 
> deasserted when read_vector() is read.  Handling the irq == - 1 from 
> read_vector() should fix the race.  Or are you pointing out something else?
>>   
> 
> That one.
> 
> I think there are probably a few more hiding in there.  To reduce the 
> number of combinations, I'd suggest putting the irq and the inter- vcpu 
> communication things under the same lock, and to make sure - >pending() 
> and - >read_vector() are always called in the same critical section (or 
> even better, to unify them into one function).

I think you will see that this has been addressed downstream in my series.  Its just not polished up enough to share yet.  Probably some time this week, however.

Thanks for the review!

-Greg


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2007-04-16 15:17 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-16 12:19 [PATCH 0/3] Current patch series for review of in-kernel APIC work Gregory Haskins
     [not found] ` <20070416121832.9824.40317.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
2007-04-16 12:19   ` [PATCH 1/3] Adds support for in-kernel mmio handlers Gregory Haskins
2007-04-16 12:19   ` [PATCH 2/3] KVM: Add irqdevice object Gregory Haskins
     [not found]     ` <20070416121936.9824.8889.stgit-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
2007-04-16 14:07       ` Avi Kivity
     [not found]         ` <46238338.8070503-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-16 14:42           ` Gregory Haskins
     [not found]             ` <462352F1.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-16 14:55               ` Avi Kivity
     [not found]                 ` <46238E55.80401-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-16 15:17                   ` Gregory Haskins
2007-04-16 12:19   ` [PATCH 3/3] KVM: Preemptible VCPU Gregory Haskins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox