From: "Gregory Haskins" <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
To: <kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org>
Subject: irqdevice INTR example
Date: Thu, 12 Apr 2007 00:02:32 -0400 [thread overview]
Message-ID: <461D7702.BA47.005A.0@novell.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 571 bytes --]
Hi All,
Attached are the first three patches in my queue. The first two you are likely familiar with at this point (though I have made some more of the requested changes to 02-irqdevice.patch). The last item (03-preemptible-cpu.patch) adds an implementation to the previously unused kvm_vcpu_intr() callback. This acts as a functional example of the INTR callback mechanism as Avi requested. Note that the work related to IF/NMI/TPR classification of interrupts happens later in my queue and is not mature enough to share yet, but hopefully soon.
Thoughts?
-Greg
[-- Attachment #2: 01-mmio_handler.patch --]
[-- Type: text/plain, Size: 5730 bytes --]
KVM: Adds support for in-kernel mmio handlers
From: <>
Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---
drivers/kvm/kvm.h | 31 ++++++++++++++++++
drivers/kvm/kvm_main.c | 82 +++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 101 insertions(+), 12 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fceeb84..181099f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -236,6 +236,36 @@ struct kvm_pio_request {
int rep;
};
+/*
+ * An in-kernel MMIO device: a set of callbacks plus an opaque context
+ * pointer. Each callback receives the device itself as 'this'.
+ */
+struct kvm_io_device {
+ /* emulated read: the return value is handed back as the data read */
+ unsigned long (*read)(struct kvm_io_device *this,
+ gpa_t addr,
+ int length);
+ /* emulated write of 'val' ('length' bytes) at guest-physical 'addr' */
+ void (*write)(struct kvm_io_device *this,
+ gpa_t addr,
+ int length,
+ unsigned long val);
+ /* returns non-zero if this device claims guest-physical 'addr' */
+ int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+
+ /* opaque pointer; presumably for the implementation's own state */
+ void *private;
+};
+
+/*
+ * It would be nice to use something smarter than a linear search, TBD...
+ * Thankfully we don't expect many devices to register (famous last words :),
+ * so until then it will suffice. At least it's abstracted so we can change
+ * it in one place.
+ */
+struct kvm_io_bus {
+ int dev_count; /* number of entries of devs[] currently in use */
+#define NR_IOBUS_DEVS 6
+ struct kvm_io_device *devs[NR_IOBUS_DEVS]; /* in registration order */
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus);
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+ struct kvm_io_device *dev);
+
struct kvm_vcpu {
struct kvm *kvm;
union {
@@ -345,6 +375,7 @@ struct kvm {
unsigned long rmap_overflow;
struct list_head vm_list;
struct file *filp;
+ struct kvm_io_bus mmio_bus;
};
struct kvm_stat {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 4473174..c3c0059 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -294,6 +294,7 @@ static struct kvm *kvm_create_vm(void)
spin_lock_init(&kvm->lock);
INIT_LIST_HEAD(&kvm->active_mmu_pages);
+ kvm_io_bus_init(&kvm->mmio_bus);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
struct kvm_vcpu *vcpu = &kvm->vcpus[i];
@@ -1015,12 +1016,25 @@ static int emulator_write_std(unsigned long addr,
return X86EMUL_UNHANDLEABLE;
}
+/*
+ * Look up the in-kernel device (if any) that handles guest-physical 'addr'
+ * for this vcpu. Returns NULL when no registered device claims the address.
+ */
+static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
+ gpa_t addr)
+{
+ /*
+ * Note that it's important to have this wrapper function because
+ * in the very near future we will be checking for MMIOs against
+ * the LAPIC as well as the general MMIO bus
+ */
+ return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+}
+
static int emulator_read_emulated(unsigned long addr,
unsigned long *val,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_io_device *mmio_dev;
+ gpa_t gpa;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
@@ -1029,18 +1043,26 @@ static int emulator_read_emulated(unsigned long addr,
} else if (emulator_read_std(addr, val, bytes, ctxt)
== X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
- else {
- gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
- if (gpa == UNMAPPED_GVA)
- return X86EMUL_PROPAGATE_FAULT;
- vcpu->mmio_needed = 1;
- vcpu->mmio_phys_addr = gpa;
- vcpu->mmio_size = bytes;
- vcpu->mmio_is_write = 0;
+ gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+ if (gpa == UNMAPPED_GVA)
+ return X86EMUL_PROPAGATE_FAULT;
- return X86EMUL_UNHANDLEABLE;
+ /*
+ * Is this MMIO handled locally?
+ */
+ mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+ if (mmio_dev) {
+ *val = mmio_dev->read(mmio_dev, gpa, bytes);
+ return X86EMUL_CONTINUE;
}
+
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_phys_addr = gpa;
+ vcpu->mmio_size = bytes;
+ vcpu->mmio_is_write = 0;
+
+ return X86EMUL_UNHANDLEABLE;
}
static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1068,8 +1090,9 @@ static int emulator_write_emulated(unsigned long addr,
unsigned int bytes,
struct x86_emulate_ctxt *ctxt)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
- gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_io_device *mmio_dev;
+ gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
@@ -1077,6 +1100,15 @@ static int emulator_write_emulated(unsigned long addr,
if (emulator_write_phys(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;
+ /*
+ * Is this MMIO handled locally?
+ */
+ mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
+ if (mmio_dev) {
+ mmio_dev->write(mmio_dev, gpa, bytes, val);
+ return X86EMUL_CONTINUE;
+ }
+
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
@@ -2911,6 +2943,32 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
return NOTIFY_OK;
}
+/* Reset the bus to the empty state by zeroing dev_count and devs[]. */
+void kvm_io_bus_init(struct kvm_io_bus *bus)
+{
+ memset(bus, 0, sizeof(*bus));
+}
+
+/*
+ * Walk the registered devices in registration order and return the first
+ * one whose in_range() callback claims 'addr', or NULL if none does.
+ */
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+{
+ struct kvm_io_device **slot = bus->devs;
+ struct kvm_io_device **end = bus->devs + bus->dev_count;
+
+ while (slot < end) {
+ struct kvm_io_device *candidate = *slot++;
+
+ if (candidate->in_range(candidate, addr))
+ return candidate;
+ }
+
+ return NULL;
+}
+
+/*
+ * Append 'dev' to the bus. Only the pointer is stored. The bus holds at
+ * most NR_IOBUS_DEVS devices; registering one more than that is a bug.
+ */
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
+{
+ /*
+ * devs[] has NR_IOBUS_DEVS slots (indices 0..NR_IOBUS_DEVS-1), so the
+ * table is only full once dev_count has reached NR_IOBUS_DEVS.
+ */
+ BUG_ON(bus->dev_count >= NR_IOBUS_DEVS);
+
+ bus->devs[bus->dev_count++] = dev;
+}
+
static struct notifier_block kvm_cpu_notifier = {
.notifier_call = kvm_cpu_hotplug,
.priority = 20, /* must be > scheduler priority */
[-- Attachment #3: 02-irqdevice.patch --]
[-- Type: text/plain, Size: 23734 bytes --]
KVM: Add irqdevice object
From: <>
The current code is geared towards using a user-mode (A)PIC. This patch adds
an "irqdevice" abstraction, and implements a "userint" model to handle the
duties of the original code. Later, we can develop other irqdevice models
to handle objects like LAPIC, IOAPIC, i8259, etc., as appropriate.
Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---
drivers/kvm/Makefile | 2
drivers/kvm/irqdevice.h | 170 ++++++++++++++++++++++++++++++++++++++++
drivers/kvm/kvm.h | 9 +-
drivers/kvm/kvm_main.c | 57 ++++++++++---
drivers/kvm/svm.c | 33 ++++----
drivers/kvm/userint.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/kvm/vmx.c | 29 +++----
7 files changed, 449 insertions(+), 53 deletions(-)
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789f..540afbc 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irqdevice.h b/drivers/kvm/irqdevice.h
new file mode 100644
index 0000000..fe284bc
--- /dev/null
+++ b/drivers/kvm/irqdevice.h
@@ -0,0 +1,170 @@
+/*
+ * Defines an interface for an abstract interrupt controller. The model
+ * consists of a unit with an arbitrary number of input lines (IRQ0-N), an
+ * output line (INTR), and methods for completing an interrupt-acknowledge
+ * cycle (INTA). A particular implementation of this model will define
+ * various policies, such as irq-to-vector translation, INTA/auto-EOI policy,
+ * etc.
+ *
+ * In addition, the INTR callback mechanism allows the unit to be "wired" to
+ * an interruptible source in a very flexible manner. For instance, an
+ * irqdevice could have its INTR wired to a VCPU (ala LAPIC), or another
+ * interrupt controller (ala cascaded i8259s)
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * Authors:
+ * Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __IRQDEVICE_H
+#define __IRQDEVICE_H
+
+/* Flag bits for the 'flags' argument of pending() and read_vector() */
+#define KVM_IRQFLAGS_NMI (1 << 0) /* mask everything but NMIs */
+#define KVM_IRQFLAGS_PEEK (1 << 1) /* do not auto-acknowledge the interrupt */
+
+struct kvm_irqdevice;
+
+/*
+ * The receiving end of an irqdevice's INTR output line.
+ */
+struct kvm_irqsink {
+ /* invoked by kvm_irqdevice_raise_intr(); may be NULL (then skipped) */
+ void (*raise_intr)(struct kvm_irqsink *this,
+ struct kvm_irqdevice *dev);
+
+ /* opaque context supplied by whoever registers the sink */
+ void *private;
+};
+
+/*
+ * An abstract interrupt controller. The function pointers are filled in by
+ * a concrete implementation and are reached through the kvm_irqdevice_*()
+ * inline wrappers, which call them without checking for NULL.
+ */
+struct kvm_irqdevice {
+ /* see kvm_irqdevice_pending() */
+ int (*pending)(struct kvm_irqdevice *this, int flags);
+ /* see kvm_irqdevice_read_vector() */
+ int (*read_vector)(struct kvm_irqdevice *this, int flags);
+ /* see kvm_irqdevice_set_pin() */
+ int (*set_pin)(struct kvm_irqdevice *this, int pin, int level);
+ /* see kvm_irqdevice_summary() */
+ int (*summary)(struct kvm_irqdevice *this, void *data);
+ /* releases implementation state; no wrapper is defined in this header */
+ void (*destructor)(struct kvm_irqdevice *this);
+
+ /* implementation-private state */
+ void *private;
+ /* INTR callback, stored by value by kvm_irqdevice_register_sink() */
+ struct kvm_irqsink sink;
+};
+
+/**
+ * kvm_irqdevice_init - initialize the kvm_irqdevice for use
+ * @dev: The device
+ *
+ * Description: Initialize the kvm_irqdevice for use. Should be called before
+ * calling any derived implementation init functions, and before
+ * registering a sink, because the whole structure is zeroed.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_init(struct kvm_irqdevice *dev)
+{
+ /* clears every callback, ->private and the embedded sink */
+ memset(dev, 0, sizeof(*dev));
+}
+
+/**
+ * kvm_irqdevice_pending - efficiently determines if an interrupt is pending
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ * [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ *
+ * Description: Efficiently determines if an interrupt is pending on an
+ * irqdevice. Forwards to the device's ->pending() callback,
+ * which must have been installed by the implementation.
+ *
+ * Returns: (int)
+ * [0 = no interrupts pending (per "flags" criteria)]
+ * [1 = one or more interrupts are pending]
+ */
+static inline int kvm_irqdevice_pending(struct kvm_irqdevice *dev, int flags)
+{
+ return dev->pending(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_read_vector - read the highest priority vector from the device
+ * @dev: The device
+ * @flags: Modifies the behavior as follows:
+ * [+ KVM_IRQFLAGS_NMI: Mask everything but NMIs]
+ * [+ KVM_IRQFLAGS_PEEK: Do not auto-acknowledge interrupt]
+ *
+ * Description: Read the highest priority pending vector from the device,
+ * potentially invoking auto-EOI depending on device policy.
+ * Forwards to the device's ->read_vector() callback, which
+ * must have been installed by the implementation.
+ *
+ * Returns: (int)
+ * [ -1 = no interrupts pending (per "flags" criteria)]
+ * [>=0 = the highest pending vector]
+ */
+static inline int kvm_irqdevice_read_vector(struct kvm_irqdevice *dev,
+ int flags)
+{
+ return dev->read_vector(dev, flags);
+}
+
+/**
+ * kvm_irqdevice_set_pin - allows the caller to assert/deassert an IRQ
+ * @dev: The device
+ * @pin: The input pin to alter
+ * @level: The value to set (1 = assert, 0 = deassert)
+ *
+ * Description: Allows the caller to assert/deassert an IRQ input pin to the
+ * device according to device policy. Forwards to the device's
+ * ->set_pin() callback, which must have been installed by the
+ * implementation.
+ *
+ * Returns: (int)
+ * [-1 = failure]
+ * [ 0 = success]
+ */
+static inline int kvm_irqdevice_set_pin(struct kvm_irqdevice *dev, int pin,
+ int level)
+{
+ return dev->set_pin(dev, pin, level);
+}
+
+/**
+ * kvm_irqdevice_summary - loads a summary bitmask
+ * @dev: The device
+ * @data: A pointer to a region capable of holding a 256 bit bitmap
+ *
+ * Description: Loads a summary bitmask of all pending vectors (0-255).
+ * Forwards to the device's ->summary() callback, which must
+ * have been installed by the implementation.
+ *
+ * Returns: (int)
+ * [-1 = failure]
+ * [ 0 = success]
+ */
+static inline int kvm_irqdevice_summary(struct kvm_irqdevice *dev, void *data)
+{
+ return dev->summary(dev, data);
+}
+
+/**
+ * kvm_irqdevice_register_sink - registers a kvm_irqsink object
+ * @dev: The device
+ * @sink: The sink to register. Data will be copied so building object from
+ * transient storage is ok.
+ *
+ * Description: Registers a kvm_irqsink object as an INTR callback. Any
+ * previously registered sink is overwritten.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_register_sink(struct kvm_irqdevice *dev,
+ const struct kvm_irqsink *sink)
+{
+ /* struct copy: the caller's object need not outlive this call */
+ dev->sink = *sink;
+}
+
+/**
+ * kvm_irqdevice_raise_intr - invokes a registered INTR callback
+ * @dev: The device
+ *
+ * Description: Calls the raise_intr hook of the device's sink, passing the
+ * sink and the device. Does nothing when no hook is set.
+ * Intended for private use by irqdevice implementations.
+ *
+ * Returns: (void)
+ */
+static inline void kvm_irqdevice_raise_intr(struct kvm_irqdevice *dev)
+{
+ if (!dev->sink.raise_intr)
+ return;
+
+ dev->sink.raise_intr(&dev->sink, dev);
+}
+
+#endif /* __IRQDEVICE_H */
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 181099f..58966d9 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include "vmx.h"
+#include "irqdevice.h"
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -157,6 +158,8 @@ struct vmcs {
struct kvm_vcpu;
+int kvm_userint_init(struct kvm_irqdevice *dev);
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -266,6 +269,8 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+
struct kvm_vcpu {
struct kvm *kvm;
union {
@@ -278,9 +283,7 @@ struct kvm_vcpu {
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
- unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
- unsigned long irq_pending[NR_IRQ_WORDS];
+ struct kvm_irqdevice irq_dev;
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c3c0059..7e00412 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1989,8 +1989,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
sregs->efer = vcpu->shadow_efer;
sregs->apic_base = vcpu->apic_base;
- memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
- sizeof sregs->interrupt_bitmap);
+ kvm_irqdevice_summary(&vcpu->irq_dev, &sregs->interrupt_bitmap);
vcpu_put(vcpu);
@@ -2044,13 +2043,11 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
- memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
- sizeof vcpu->irq_pending);
- vcpu->irq_summary = 0;
- for (i = 0; i < NR_IRQ_WORDS; ++i)
- if (vcpu->irq_pending[i])
- __set_bit(i, &vcpu->irq_summary);
-
+ /* walk the interrupt-bitmap and inject an IRQ for each bit found */
+ for (i = 0; i < 256; ++i)
+ if (test_bit(i, &sregs->interrupt_bitmap[0]))
+ kvm_irqdevice_set_pin(&vcpu->irq_dev, i, 1);
+
set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -2210,14 +2207,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
{
if (irq->irq < 0 || irq->irq >= 256)
return -EINVAL;
- vcpu_load(vcpu);
-
- set_bit(irq->irq, vcpu->irq_pending);
- set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
- vcpu_put(vcpu);
-
- return 0;
+ return kvm_irqdevice_set_pin(&vcpu->irq_dev, irq->irq, 1);
}
static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
@@ -2319,6 +2310,36 @@ out1:
}
/*
+ * This function will be invoked whenever the vcpu->irq_dev raises its INTR
+ * line. It is currently an empty placeholder: both arguments are unused.
+ */
+static void kvm_vcpu_intr(struct kvm_irqsink *this,
+ struct kvm_irqdevice *dev)
+{
+ /*
+ * Our irq device is requesting to interrupt the vcpu. If it is
+ * currently running, we should inject a host IPI to force a VMEXIT
+ */
+
+ /*
+ * FIXME: Implement this or the CPU won't notice the interrupt until
+ * the next natural VMEXIT. Note that this is how the system
+ * has always worked, so nothing is broken here. This is a future
+ * enhancement
+ */
+}
+
+/*
+ * Wire the vcpu's irq device INTR output to kvm_vcpu_intr(), with the vcpu
+ * itself as the sink's private context. The sink is copied on registration,
+ * so a stack temporary is sufficient.
+ */
+static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
+{
+ struct kvm_irqsink vcpu_sink;
+
+ vcpu_sink.raise_intr = kvm_vcpu_intr;
+ vcpu_sink.private = vcpu;
+
+ kvm_irqdevice_register_sink(&vcpu->irq_dev, &vcpu_sink);
+}
+
+/*
* Creates some virtual cpus. Good luck creating more than one.
*/
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
@@ -2364,6 +2385,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
if (r < 0)
goto out_free_vcpus;
+ kvm_irqdevice_init(&vcpu->irq_dev);
+ kvm_vcpu_irqsink_init(vcpu);
+ kvm_userint_init(&vcpu->irq_dev);
+
kvm_arch_ops->vcpu_load(vcpu);
r = kvm_mmu_setup(vcpu);
if (r >= 0)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index b7e1410..e59a548 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -108,20 +108,16 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu)
static inline u8 pop_irq(struct kvm_vcpu *vcpu)
{
- int word_index = __ffs(vcpu->irq_summary);
- int bit_index = __ffs(vcpu->irq_pending[word_index]);
- int irq = word_index * BITS_PER_LONG + bit_index;
-
- clear_bit(bit_index, &vcpu->irq_pending[word_index]);
- if (!vcpu->irq_pending[word_index])
- clear_bit(word_index, &vcpu->irq_summary);
- return irq;
+ return kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
}
static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
{
- set_bit(irq, vcpu->irq_pending);
- set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+ /* FIXME: We probably want to reserve the "set_pin" verb for
+ * actual interrupt requests, not for putting back something
+ * previously pending. Lets revisit this
+ */
+ kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1);
}
static inline void clgi(void)
@@ -1092,7 +1088,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
skip_emulated_instruction(vcpu);
- if (vcpu->irq_summary)
+ if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
return 1;
kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1263,7 +1259,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
* possible
*/
if (kvm_run->request_interrupt_window &&
- !vcpu->irq_summary) {
+ !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
++kvm_stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0;
@@ -1399,7 +1395,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
- if (vcpu->interrupt_window_open && vcpu->irq_summary)
+ if (vcpu->interrupt_window_open &&
+ kvm_irqdevice_pending(&vcpu->irq_dev, 0))
/*
* If interrupts enabled, and not blocked by sti or mov ss. Good.
*/
@@ -1409,7 +1406,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
* Interrupts blocked. Wait for unblock.
*/
if (!vcpu->interrupt_window_open &&
- (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+ (kvm_irqdevice_pending(&vcpu->irq_dev, 0) ||
+ kvm_run->request_interrupt_window)) {
control->intercept |= 1ULL << INTERCEPT_VINTR;
} else
control->intercept &= ~(1ULL << INTERCEPT_VINTR);
@@ -1418,8 +1416,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
- vcpu->irq_summary == 0);
+ kvm_run->ready_for_interrupt_injection =
+ (vcpu->interrupt_window_open &&
+ !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = vcpu->cr8;
kvm_run->apic_base = vcpu->apic_base;
@@ -1434,7 +1433,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- return (!vcpu->irq_summary &&
+ return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
kvm_run->request_interrupt_window &&
vcpu->interrupt_window_open &&
(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
diff --git a/drivers/kvm/userint.c b/drivers/kvm/userint.c
new file mode 100644
index 0000000..8363060
--- /dev/null
+++ b/drivers/kvm/userint.c
@@ -0,0 +1,202 @@
+/*
+ * User Interrupts IRQ device
+ *
+ * This acts as an extension of an interrupt controller that exists elsewhere
+ * (typically in userspace/QEMU). Because this PIC is a pseudo device that
+ * is downstream from a real emulated PIC, the "IRQ-to-vector" mapping has
+ * already occurred. Therefore, this PIC has the following unusual properties:
+ *
+ * 1) It has 256 "pins" which are literal vectors (i.e. no translation)
+ * 2) It only supports "auto-EOI" behavior since it is expected that the
+ * upstream emulated PIC will handle the real EOIs (if applicable)
+ * 3) It only listens to "asserts" on the pins (deasserts are dropped)
+ * because it's an auto-EOI device anyway.
+ *
+ * Copyright (C) 2007 Novell
+ *
+ * bitarray code based on original vcpu->irq_pending code,
+ * Copyright (C) 2007 Qumranet
+ *
+ * Authors:
+ * Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "kvm.h"
+
+/*
+ *----------------------------------------------------------------------
+ * optimized bitarray object - works like bitarrays in bitops, but uses
+ * a summary field to accelerate lookups. Assumes external locking
+ *---------------------------------------------------------------------
+ */
+
+/*
+ * Two-level bitmap: bit N of 'summary' is kept set while word N of
+ * 'pending' has at least one bit set.
+ */
+struct bitarray {
+ unsigned long summary; /* 1 per word in pending */
+ unsigned long pending[NR_IRQ_WORDS]; /* one bit per interrupt number */
+};
+
+/* Returns 1 if any bit is set in the array (per the summary word), else 0. */
+static inline int bitarray_pending(struct bitarray *this)
+{
+ return this->summary != 0;
+}
+
+/*
+ * Returns the index of the highest set bit, or -1 when the array is empty.
+ * The summary word selects the highest non-empty word of pending[].
+ */
+static inline int bitarray_findhighest(struct bitarray *this)
+{
+ int word, bit;
+
+ if (!this->summary)
+ return -1;
+
+ word = __fls(this->summary);
+ bit = __fls(this->pending[word]);
+
+ return word * BITS_PER_LONG + bit;
+}
+
+/* Set bit 'nr' in pending[] and flag its word in the summary. */
+static inline void bitarray_set(struct bitarray *this, int nr)
+{
+ /* pass the array (unsigned long *), not a pointer-to-array */
+ __set_bit(nr, this->pending);
+ __set_bit(nr / BITS_PER_LONG, &this->summary);
+}
+
+/*
+ * Clear bit 'nr' in pending[]. The summary bit for its word is dropped only
+ * once that word has become completely empty.
+ */
+static inline void bitarray_clear(struct bitarray *this, int nr)
+{
+ int word = nr / BITS_PER_LONG;
+
+ /* pass the array (unsigned long *), not a pointer-to-array */
+ __clear_bit(nr, this->pending);
+ if (!this->pending[word])
+ __clear_bit(word, &this->summary);
+}
+
+/* Returns non-zero if bit 'nr' is set in pending[]. */
+static inline int bitarray_test(struct bitarray *this, int nr)
+{
+ /* pass the array (unsigned long *), not a pointer-to-array */
+ return test_bit(nr, this->pending);
+}
+
+/*
+ *----------------------------------------------------------------------
+ * userint interface - provides the actual kvm_irqdevice implementation
+ *---------------------------------------------------------------------
+ */
+
+/* Private state of the userint irqdevice (stored in kvm_irqdevice.private) */
+struct kvm_userint {
+ spinlock_t lock; /* taken around every access to the fields below */
+ struct bitarray irq_pending; /* pending vectors */
+ int nmi_pending; /* set when pin 2 is asserted, cleared when it is read */
+};
+
+/*
+ * ->pending() hook. With KVM_IRQFLAGS_NMI set it reports the NMI state
+ * only; otherwise it reports whether any vector is pending.
+ */
+static int userint_pending(struct kvm_irqdevice *this, int flags)
+{
+ struct kvm_userint *ui = (struct kvm_userint*)this->private;
+ int has_pending;
+
+ spin_lock_irq(&ui->lock);
+ has_pending = (flags & KVM_IRQFLAGS_NMI)
+ ? ui->nmi_pending
+ : bitarray_pending(&ui->irq_pending);
+ spin_unlock_irq(&ui->lock);
+
+ return has_pending;
+}
+
+/*
+ * ->read_vector() hook. Returns the vector to deliver, or -1 if nothing
+ * qualifies. An NMI is reported as vector 2. Unless KVM_IRQFLAGS_PEEK is
+ * set, the returned vector is also cleared from the pending state.
+ */
+static int userint_read_vector(struct kvm_irqdevice *this, int flags)
+{
+ struct kvm_userint *s = (struct kvm_userint*)this->private;
+ int irq;
+
+ spin_lock_irq(&s->lock);
+
+ /*
+ * NMIs take priority, so if there is an NMI pending, or
+ * if the caller asked for NMIs only, consider nothing else
+ */
+ if (s->nmi_pending || (flags & KVM_IRQFLAGS_NMI))
+ irq = s->nmi_pending ? 2 : -1;
+ else
+ irq = bitarray_findhighest(&s->irq_pending);
+
+ if ((irq > -1) && !(flags & KVM_IRQFLAGS_PEEK)) {
+ /*
+ * If the "peek" flag is not set, automatically clear the
+ * interrupt as the EOI mechanism (if any) will take place
+ * in userspace
+ */
+ bitarray_clear(&s->irq_pending, irq);
+ /* pin 2 is mirrored in nmi_pending by userint_set_pin() */
+ if (irq == 2)
+ s->nmi_pending = 0;
+ }
+
+ spin_unlock_irq(&s->lock);
+
+ return irq;
+}
+
+/*
+ * ->set_pin() hook. Asserting a pin marks that vector pending (pin 2 also
+ * sets nmi_pending) and then raises INTR towards the registered sink.
+ * Deasserts are ignored.
+ *
+ * Returns 0 on success (including ignored deasserts), or -1 if 'irq' does
+ * not fit in the pending bitmap.
+ */
+static int userint_set_pin(struct kvm_irqdevice *this, int irq, int level)
+{
+ struct kvm_userint *s = (struct kvm_userint*)this->private;
+
+ if (!level)
+ return 0; /* We don't care about deasserts */
+
+ /* never index outside irq_pending.pending[] */
+ if (irq < 0 || irq >= (int)(NR_IRQ_WORDS * BITS_PER_LONG))
+ return -1;
+
+ spin_lock_irq(&s->lock);
+
+ /*
+ * Update the local state
+ */
+ bitarray_set(&s->irq_pending, irq);
+ if (irq == 2)
+ s->nmi_pending = 1;
+
+ spin_unlock_irq(&s->lock);
+
+ /*
+ * And then alert the higher layer software we have changes
+ * (done after dropping the lock)
+ */
+ kvm_irqdevice_raise_intr(this);
+
+ return 0;
+}
+
+/*
+ * ->summary() hook. Copies the whole pending[] bitmap into 'data' under
+ * the device lock; 'data' must be large enough to hold it. Always returns 0.
+ */
+static int userint_summary(struct kvm_irqdevice* this, void *data)
+{
+ struct kvm_userint *ui = (struct kvm_userint*)this->private;
+ struct bitarray *bits = &ui->irq_pending;
+
+ spin_lock_irq(&ui->lock);
+ memcpy(data, bits->pending, sizeof(bits->pending));
+ spin_unlock_irq(&ui->lock);
+
+ return 0;
+}
+
+/*
+ * ->destructor() hook: frees the kvm_userint allocated by
+ * kvm_userint_init(). this->private is left dangling afterwards.
+ */
+static void userint_destructor(struct kvm_irqdevice *this)
+{
+ kfree(this->private);
+}
+
+/*
+ * Turn 'dev' into a userint device: allocate its private state and install
+ * the userint callbacks. Returns 0 on success or -ENOMEM if the allocation
+ * fails, in which case 'dev' is left untouched.
+ */
+int kvm_userint_init(struct kvm_irqdevice *dev)
+{
+ struct kvm_userint *ui = kzalloc(sizeof(*ui), GFP_KERNEL);
+
+ if (!ui)
+ return -ENOMEM;
+
+ spin_lock_init(&ui->lock);
+ dev->private = ui;
+
+ dev->destructor = userint_destructor;
+ dev->summary = userint_summary;
+ dev->set_pin = userint_set_pin;
+ dev->read_vector = userint_read_vector;
+ dev->pending = userint_pending;
+
+ return 0;
+}
+
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 61a6116..a0fdf02 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1219,13 +1219,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
- int word_index = __ffs(vcpu->irq_summary);
- int bit_index = __ffs(vcpu->irq_pending[word_index]);
- int irq = word_index * BITS_PER_LONG + bit_index;
-
- clear_bit(bit_index, &vcpu->irq_pending[word_index]);
- if (!vcpu->irq_pending[word_index])
- clear_bit(word_index, &vcpu->irq_summary);
+ int irq = kvm_irqdevice_read_vector(&vcpu->irq_dev, 0);
+ BUG_ON(irq < 0);
if (vcpu->rmode.active) {
inject_rmode_irq(vcpu, irq);
@@ -1246,7 +1241,7 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
if (vcpu->interrupt_window_open &&
- vcpu->irq_summary &&
+ kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
!(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
/*
* If interrupts enabled, and not blocked by sti or mov ss. Good.
@@ -1255,7 +1250,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
if (!vcpu->interrupt_window_open &&
- (vcpu->irq_summary || kvm_run->request_interrupt_window))
+ (kvm_irqdevice_pending(&vcpu->irq_dev, 0) ||
+ kvm_run->request_interrupt_window))
/*
* Interrupts blocked. Wait for unblock.
*/
@@ -1314,8 +1310,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (is_external_interrupt(vect_info)) {
int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
- set_bit(irq, vcpu->irq_pending);
- set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+ /* FIXME: Is this right? */
+ kvm_irqdevice_set_pin(&vcpu->irq_dev, irq, 1);
}
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
@@ -1619,8 +1615,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = vcpu->cr8;
kvm_run->apic_base = vcpu->apic_base;
- kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
- vcpu->irq_summary == 0);
+ kvm_run->ready_for_interrupt_injection =
+ (vcpu->interrupt_window_open &&
+ !kvm_irqdevice_pending(&vcpu->irq_dev, 0));
}
static int handle_interrupt_window(struct kvm_vcpu *vcpu,
@@ -1631,7 +1628,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
* possible
*/
if (kvm_run->request_interrupt_window &&
- !vcpu->irq_summary) {
+ !kvm_irqdevice_pending(&vcpu->irq_dev, 0)) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++kvm_stat.irq_window_exits;
return 0;
@@ -1642,7 +1639,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
skip_emulated_instruction(vcpu);
- if (vcpu->irq_summary)
+ if (kvm_irqdevice_pending(&vcpu->irq_dev, 0))
return 1;
kvm_run->exit_reason = KVM_EXIT_HLT;
@@ -1713,7 +1710,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- return (!vcpu->irq_summary &&
+ return (!kvm_irqdevice_pending(&vcpu->irq_dev, 0) &&
kvm_run->request_interrupt_window &&
vcpu->interrupt_window_open &&
(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
[-- Attachment #4: 03-preemptible-cpu.patch --]
[-- Type: text/plain, Size: 10834 bytes --]
KVM: Preemptible VCPU
From: <>
This adds support for interrupting an executing CPU
Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---
drivers/kvm/Makefile | 2 -
drivers/kvm/condvar.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
drivers/kvm/condvar.h | 36 ++++++++++++++++
drivers/kvm/kvm.h | 12 +++++
drivers/kvm/kvm_main.c | 47 ++++++++++++++++++---
drivers/kvm/svm.c | 35 +++++++++++++++
drivers/kvm/vmx.c | 35 +++++++++++++++
7 files changed, 270 insertions(+), 6 deletions(-)
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 540afbc..b3bef0e 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o userint.o condvar.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/condvar.c b/drivers/kvm/condvar.c
new file mode 100644
index 0000000..87e464a
--- /dev/null
+++ b/drivers/kvm/condvar.c
@@ -0,0 +1,109 @@
+/*
+ * Condition Variable
+ *
+ * Copyright (C) 2007, Novell
+ *
+ * Authors:
+ * Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "condvar.h"
+
+/*
+ * Initialize a condition variable: zero the structure (so lock_ops starts
+ * out NULL) and set up its wait queue. The caller must install lock_ops
+ * before using condvar_wait().
+ */
+void condvar_init(struct condvar *cv)
+{
+ memset(cv, 0, sizeof(*cv));
+
+ /*
+ * Use the runtime initializer rather than copying a static
+ * initializer out of a stack temporary.
+ */
+ init_waitqueue_head(&cv->queue);
+}
+EXPORT_SYMBOL_GPL(condvar_init);
+
+/*
+ * Block on the condition variable. Assumes the lock 'l' is already held;
+ * it is dropped while sleeping and re-taken before returning.
+ *
+ * @cv: the condition variable; cv->lock_ops must be set
+ * @l: the lock object, passed verbatim to cv->lock_ops
+ * @timeout: if > -1, handed to schedule_timeout(); otherwise sleep with no
+ * timeout
+ *
+ * Always returns 0: the result of schedule_timeout() is discarded, so the
+ * caller cannot tell a wakeup from a timeout.
+ */
+int condvar_wait(struct condvar *cv, void *l, long timeout)
+{
+ DEFINE_WAIT(__wait);
+ int _ret = 0;
+
+ BUG_ON(!cv->lock_ops);
+
+ /*
+ * first place ourselves on the waitqueue before releasing the lock
+ */
+ prepare_to_wait(&cv->queue, &__wait, TASK_UNINTERRUPTIBLE);
+
+ /*
+ * now actually release the lock to unblock any potential signalers
+ */
+ cv->lock_ops->unlock(l);
+
+ /*
+ * finally, reschedule until we are re-awoken
+ */
+ if (timeout > -1)
+ schedule_timeout(timeout);
+ else
+ schedule();
+ finish_wait(&cv->queue, &__wait);
+
+ /*
+ * if we get here, it's because someone signaled us or the
+ * timeout expired. reacquire the lock
+ */
+ cv->lock_ops->lock(l);
+
+ return _ret;
+}
+EXPORT_SYMBOL_GPL(condvar_wait);
+
+/*
+ * Wake up waiters on the condition variable via wake_up() on its queue.
+ * Assumes the lock is already held. Always returns 0.
+ */
+int condvar_signal(struct condvar *cv)
+{
+ wake_up(&cv->queue);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(condvar_signal);
+
+/*
+ *------------------------------------------------------------------------
+ * spinlock_condvar
+ *
+ * spin_lock()/spin_unlock() can sometimes be implemented as macros, so
+ * assigning them as function pointers directly is probably not going to
+ * work. Therefore we need these lightweight wrappers
+ *------------------------------------------------------------------------
+ */
+
+/* cv_lock_ops.lock adapter: 'l' is really a spinlock_t *. */
+static void spinlock_condvar_lock(void *l)
+{
+ spinlock_t *sl = l;
+
+ spin_lock(sl);
+}
+
+/* cv_lock_ops.unlock adapter: 'l' is really a spinlock_t *. */
+static void spinlock_condvar_unlock(void *l)
+{
+ spinlock_t *sl = l;
+
+ spin_unlock(sl);
+}
+
+/* lock_ops table shared by every condvar set up by spinlock_condvar_init() */
+static struct cv_lock_ops spinlock_ops = {
+ .lock = spinlock_condvar_lock,
+ .unlock = spinlock_condvar_unlock
+};
+
+/*
+ * Initialize 'cv' as a condition variable whose associated lock is a
+ * spinlock_t: the 'l' argument later given to condvar_wait() must then
+ * point to a spinlock_t.
+ * NOTE(review): unlike condvar_init/wait/signal this symbol is not
+ * exported - confirm no other module needs it.
+ */
+void spinlock_condvar_init(struct condvar *cv)
+{
+ condvar_init(cv);
+
+ /* must come after condvar_init(), which zeroes the whole structure */
+ cv->lock_ops = &spinlock_ops;
+}
+
diff --git a/drivers/kvm/condvar.h b/drivers/kvm/condvar.h
new file mode 100644
index 0000000..58ed523
--- /dev/null
+++ b/drivers/kvm/condvar.h
@@ -0,0 +1,36 @@
+/*
+ * Condition Variable
+ *
+ * Copyright (C) 2007, Novell
+ *
+ * Authors:
+ * Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+struct cv_lock_ops { /* lock hooks a condvar calls around its sleep; the argument is the opaque lock pointer given to condvar_wait() */
+ void (*lock)(void *); /* invoked by condvar_wait() after the sleep, to take the lock again */
+ void (*unlock)(void *); /* invoked by condvar_wait() once queued, to drop the lock before sleeping */
+};
+
+struct condvar { /* condition variable layered on a wait queue */
+ wait_queue_head_t queue; /* where condvar_wait() sleeps; condvar_signal() calls wake_up() on it */
+ struct cv_lock_ops *lock_ops; /* how to drop/re-take the caller's lock; condvar_wait() BUGs if this is NULL */
+};
+
+void condvar_init(struct condvar *cv);
+int condvar_wait(struct condvar *cv, void *l, long timeout);
+int condvar_signal(struct condvar *cv);
+
+/*
+ *------------------------------------------------------------------------
+ * spinlock_condvar
+ *------------------------------------------------------------------------
+ */
+
+void spinlock_condvar_init(struct condvar *cv);
+
+
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 58966d9..703ffe0 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -14,6 +14,7 @@
#include "vmx.h"
#include "irqdevice.h"
+#include "condvar.h"
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -271,6 +272,16 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+/*
+ * structure for maintaining info for interrupting an executing VCPU
+ *
+ * The vcpu run loops (svm/vmx) and kvm_vcpu_intr() both take @lock
+ * before touching @task or @pending.
+ */
+struct kvm_vcpu_irq {
+ spinlock_t lock; /* held around every access to task/pending; also the lock passed to condvar_wait() */
+ struct condvar cv; /* signaled by the run loop when task is cleared; kvm_vcpu_intr() waits on it */
+ struct task_struct *task; /* set to current before guest entry, NULL again once back in host mode */
+ int pending; /* set to 1 by kvm_vcpu_intr() after its wait loop; the run loop answers with SIGCONT and clears it */
+};
+
struct kvm_vcpu {
struct kvm *kvm;
union {
@@ -284,6 +295,7 @@ struct kvm_vcpu {
struct kvm_run *run;
int interrupt_window_open;
struct kvm_irqdevice irq_dev;
+ struct kvm_vcpu_irq irq;
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7e00412..ea3609e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -299,6 +299,11 @@ static struct kvm *kvm_create_vm(void)
struct kvm_vcpu *vcpu = &kvm->vcpus[i];
mutex_init(&vcpu->mutex);
+
+ memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+ spin_lock_init(&vcpu->irq.lock);
+ spinlock_condvar_init(&vcpu->irq.cv);
+
vcpu->cpu = -1;
vcpu->kvm = kvm;
vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -2320,13 +2325,45 @@ static void kvm_vcpu_intr(struct kvm_irqsink *this,
* Our irq device is requesting to interrupt the vcpu. If it is
* currently running, we should inject a host IPI to force a VMEXIT
*/
-
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
+
/*
- * FIXME: Implement this or the CPU wont notice the interrupt until
- * the next natural VMEXIT. Note that this is how the system
- * has always worked, so nothing is broken here. This is a future
- * enhancement
+ * HACK ALERT!
+ *
+ * We want to send a virtual interrupt signal to the task that owns
+ * the guest. However, the signal will only force a VMEXIT (via
+ * a reschedule IPI) if the task is currently in GUEST mode. There
+ * is a race condition between the time that we mark the vcpu as
+ * running and the time the system actually enters guest mode. Since
+ * there doesn't appear to be any way to help with this situation from
+ * the VT hardware, we are forced to wait to make sure the guest
+ * actually gets interrupted in a reasonable amount of time. If it
+ * does not, we assume that the IPI failed because it was too early
+ * and must try again until it does.
+ *
+ * This condvar/spinlock/timeout/retry eliminate the race in a safe
+ * manner, at the expense of making the INTR delivery synchronous
*/
+ spin_lock(&vcpu->irq.lock);
+
+ if (vcpu->irq.task) {
+ struct timespec tmo = {
+ .tv_sec = 0,
+ .tv_nsec = 100000 /* 100us */
+ };
+
+ BUG_ON(vcpu->irq.task == current);
+
+ while (vcpu->irq.task) {
+ send_sig(SIGSTOP, vcpu->irq.task, 0);
+ condvar_wait(&vcpu->irq.cv, &vcpu->irq.lock,
+ timespec_to_jiffies(&tmo));
+ }
+
+ vcpu->irq.pending = 1;
+ }
+
+ spin_unlock(&vcpu->irq.lock);
}
static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index e59a548..6bc2fb1 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1463,9 +1463,25 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int r;
again:
+ spin_lock(&vcpu->irq.lock);
+
+ /*
+ * Setting vcpu->irq.task signals to outsiders that the VMCS is
+ * effectively in GUEST mode, and therefore must be signalled
+ * to transition the task back to HOST mode if any new interrupts
+ * arrive.
+ */
+ vcpu->irq.task = current;
+
+ /*
+ * We also must inject interrupts (if any) while the irq_lock
+ * is held
+ */
if (!vcpu->mmio_read_completed)
do_interrupt_requests(vcpu, kvm_run);
+ spin_unlock(&vcpu->irq.lock);
+
clgi();
pre_svm_run(vcpu);
@@ -1617,6 +1633,25 @@ again:
reload_tss(vcpu);
/*
+ * Signal that we have transitioned back to host mode
+ */
+ spin_lock(&vcpu->irq.lock);
+
+ vcpu->irq.task = NULL;
+ condvar_signal(&vcpu->irq.cv);
+
+ /*
+ * If irqpending is asserted someone undoubtedly has sent us a SIGSTOP
+ * signal. Counter it with a SIGCONT
+ */
+ if(vcpu->irq.pending) {
+ send_sig(SIGCONT, current, 0);
+ vcpu->irq.pending = 0;
+ }
+
+ spin_unlock(&vcpu->irq.lock);
+
+ /*
* Profile KVM exit RIPs:
*/
if (unlikely(prof_on == KVM_PROFILING))
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index a0fdf02..f7b716b 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1748,9 +1748,25 @@ again:
vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
#endif
+ spin_lock(&vcpu->irq.lock);
+
+ /*
+ * Setting vcpu->irq.task signals to outsiders that the VMCS is
+ * effectively in GUEST mode, and therefore must be signalled
+ * to transition the task back to HOST mode if any new interrupts
+ * arrive.
+ */
+ vcpu->irq.task = current;
+
+ /*
+ * We also must inject interrupts (if any) while the irq_lock
+ * is held
+ */
if (!vcpu->mmio_read_completed)
do_interrupt_requests(vcpu, kvm_run);
+ spin_unlock(&vcpu->irq.lock);
+
if (vcpu->guest_debug.enabled)
kvm_guest_debug_pre(vcpu);
@@ -1911,6 +1927,25 @@ again:
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
+ /*
+ * Signal that we have transitioned back to host mode
+ */
+ spin_lock(&vcpu->irq.lock);
+
+ vcpu->irq.task = NULL;
+ condvar_signal(&vcpu->irq.cv);
+
+ /*
+ * If irqpending is asserted someone undoubtedly has sent us a SIGSTOP
+ * signal. Counter it with a SIGCONT
+ */
+ if(vcpu->irq.pending) {
+ send_sig(SIGCONT, current, 0);
+ vcpu->irq.pending = 0;
+ }
+
+ spin_unlock(&vcpu->irq.lock);
+
if (fail) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
[-- Attachment #5: series --]
[-- Type: application/octet-stream, Size: 143 bytes --]
# This series applies on GIT commit 0ea6eecef44923d66409a49d71e4fa87fa0f5bed
01-mmio_handler.patch
02-irqdevice.patch
03-preemptible-cpu.patch
[-- Attachment #6: Type: text/plain, Size: 345 bytes --]
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
[-- Attachment #7: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
next reply other threads:[~2007-04-12 4:02 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-04-12 4:02 Gregory Haskins [this message]
[not found] ` <461D7702.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-12 8:02 ` irqdevice INTR example Avi Kivity
[not found] ` <461DE791.1040707-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-12 8:18 ` Christoph Hellwig
2007-04-12 11:55 ` Gregory Haskins
[not found] ` <461DE5C9.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-12 12:49 ` Avi Kivity
[not found] ` <461E2AD5.7070905-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-12 13:43 ` Gregory Haskins
[not found] ` <461DFF1C.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-12 14:14 ` Avi Kivity
[not found] ` <461E3EDB.3080002-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-12 16:01 ` Gregory Haskins
2007-04-13 13:05 ` Fwd: " Gregory Haskins
[not found] ` <461E1F73.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-14 14:30 ` Avi Kivity
[not found] ` <4620E56A.7040207-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-04-15 22:32 ` Gregory Haskins
2007-04-15 23:32 ` Gregory Haskins
[not found] ` <46226FBC.BA47.005A.0-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
2007-04-16 5:46 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=461D7702.BA47.005A.0@novell.com \
--to=ghaskins-et1tbqhtxzrqt0dzr+alfa@public.gmane.org \
--cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.