KVM: Preemptible VCPU

From: <>

This adds support for interrupting an executing VCPU

Signed-off-by: Gregory Haskins
---

 drivers/kvm/kvm.h      |   11 ++++++++++
 drivers/kvm/kvm_main.c |   54 ++++++++++++++++++++++++++++++++++++++++++++----
 drivers/kvm/svm.c      |   35 +++++++++++++++++++++++++++++++
 drivers/kvm/vmx.c      |   35 +++++++++++++++++++++++++++++++
 4 files changed, 130 insertions(+), 5 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 58966d9..70d1bb9 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -271,6 +271,16 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
 
+/*
+ * structure for maintaining info for interrupting an executing VCPU
+ */
+struct kvm_vcpu_irq {
+	spinlock_t lock;
+	wait_queue_head_t wq;
+	struct task_struct *task;
+	int pending;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 	union {
@@ -284,6 +294,7 @@ struct kvm_vcpu {
 	struct kvm_run *run;
 	int interrupt_window_open;
 	struct kvm_irqdevice irq_dev;
+	struct kvm_vcpu_irq irq;
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;      /* needs vcpu_load_rsp_rip() */
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7e00412..1cf4060 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -299,6 +299,11 @@ static struct kvm *kvm_create_vm(void)
 		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 
 		mutex_init(&vcpu->mutex);
+
+		memset(&vcpu->irq, 0, sizeof(vcpu->irq));
+		spin_lock_init(&vcpu->irq.lock);
+		init_waitqueue_head(&vcpu->irq.wq);
+
 		vcpu->cpu = -1;
 		vcpu->kvm = kvm;
 		vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -2320,13 +2325,52 @@ static void kvm_vcpu_intr(struct kvm_irqsink *this,
 	 * Our irq device is requesting to interrupt the vcpu.  If it is
 	 * currently running, we should inject a host IPI to force a VMEXIT
 	 */
-
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu*)this->private;
+
 	/*
-	 * FIXME: Implement this or the CPU wont notice the interrupt until
-	 * the next natural VMEXIT.  Note that this is how the system
-	 * has always worked, so nothing is broken here.  This is a future
-	 * enhancement
+	 * HACK ALERT!
+	 *
+	 * We want to send a virtual interrupt signal to the task that owns
+	 * the guest.  However, the signal will only force a VMEXIT (via
+	 * a reschedule IPI) if the task is currently in GUEST mode.  There
+	 * is a race condition between the time that we mark the vcpu as
+	 * running and the time the system actually enters guest mode.  Since
+	 * there doesn't appear to be any way to help with this situation from
+	 * the hardware, we are forced to wait to make sure the guest
+	 * actually gets interrupted in a reasonable amount of time.  If it
+	 * does not, we assume that the IPI failed because it was too early
+	 * and must try again until it does.
+	 *
+	 * This condvar/spinlock/timeout/retry eliminates the race in a safe
+	 * manner, at the expense of making the INTR delivery synchronous
 	 */
+	spin_lock(&vcpu->irq.lock);
+
+	if (vcpu->irq.task) {
+		struct timespec tmo = {
+			.tv_sec  = 0,
+			.tv_nsec = 100000 /* 100us */
+		};
+
+		BUG_ON(vcpu->irq.task == current);
+
+		while (vcpu->irq.task) {
+			DEFINE_WAIT(__wait);
+
+			send_sig(SIGSTOP, vcpu->irq.task, 0);
+
+			prepare_to_wait(&vcpu->irq.wq, &__wait,
+					TASK_UNINTERRUPTIBLE);
+			spin_unlock(&vcpu->irq.lock);
+			schedule_timeout(timespec_to_jiffies(&tmo));
+			spin_lock(&vcpu->irq.lock);
+			finish_wait(&vcpu->irq.wq, &__wait);
+		}
+
+		vcpu->irq.pending = 1;
+	}
+
+	spin_unlock(&vcpu->irq.lock);
 }
 
 static void kvm_vcpu_irqsink_init(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index e59a548..41765bd 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1463,9 +1463,25 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 
 again:
+	spin_lock(&vcpu->irq.lock);
+
+	/*
+	 * Setting vcpu->task signals to outsiders that the VMCS is
+	 * effectively in GUEST mode, and therefore must be signalled
+	 * to transition the task back to HOST mode if any new interrupts
+	 * arrive.
+	 */
+	vcpu->irq.task = current;
+
+	/*
+	 * We also must inject interrupts (if any) while the irq_lock
+	 * is held
+	 */
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
+	spin_unlock(&vcpu->irq.lock);
+
 	clgi();
 
 	pre_svm_run(vcpu);
@@ -1617,6 +1633,25 @@ again:
 	reload_tss(vcpu);
 
 	/*
+	 * Signal that we have transitioned back to host mode
+	 */
+	spin_lock(&vcpu->irq.lock);
+
+	vcpu->irq.task = NULL;
+	wake_up(&vcpu->irq.wq);
+
+	/*
+	 * If irq.pending is asserted someone undoubtedly has sent us a SIGSTOP
+	 * signal.  Counter it with a SIGCONT
+	 */
+	if(vcpu->irq.pending) {
+		send_sig(SIGCONT, current, 0);
+		vcpu->irq.pending = 0;
+	}
+
+	spin_unlock(&vcpu->irq.lock);
+
+	/*
 	 * Profile KVM exit RIPs:
 	 */
 	if (unlikely(prof_on == KVM_PROFILING))
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index a0fdf02..1d5ce85 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1748,9 +1748,25 @@ again:
 	vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
 #endif
 
+	spin_lock(&vcpu->irq.lock);
+
+	/*
+	 * Setting vcpu->task signals to outsiders that the VMCS is
+	 * effectively in GUEST mode, and therefore must be signalled
+	 * to transition the task back to HOST mode if any new interrupts
+	 * arrive.
+	 */
+	vcpu->irq.task = current;
+
+	/*
+	 * We also must inject interrupts (if any) while the irq_lock
+	 * is held
+	 */
 	if (!vcpu->mmio_read_completed)
 		do_interrupt_requests(vcpu, kvm_run);
 
+	spin_unlock(&vcpu->irq.lock);
+
 	if (vcpu->guest_debug.enabled)
 		kvm_guest_debug_pre(vcpu);
@@ -1911,6 +1927,25 @@ again:
 	asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 
+	/*
+	 * Signal that we have transitioned back to host mode
+	 */
+	spin_lock(&vcpu->irq.lock);
+
+	vcpu->irq.task = NULL;
+	wake_up(&vcpu->irq.wq);
+
+	/*
+	 * If irq.pending is asserted someone undoubtedly has sent us a SIGSTOP
+	 * signal.  Counter it with a SIGCONT
+	 */
+	if(vcpu->irq.pending) {
+		send_sig(SIGCONT, current, 0);
+		vcpu->irq.pending = 0;
+	}
+
+	spin_unlock(&vcpu->irq.lock);
+
 	if (fail) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason