* [patch 1/2] [PATCH] kvm: nowait retry for asynchronous page faults
2011-11-17 11:19 [patch 0/2] kvm-s390: asynchronous page faults Carsten Otte
@ 2011-11-17 11:19 ` Carsten Otte
2011-11-17 11:19 ` [patch 2/2] [PATCH] kvm-s390: pseudo page fault support Carsten Otte
1 sibling, 0 replies; 8+ messages in thread
From: Carsten Otte @ 2011-11-17 11:19 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti
Cc: Christian Borntraeger, Heiko Carstens, Martin Schwidefsky,
Cornelia Huck, KVM
[-- Attachment #1: 502-kvm-pfault-gmap.diff --]
[-- Type: text/plain, Size: 5052 bytes --]
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Add FAULT_FLAG_RETRY_NOWAIT to the fault flags if the host is processing
a guest fault. In case of a fault retry exit sie64a() with the gmap_fault
indication set. This makes it possible to handle asynchronous page faults
without the need for mm notifiers.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
arch/s390/include/asm/pgtable.h | 1 +
arch/s390/include/asm/processor.h | 1 +
arch/s390/mm/fault.c | 28 +++++++++++++++++++++++-----
arch/s390/mm/pgtable.c | 7 +++++++
4 files changed, 32 insertions(+), 5 deletions(-)
diff -urpN linux-2.6/arch/s390/include/asm/pgtable.h linux-2.6-patched/arch/s390/include/asm/pgtable.h
--- linux-2.6/arch/s390/include/asm/pgtable.h 2011-11-17 10:03:47.000000000 +0100
+++ linux-2.6-patched/arch/s390/include/asm/pgtable.h 2011-11-17 10:03:53.000000000 +0100
@@ -688,6 +688,7 @@ int gmap_unmap_segment(struct gmap *gmap
unsigned long __gmap_fault(unsigned long address, struct gmap *);
unsigned long gmap_fault(unsigned long address, struct gmap *);
void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
+int gmap_pfault(struct gmap *gmap);
/*
* Certain architectures need to do special things when PTEs
diff -urpN linux-2.6/arch/s390/include/asm/processor.h linux-2.6-patched/arch/s390/include/asm/processor.h
--- linux-2.6/arch/s390/include/asm/processor.h 2011-11-17 10:03:39.000000000 +0100
+++ linux-2.6-patched/arch/s390/include/asm/processor.h 2011-11-17 10:03:53.000000000 +0100
@@ -83,6 +83,7 @@ struct thread_struct {
unsigned long prot_addr; /* address of protection-excep. */
unsigned int trap_no;
unsigned long gmap_addr; /* address of last gmap fault. */
+ unsigned int gmap_pfault; /* bool to indicate a guest pfault */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
/* pfault_wait is used to block the process on a pfault event */
diff -urpN linux-2.6/arch/s390/mm/fault.c linux-2.6-patched/arch/s390/mm/fault.c
--- linux-2.6/arch/s390/mm/fault.c 2011-11-17 10:03:49.000000000 +0100
+++ linux-2.6-patched/arch/s390/mm/fault.c 2011-11-17 10:03:53.000000000 +0100
@@ -52,6 +52,7 @@
#define VM_FAULT_BADCONTEXT 0x010000
#define VM_FAULT_BADMAP 0x020000
#define VM_FAULT_BADACCESS 0x040000
+#define VM_FAULT_PFAULT 0x080000
static unsigned long store_indication;
@@ -242,6 +243,7 @@ static noinline void do_fault_error(stru
return;
}
case VM_FAULT_BADCONTEXT:
+ case VM_FAULT_PFAULT:
do_no_context(regs, int_code, trans_exc_code);
break;
default: /* fault & VM_FAULT_ERROR */
@@ -276,6 +278,9 @@ static noinline void do_fault_error(stru
static inline int do_exception(struct pt_regs *regs, int access,
unsigned long trans_exc_code)
{
+#ifdef CONFIG_PGSTE
+ struct gmap *gmap;
+#endif
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
@@ -306,9 +311,10 @@ static inline int do_exception(struct pt
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
- if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
- address = __gmap_fault(address,
- (struct gmap *) S390_lowcore.gmap);
+ gmap = (struct gmap *)
+ (test_thread_flag(TIF_SIE) ? S390_lowcore.gmap : 0);
+ if (gmap) {
+ address = __gmap_fault(address, gmap);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
goto out_up;
@@ -317,6 +323,7 @@ static inline int do_exception(struct pt
fault = VM_FAULT_OOM;
goto out_up;
}
+ flags |= FAULT_FLAG_RETRY_NOWAIT;
}
#endif
@@ -368,10 +375,21 @@ retry:
regs, address);
}
if (fault & VM_FAULT_RETRY) {
+#ifdef CONFIG_PGSTE
+ if (gmap) {
+ /* FAULT_FLAG_RETRY_NOWAIT has been set,
+ * mmap_sem has not been released. */
+ if (gmap_pfault(gmap)) {
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
+ }
+ } else
+#endif
+ down_read(&mm->mmap_sem);
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
- down_read(&mm->mmap_sem);
+ flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+ FAULT_FLAG_RETRY_NOWAIT);
goto retry;
}
}
diff -urpN linux-2.6/arch/s390/mm/pgtable.c linux-2.6-patched/arch/s390/mm/pgtable.c
--- linux-2.6/arch/s390/mm/pgtable.c 2011-11-17 10:03:47.000000000 +0100
+++ linux-2.6-patched/arch/s390/mm/pgtable.c 2011-11-17 10:03:53.000000000 +0100
@@ -398,6 +398,7 @@ unsigned long __gmap_fault(unsigned long
pmd_t *pmd;
current->thread.gmap_addr = address;
+ current->thread.gmap_pfault = 0;
mm = gmap->mm;
/* Walk the gmap address space page table */
table = gmap->table + ((address >> 53) & 0x7ff);
@@ -514,6 +515,12 @@ void gmap_discard(unsigned long from, un
}
EXPORT_SYMBOL_GPL(gmap_discard);
+int gmap_pfault(struct gmap *gmap)
+{
+ current->thread.gmap_pfault = 1;
+ return 1;
+}
+
void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
{
struct gmap_rmap *rmap, *next;
^ permalink raw reply [flat|nested] 8+ messages in thread* [patch 2/2] [PATCH] kvm-s390: pseudo page fault support
2011-11-17 11:19 [patch 0/2] kvm-s390: asynchronous page faults Carsten Otte
2011-11-17 11:19 ` [patch 1/2] [PATCH] kvm: nowait retry for asynchronous " Carsten Otte
@ 2011-11-17 11:19 ` Carsten Otte
2011-11-17 13:18 ` Avi Kivity
1 sibling, 1 reply; 8+ messages in thread
From: Carsten Otte @ 2011-11-17 11:19 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti
Cc: Christian Borntraeger, Heiko Carstens, Martin Schwidefsky,
Cornelia Huck, KVM
[-- Attachment #1: 503-kvm-pfault.diff --]
[-- Type: text/plain, Size: 19377 bytes --]
From: Carsten Otte <cotte@de.ibm.com>
This patch adds support for pseudo page faults. The corresponding
interface is implemented according to the documentation in CP
programming services.
Diagnose 258 allows the guest to register compare and check masks for
pseudo page faults, and to cancel these masks again. For both
operations, like everywhere else in KVM on z, access register mode
is not supported (ALET is assumed to be 0).
In case a major fault is recognized for a virtual machine, the page
fault path triggers IO and kvm_s390_handle_pfault is called in order
to determine if the fault can be handled asynchronously. In case the
fault condition qualifies for asynchronous treatment, the guest is
notified. Otherwise the vcpu thread synchronously waits for the page
to become available prior to reentry into SIE.
One kernel thread per virtual machine gets notified for all
asynchronous page fault events for its VM. Subsequently it waits for
the page to be faulted in by calling fault_in_pages_readable, and it
notifies the guest that the page fault operation has completed.
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/include/asm/kvm_host.h | 8 +
arch/s390/kvm/Makefile | 2
arch/s390/kvm/diag.c | 3
arch/s390/kvm/interrupt.c | 109 ++++++++++++++---
arch/s390/kvm/kvm-s390.c | 20 ++-
arch/s390/kvm/kvm-s390.h | 9 +
arch/s390/kvm/pfault.c | 247 +++++++++++++++++++++++++++++++++++++++
include/linux/kvm.h | 2
8 files changed, 380 insertions(+), 20 deletions(-)
diff -urpN linux-2.6/arch/s390/include/asm/kvm_host.h linux-2.6-patched/arch/s390/include/asm/kvm_host.h
--- linux-2.6/arch/s390/include/asm/kvm_host.h 2011-11-17 10:03:53.000000000 +0100
+++ linux-2.6-patched/arch/s390/include/asm/kvm_host.h 2011-11-17 10:03:53.000000000 +0100
@@ -240,6 +240,10 @@ struct kvm_vcpu_arch {
u64 stidp_data;
};
struct gmap *gmap;
+#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
+ unsigned long pfault_token;
+ unsigned long pfault_select;
+ unsigned long pfault_compare;
};
struct kvm_vm_stat {
@@ -251,6 +255,10 @@ struct kvm_arch{
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct gmap *gmap;
+ struct task_struct *pfault_thread;
+ spinlock_t pfault_list_lock;
+ struct list_head pfault_list;
+ wait_queue_head_t pfault_wait;
};
extern int sie64a(struct kvm_s390_sie_block *, unsigned long *);
diff -urpN linux-2.6/arch/s390/kvm/diag.c linux-2.6-patched/arch/s390/kvm/diag.c
--- linux-2.6/arch/s390/kvm/diag.c 2011-11-17 10:03:52.000000000 +0100
+++ linux-2.6-patched/arch/s390/kvm/diag.c 2011-11-17 10:03:53.000000000 +0100
@@ -14,6 +14,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include "kvm-s390.h"
+#include "gaccess.h"
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
@@ -89,6 +90,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu
return diag_release_pages(vcpu);
case 0x44:
return __diag_time_slice_end(vcpu);
+ case 0x258:
+ return kvm_s390_diag_page_reference_service(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
default:
diff -urpN linux-2.6/arch/s390/kvm/interrupt.c linux-2.6-patched/arch/s390/kvm/interrupt.c
--- linux-2.6/arch/s390/kvm/interrupt.c 2011-11-17 10:03:52.000000000 +0100
+++ linux-2.6-patched/arch/s390/kvm/interrupt.c 2011-11-17 10:03:53.000000000 +0100
@@ -56,6 +56,8 @@ static int __interrupt_is_deliverable(st
return 1;
return 0;
case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
if (psw_extint_disabled(vcpu))
return 0;
if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
@@ -67,6 +69,8 @@ static int __interrupt_is_deliverable(st
case KVM_S390_RESTART:
return 1;
default:
+ printk(KERN_WARNING "illegal interrupt type %llx\n",
+ inti->type);
BUG();
}
return 0;
@@ -107,6 +111,8 @@ static void __set_intercept_indicator(st
case KVM_S390_INT_EMERGENCY:
case KVM_S390_INT_SERVICE:
case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
if (psw_extint_disabled(vcpu))
__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
else
@@ -226,6 +232,60 @@ static void __do_deliver_interrupt(struc
exception = 1;
break;
+ case KVM_S390_INT_PFAULT_INIT:
+ VCPU_EVENT(vcpu, 4, "interrupt: pfault init token %llx",
+ inti->ext.ext_params2);
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0600);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2,
+ inti->ext.ext_params2);
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_INT_PFAULT_DONE:
+ VCPU_EVENT(vcpu, 4, "interrupt: pfault done token %llx",
+ inti->ext.ext_params2);
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0680);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2,
+ inti->ext.ext_params2);
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
case KVM_S390_SIGP_STOP:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
vcpu->stat.deliver_stop_signal++;
@@ -553,6 +613,8 @@ int kvm_s390_inject_vm(struct kvm *kvm,
case KVM_S390_SIGP_STOP:
case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
default:
kfree(inti);
return -EINVAL;
@@ -582,10 +644,26 @@ int kvm_s390_inject_vm(struct kvm *kvm,
return 0;
}
+void __kvm_s390_inject_vcpu(struct kvm_s390_local_interrupt *li,
+ struct kvm_s390_interrupt_info *inti)
+{
+ spin_lock_bh(&li->lock);
+ if (inti->type == KVM_S390_PROGRAM_INT)
+ list_add(&inti->list, &li->list);
+ else
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ if (inti->type == KVM_S390_SIGP_STOP)
+ li->action_bits |= ACTION_STOP_ON_STOP;
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ spin_unlock_bh(&li->lock);
+}
+
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int)
{
- struct kvm_s390_local_interrupt *li;
struct kvm_s390_interrupt_info *inti;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
@@ -617,26 +695,27 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu
inti->type = s390int->type;
break;
case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_SERVICE:
default:
kfree(inti);
return -EINVAL;
}
-
mutex_lock(&vcpu->kvm->lock);
- li = &vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
- if (inti->type == KVM_S390_PROGRAM_INT)
- list_add(&inti->list, &li->list);
- else
- list_add_tail(&inti->list, &li->list);
- atomic_set(&li->active, 1);
- if (inti->type == KVM_S390_SIGP_STOP)
- li->action_bits |= ACTION_STOP_ON_STOP;
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&li->wq))
- wake_up_interruptible(&vcpu->arch.local_int.wq);
- spin_unlock_bh(&li->lock);
+ __kvm_s390_inject_vcpu(&vcpu->arch.local_int, inti);
mutex_unlock(&vcpu->kvm->lock);
+
+ return 0;
+}
+
+int kvm_s390_should_pfault(struct kvm_vcpu *vcpu)
+{
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (kvm_cpu_has_interrupt(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+ return 1;
return 0;
}
diff -urpN linux-2.6/arch/s390/kvm/kvm-s390.c linux-2.6-patched/arch/s390/kvm/kvm-s390.c
--- linux-2.6/arch/s390/kvm/kvm-s390.c 2011-11-17 10:03:53.000000000 +0100
+++ linux-2.6-patched/arch/s390/kvm/kvm-s390.c 2011-11-17 10:03:53.000000000 +0100
@@ -201,6 +201,8 @@ int kvm_arch_init_vm(struct kvm *kvm)
if (!kvm->arch.gmap)
goto out_nogmap;
+ kvm_s390_pfault_init(kvm);
+
return 0;
out_nogmap:
debug_unregister(kvm->arch.dbf);
@@ -213,6 +215,7 @@ out_err:
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+ kvm_s390_pfault_clear_cpu(vcpu);
clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn);
if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
(__u64) vcpu->arch.sie_block)
@@ -249,6 +252,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
gmap_free(kvm->arch.gmap);
+ kvm_s390_pfault_exit(kvm);
}
/* Section: vcpu related */
@@ -356,6 +360,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
init_waitqueue_head(&vcpu->arch.local_int.wq);
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
spin_unlock(&kvm->arch.float_int.lock);
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
@@ -468,6 +473,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(stru
static void __vcpu_run(struct kvm_vcpu *vcpu)
{
+ int rc;
+
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
if (need_resched())
@@ -484,12 +491,17 @@ static void __vcpu_run(struct kvm_vcpu *
local_irq_enable();
VCPU_EVENT(vcpu, 6, "entering sie flags %x",
atomic_read(&vcpu->arch.sie_block->cpuflags));
- if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
- VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
- kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- }
+ rc = sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
+ if (rc) {
+ if (current->thread.gmap_pfault) {
+ kvm_s390_handle_pfault(vcpu);
+ } else {
+ VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+ }
local_irq_disable();
kvm_guest_exit();
local_irq_enable();
diff -urpN linux-2.6/arch/s390/kvm/kvm-s390.h linux-2.6-patched/arch/s390/kvm/kvm-s390.h
--- linux-2.6/arch/s390/kvm/kvm-s390.h 2011-10-24 09:10:05.000000000 +0200
+++ linux-2.6-patched/arch/s390/kvm/kvm-s390.h 2011-11-17 10:03:53.000000000 +0100
@@ -53,10 +53,13 @@ void kvm_s390_tasklet(unsigned long parm
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
int kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
+void __kvm_s390_inject_vcpu(struct kvm_s390_local_interrupt *,
+ struct kvm_s390_interrupt_info *);
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+int kvm_s390_should_pfault(struct kvm_vcpu *vcpu);
/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -71,4 +74,10 @@ int kvm_s390_vcpu_store_status(struct kv
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+/* implemented in pfault.c */
+int kvm_s390_diag_page_reference_service(struct kvm_vcpu *vcpu);
+void kvm_s390_handle_pfault(struct kvm_vcpu *vcpu);
+void kvm_s390_pfault_init(struct kvm *kvm);
+void kvm_s390_pfault_exit(struct kvm *kvm);
+void kvm_s390_pfault_clear_cpu(struct kvm_vcpu *vcpu);
#endif
diff -urpN linux-2.6/arch/s390/kvm/Makefile linux-2.6-patched/arch/s390/kvm/Makefile
--- linux-2.6/arch/s390/kvm/Makefile 2011-10-24 09:10:05.000000000 +0200
+++ linux-2.6-patched/arch/s390/kvm/Makefile 2011-11-17 10:03:53.000000000 +0100
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o pfault.o
obj-$(CONFIG_KVM) += kvm.o
diff -urpN linux-2.6/arch/s390/kvm/pfault.c linux-2.6-patched/arch/s390/kvm/pfault.c
--- linux-2.6/arch/s390/kvm/pfault.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6-patched/arch/s390/kvm/pfault.c 2011-11-17 10:03:53.000000000 +0100
@@ -0,0 +1,247 @@
+/*
+* pfault.c -- pseudo page fault support for KVM on s390
+*
+* Copyright IBM Corp. 2011
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License (version 2 only)
+* as published by the Free Software Foundation.
+*
+* Author(s): Carsten Otte <cotte@de.ibm.com>
+*/
+
+#include <linux/kthread.h>
+#include <linux/spinlock.h>
+#include <linux/pagemap.h>
+#include <linux/hash.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+struct pfault_event {
+ struct list_head pfault_list_element;
+ struct kvm_s390_local_interrupt *local_int;
+ struct kvm_s390_interrupt_info *inti;
+ unsigned long uaddr;
+};
+
+int kvm_s390_diag_page_reference_service(struct kvm_vcpu *vcpu)
+{
+ struct prs_parm {
+ u16 code;
+ u16 subcode;
+ u16 parm_len;
+ u16 parm_version;
+ u64 token_addr;
+ u64 select_mask;
+ u64 compare_mask;
+ u64 zarch;
+ };
+
+ struct prs_parm parm;
+ long rc;
+ u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+ u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+
+ /* we don't support AR-Mode here, ALET is assumed to be 000000 */
+ if (copy_from_guest_absolute(vcpu, &parm, vcpu->arch.guest_gprs[rx],
+ sizeof(parm))) {
+ VCPU_EVENT(vcpu, 3, "%s",
+ "fault while fetching parameters for diag 258");
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = 0;
+ goto out;
+ }
+
+ if (parm.parm_version != 2) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ switch (parm.subcode) {
+ case 0: /* TOKEN function */
+ vcpu->arch.pfault_token = parm.token_addr;
+ vcpu->arch.pfault_select = parm.select_mask;
+ vcpu->arch.pfault_compare = parm.compare_mask;
+ vcpu->arch.guest_gprs[ry] = 0;
+ VCPU_EVENT(vcpu, 3, "%s", "pfault enabled");
+ rc = 0;
+ break;
+ case 1: /* Page-Fault-Cancel function */
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ vcpu->arch.guest_gprs[ry] = 0;
+ VCPU_EVENT(vcpu, 3, "%s", "pfault canceled\n");
+ rc = 0;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ }
+out:
+ return rc;
+}
+
+static void kvm_s390_pfault_sync(struct kvm_vcpu *vcpu)
+{
+ unsigned long uaddr = gmap_fault(current->thread.gmap_addr,
+ vcpu->arch.gmap);
+
+ if (IS_ERR_VALUE(uaddr))
+ return;
+
+ VCPU_EVENT(vcpu, 5, "synchronous page fault at guest %lx user %lx",
+ current->thread.gmap_addr, uaddr);
+
+ fault_in_pages_readable((char __user *)uaddr, PAGE_SIZE);
+}
+
+static void kvm_s390_pfault_async(struct kvm_vcpu *vcpu)
+{
+ unsigned long uaddr = gmap_fault(current->thread.gmap_addr,
+ vcpu->arch.gmap);
+ struct pfault_event *event;
+ struct kvm_s390_interrupt_info *init, *done;
+ unsigned long pfault_token;
+
+ if (IS_ERR_VALUE(uaddr))
+ return;
+
+ if (!kvm_s390_should_pfault(vcpu)) {
+ kvm_s390_pfault_sync(vcpu);
+ return;
+ }
+
+ copy_from_guest(vcpu, &pfault_token, vcpu->arch.pfault_token,
+ 8);
+
+ init = kzalloc(sizeof(*init), GFP_ATOMIC);
+ if (!init)
+ return;
+
+ done = kzalloc(sizeof(*done), GFP_ATOMIC);
+ if (!done)
+ goto out_init;
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event)
+ goto out_done;
+
+ init->type = KVM_S390_INT_PFAULT_INIT;
+ init->ext.ext_params2 = pfault_token;
+
+ done->type = KVM_S390_INT_PFAULT_DONE;
+ done->ext.ext_params2 = pfault_token;
+
+ event->inti = done;
+ event->uaddr = uaddr;
+ event->local_int = &vcpu->arch.local_int;
+
+ VCPU_EVENT(vcpu, 5,
+ "initiating pfault for token %lx at guest %lx user %lx",
+ pfault_token, current->thread.gmap_addr, uaddr);
+
+ __kvm_s390_inject_vcpu(&vcpu->arch.local_int, init);
+
+ spin_lock_bh(&vcpu->kvm->arch.pfault_list_lock);
+ list_add_tail(&event->pfault_list_element,
+ &vcpu->kvm->arch.pfault_list);
+ wake_up(&vcpu->kvm->arch.pfault_wait);
+ spin_unlock_bh(&vcpu->kvm->arch.pfault_list_lock);
+ return;
+
+out_done:
+ kfree(done);
+out_init:
+ kfree(init);
+}
+
+void kvm_s390_handle_pfault(struct kvm_vcpu *vcpu)
+{
+ unsigned long mask;
+
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ goto synchronous;
+
+ mask = vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select;
+
+ if (mask != vcpu->arch.pfault_compare)
+ goto synchronous;
+
+ kvm_s390_pfault_async(vcpu);
+ return;
+synchronous:
+ kvm_s390_pfault_sync(vcpu);
+}
+
+static int pfault_thread_fn(void *data)
+{
+ struct pfault_event *event, *n;
+ struct pfault_event *dequeued;
+ struct kvm *kvm = (struct kvm *)data;
+ wait_queue_t wait;
+
+ init_waitqueue_entry(&wait, current);
+ add_wait_queue(&kvm->arch.pfault_wait, &wait);
+ while (1) {
+ spin_lock_bh(&kvm->arch.pfault_list_lock);
+ current->state = TASK_INTERRUPTIBLE;
+ dequeued = NULL;
+ list_for_each_entry_safe(event, n, &kvm->arch.pfault_list,
+ pfault_list_element) {
+ if (!dequeued) {
+ list_del_init(&event->pfault_list_element);
+ dequeued = event;
+ }
+ }
+ spin_unlock_bh(&kvm->arch.pfault_list_lock);
+ if (kthread_should_stop()) {
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&kvm->arch.pfault_wait, &wait);
+ return 0;
+ }
+ if (dequeued) {
+ current->state = TASK_RUNNING;
+ fault_in_pages_readable((char __user *)dequeued->uaddr,
+ PAGE_SIZE);
+ __kvm_s390_inject_vcpu(dequeued->local_int,
+ dequeued->inti);
+ kfree(dequeued);
+ } else {
+ schedule();
+ }
+ }
+}
+
+void kvm_s390_pfault_clear_cpu(struct kvm_vcpu *vcpu)
+{
+ struct pfault_event *event, *n;
+
+ spin_lock_bh(&vcpu->kvm->arch.pfault_list_lock);
+ list_for_each_entry_safe(event, n, &vcpu->kvm->arch.pfault_list,
+ pfault_list_element) {
+ if (event->local_int != &vcpu->arch.local_int)
+ continue;
+ list_del_init(&event->pfault_list_element);
+ kfree(event->inti);
+ kfree(event);
+ }
+ spin_unlock_bh(&vcpu->kvm->arch.pfault_list_lock);
+}
+
+void kvm_s390_pfault_init(struct kvm *kvm)
+{
+ spin_lock_init(&kvm->arch.pfault_list_lock);
+ init_waitqueue_head(&kvm->arch.pfault_wait);
+ INIT_LIST_HEAD(&kvm->arch.pfault_list);
+
+ kvm->arch.pfault_thread = kthread_run(pfault_thread_fn, kvm,
+ "kvm_pfault_mngr");
+ if (IS_ERR(kvm->arch.pfault_thread)) {
+ kvm->arch.pfault_thread = NULL;
+ printk(KERN_WARNING "kvm: pfault initialization failed\n");
+ }
+}
+
+void kvm_s390_pfault_exit(struct kvm *kvm)
+{
+ if (kvm->arch.pfault_thread)
+ kthread_stop(kvm->arch.pfault_thread);
+}
diff -urpN linux-2.6/include/linux/kvm.h linux-2.6-patched/include/linux/kvm.h
--- linux-2.6/include/linux/kvm.h 2011-11-17 10:03:45.000000000 +0100
+++ linux-2.6-patched/include/linux/kvm.h 2011-11-17 10:03:53.000000000 +0100
@@ -372,6 +372,8 @@ struct kvm_s390_psw {
#define KVM_S390_INT_SERVICE 0xffff2401u
#define KVM_S390_INT_EMERGENCY 0xffff1201u
#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
+#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
struct kvm_s390_interrupt {
__u32 type;
^ permalink raw reply [flat|nested] 8+ messages in thread