virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 16:24 ` Carsten Otte
  2008-03-20 16:24 ` [RFC/PATCH 02/15] preparation: host memory management changes for s390 kvm Carsten Otte
                   ` (19 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity,
	Linux Memory Management List
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Martin Schwidefsky <schwidefsky@de.ibm.com>

The SIE instruction on s390 uses the 2nd half of the page table page to
virtualize the storage keys of a guest. This patch offers the s390_enable_sie
function, which reorganizes the page tables of a single-threaded process to
reserve space in the page table:
s390_enable_sie makes sure that the process is single threaded and then uses
dup_mm to create a new mm with reorganized page tables. The old mm is freed 
and the process has now a page status extended field after every page table.

Code that wants to exploit pgstes should SELECT CONFIG_PGSTE.

This patch has a small common code hit, namely making dup_mm non-static.


Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---

 arch/s390/Kconfig              |    4 ++
 arch/s390/kernel/setup.c       |    4 ++
 arch/s390/mm/pgtable.c         |   55 ++++++++++++++++++++++++++++++++++++++---
 include/asm-s390/mmu.h         |    1 
 include/asm-s390/mmu_context.h |    8 +++++
 include/asm-s390/pgtable.h     |    1 
 kernel/fork.c                  |    2 -
 7 files changed, 70 insertions(+), 5 deletions(-)

Index: kvm/arch/s390/Kconfig
===================================================================
--- kvm.orig/arch/s390/Kconfig
+++ kvm/arch/s390/Kconfig
@@ -55,6 +55,10 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
+config PGSTE
+	bool
+	default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
Index: kvm/arch/s390/kernel/setup.c
===================================================================
--- kvm.orig/arch/s390/kernel/setup.c
+++ kvm/arch/s390/kernel/setup.c
@@ -315,7 +315,11 @@ static int __init early_parse_ipldelay(c
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,
Index: kvm/arch/s390/mm/pgtable.c
===================================================================
--- kvm.orig/arch/s390/mm/pgtable.c
+++ kvm/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE	4
 #define FRAG_MASK	15UL
 #define SECOND_HALVES	10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 256, 0, PAGE_SIZE/4);
+	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER	2
 #define TABLES_PER_PAGE	2
 #define FRAG_MASK	3UL
 #define SECOND_HALVES	2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct m
 	unsigned long *table;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	spin_lock(&mm->page_table_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct m
 		pgtable_page_ctor(page);
 		page->flags &= ~FRAG_MASK;
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		if (mm->context.pgstes)
+			clear_table_pgstes(table);
+		else
+			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock(&mm->page_table_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *m
 	struct page *page;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock(&mm->page_table_lock);
@@ -228,3 +247,33 @@ void disable_noexec(struct mm_struct *mm
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
+
+struct mm_struct *dup_mm(struct task_struct *tsk);
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+
+	if (tsk->mm->context.pgstes)
+		return 0;
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+		return -EINVAL;
+	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	mm = dup_mm(tsk);
+	tsk->mm->context.pgstes = 0;
+	if (!mm)
+		return -ENOMEM;
+	mmput(tsk->mm);
+	tsk->mm = tsk->active_mm = mm;
+	preempt_disable();
+	update_mm(mm, tsk);
+	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+	preempt_enable();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
Index: kvm/include/asm-s390/mmu.h
===================================================================
--- kvm.orig/include/asm-s390/mmu.h
+++ kvm/include/asm-s390/mmu.h
@@ -7,6 +7,7 @@ typedef struct {
 	unsigned long asce_bits;
 	unsigned long asce_limit;
 	int noexec;
+	int pgstes;
 } mm_context_t;
 
 #endif
Index: kvm/include/asm-s390/mmu_context.h
===================================================================
--- kvm.orig/include/asm-s390/mmu_context.h
+++ kvm/include/asm-s390/mmu_context.h
@@ -20,7 +20,13 @@ static inline int init_new_context(struc
 #ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
-	mm->context.noexec = s390_noexec;
+	if (current->mm->context.pgstes) {
+		mm->context.noexec = 0;
+		mm->context.pgstes = 1;
+	} else {
+		mm->context.noexec = s390_noexec;
+		mm->context.pgstes = 0;
+	}
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
Index: kvm/include/asm-s390/pgtable.h
===================================================================
--- kvm.orig/include/asm-s390/pgtable.h
+++ kvm/include/asm-s390/pgtable.h
@@ -966,6 +966,7 @@ static inline pte_t mk_swap_pte(unsigned
 
 extern int add_shared_memory(unsigned long start, unsigned long size);
 extern int remove_shared_memory(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
 
 /*
  * No page table caches to initialise
Index: kvm/kernel/fork.c
===================================================================
--- kvm.orig/kernel/fork.c
+++ kvm/kernel/fork.c
@@ -498,7 +498,7 @@ void mm_release(struct task_struct *tsk,
  * Allocate a new mm structure and copy contents from the
  * mm structure of the passed in task structure.
  */
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
 {
 	struct mm_struct *mm, *oldmm = current->mm;
 	int err;

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 02/15] preparation: host memory management changes for s390 kvm
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
  2008-03-20 16:24 ` [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
  2008-03-20 16:24 ` [RFC/PATCH 03/15] preparation: address of the 64bit extint parm in lowcore Carsten Otte
                   ` (18 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity,
	Linux Memory Management List
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Heiko Carstens <heiko.carstens@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch changes the s390 memory management defintions to use the pgste field
for dirty and reference bit tracking of host and guest code. Usually on s390, 
dirty and referenced are tracked in storage keys, which belong to the physical
page. This changes with virtualization: The guest and host dirty/reference bits
are defined to be the logical OR of the values for the mapping and the physical
page. This patch implements the necessary changes in pgtable.h for s390.


There is a common code change in mm/rmap.c, the call to page_test_and_clear_young
must be moved. This is a no-op for all architecture but s390. page_referenced
checks the referenced bits for the physiscal page and for all mappings:
o The physical page is checked with page_test_and_clear_young.
o The mappings are checked with ptep_test_and_clear_young and friends.

Without pgstes (the current implementation on Linux s390) the physical page
check is implemented but the mapping callbacks are no-ops because dirty 
and referenced are not tracked in the s390 page tables. The pgstes introduces 
guest and host dirty and reference bits for s390 in the host mapping. These
mapping must be checked before page_test_and_clear_young resets the reference
bit. 

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 include/asm-s390/pgtable.h |  109 +++++++++++++++++++++++++++++++++++++++++++--
 mm/rmap.c                  |    7 +-
 2 files changed, 110 insertions(+), 6 deletions(-)

Index: kvm/include/asm-s390/pgtable.h
===================================================================
--- kvm.orig/include/asm-s390/pgtable.h
+++ kvm/include/asm-s390/pgtable.h
@@ -30,6 +30,7 @@
  */
 #ifndef __ASSEMBLY__
 #include <linux/mm_types.h>
+#include <asm/atomic.h>
 #include <asm/bug.h>
 #include <asm/processor.h>
 
@@ -258,6 +259,13 @@ extern char empty_zero_page[PAGE_SIZE];
  * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
  */
 
+/* Page status extended for virtualization */
+#define _PAGE_RCP_PCL	0x0080000000000000UL
+#define _PAGE_RCP_HR	0x0040000000000000UL
+#define _PAGE_RCP_HC	0x0020000000000000UL
+#define _PAGE_RCP_GR	0x0004000000000000UL
+#define _PAGE_RCP_GC	0x0002000000000000UL
+
 #ifndef __s390x__
 
 /* Bits in the segment table address-space-control-element */
@@ -513,6 +521,67 @@ static inline int pte_file(pte_t pte)
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(a,b)  (pte_val(a) == pte_val(b))
 
+static inline void rcp_lock(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+	atomic64_t *rcp = (atomic64_t *) (ptep + PTRS_PER_PTE);
+	preempt_disable();
+	atomic64_set_mask(_PAGE_RCP_PCL, rcp);
+#endif
+}
+
+static inline void rcp_unlock(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+	atomic64_t *rcp = (atomic64_t *) (ptep + PTRS_PER_PTE);
+	atomic64_clear_mask(_PAGE_RCP_PCL, rcp);
+	preempt_enable();
+#endif
+}
+
+static inline void rcp_set_bits(pte_t *ptep, unsigned long val)
+{
+#ifdef CONFIG_PGSTE
+	*(unsigned long *) (ptep + PTRS_PER_PTE) |= val;
+#endif
+}
+
+static inline int rcp_test_and_clear_bits(pte_t *ptep, unsigned long val)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long ret;
+
+	ret = *(unsigned long *) (ptep + PTRS_PER_PTE);
+	*(unsigned long *) (ptep + PTRS_PER_PTE) &= ~val;
+	return (ret & val) == val;
+#else
+	return 0;
+#endif
+}
+
+
+/* forward declaration for SetPageUptodate in page-flags.h*/
+static inline void page_clear_dirty(struct page *page);
+#include <linux/page-flags.h>
+
+static inline void ptep_rcp_copy(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+	struct page *page = virt_to_page(pte_val(*ptep));
+	unsigned int skey;
+
+	skey = page_get_storage_key(page_to_phys(page));
+	if (skey & _PAGE_CHANGED)
+		rcp_set_bits(ptep, _PAGE_RCP_GC);
+	if (skey & _PAGE_REFERENCED)
+		rcp_set_bits(ptep, _PAGE_RCP_GR);
+	if (rcp_test_and_clear_bits(ptep, _PAGE_RCP_HC))
+		SetPageDirty(page);
+	if (rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR))
+		SetPageReferenced(page);
+#endif
+}
+
 /*
  * query functions pte_write/pte_dirty/pte_young only work if
  * pte_present() is true. Undefined behaviour if not..
@@ -599,6 +668,8 @@ static inline void pmd_clear(pmd_t *pmd)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
+	if (mm->context.pgstes)
+		ptep_rcp_copy(ptep);
 	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 	if (mm->context.noexec)
 		pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
@@ -667,6 +738,22 @@ static inline pte_t pte_mkyoung(pte_t pt
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
 {
+#ifdef CONFIG_PGSTE
+	unsigned long physpage;
+	int young;
+
+	if (!vma->vm_mm->context.pgstes)
+		return 0;
+	physpage = pte_val(*ptep) & PAGE_MASK;
+
+	young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0);
+	rcp_lock(ptep);
+	if (young)
+		rcp_set_bits(ptep, _PAGE_RCP_GR);
+	young |= rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR);
+	rcp_unlock(ptep);
+	return young;
+#endif
 	return 0;
 }
 
@@ -674,7 +761,13 @@ static inline int ptep_test_and_clear_yo
 static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 					 unsigned long address, pte_t *ptep)
 {
-	/* No need to flush TLB; bits are in storage key */
+	/* No need to flush TLB
+	 * On s390 reference bits are in storage key and never in TLB
+	 * With virtualization we handle the reference bit, without we
+	 * we can simply return */
+#ifdef CONFIG_PGSTE
+	return ptep_test_and_clear_young(vma, address, ptep);
+#endif
 	return 0;
 }
 
@@ -693,15 +786,25 @@ static inline void __ptep_ipte(unsigned 
 			: "=m" (*ptep) : "m" (*ptep),
 			  "a" (pto), "a" (address));
 	}
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 }
 
 static inline void ptep_invalidate(struct mm_struct *mm,
 				   unsigned long address, pte_t *ptep)
 {
+	if (mm->context.pgstes) {
+		rcp_lock(ptep);
+		__ptep_ipte(address, ptep);
+		ptep_rcp_copy(ptep);
+		pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+		rcp_unlock(ptep);
+		return;
+	}
 	__ptep_ipte(address, ptep);
-	if (mm->context.noexec)
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+	if (mm->context.noexec) {
 		__ptep_ipte(address, ptep + PTRS_PER_PTE);
+		pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
+	}
 }
 
 /*
Index: kvm/mm/rmap.c
===================================================================
--- kvm.orig/mm/rmap.c
+++ kvm/mm/rmap.c
@@ -411,9 +411,6 @@ int page_referenced(struct page *page, i
 {
 	int referenced = 0;
 
-	if (page_test_and_clear_young(page))
-		referenced++;
-
 	if (TestClearPageReferenced(page))
 		referenced++;
 
@@ -431,6 +428,10 @@ int page_referenced(struct page *page, i
 			unlock_page(page);
 		}
 	}
+
+	if (page_test_and_clear_young(page))
+		referenced++;
+
 	return referenced;
 }

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 03/15] preparation: address of the 64bit extint parm in lowcore
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
  2008-03-20 16:24 ` [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable Carsten Otte
  2008-03-20 16:24 ` [RFC/PATCH 02/15] preparation: host memory management changes for s390 kvm Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
  2008-03-20 16:24 ` [RFC/PATCH 04/15] preparation: split sysinfo defintions for kvm use Carsten Otte
                   ` (17 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Christian Borntraeger <borntraeger@de.ibm.com>

The address 0x11b8 is used by z/VM for pfault and diag 250 I/O to
provide a 64 bit extint parameter. virtio uses the same address, so
its time to update the lowcore structure.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 include/asm-s390/lowcore.h |   15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

Index: kvm/include/asm-s390/lowcore.h
===================================================================
--- kvm.orig/include/asm-s390/lowcore.h
+++ kvm/include/asm-s390/lowcore.h
@@ -380,27 +380,32 @@ struct _lowcore
         /* whether the kernel died with panic() or not */
         __u32        panic_magic;              /* 0xe00 */
 
-	__u8         pad13[0x1200-0xe04];      /* 0xe04 */
+	__u8         pad13[0x11b8-0xe04];      /* 0xe04 */
+
+	/* 64 bit extparam used for pfault, diag 250 etc  */
+	__u64        ext_params2;               /* 0x11B8 */
+
+	__u8         pad14[0x1200-0x11C0];      /* 0x11C0 */
 
         /* System info area */ 
 
 	__u64        floating_pt_save_area[16]; /* 0x1200 */
 	__u64        gpregs_save_area[16];      /* 0x1280 */
 	__u32        st_status_fixed_logout[4]; /* 0x1300 */
-	__u8         pad14[0x1318-0x1310];      /* 0x1310 */
+	__u8         pad15[0x1318-0x1310];      /* 0x1310 */
 	__u32        prefixreg_save_area;       /* 0x1318 */
 	__u32        fpt_creg_save_area;        /* 0x131c */
-	__u8         pad15[0x1324-0x1320];      /* 0x1320 */
+	__u8         pad16[0x1324-0x1320];      /* 0x1320 */
 	__u32        tod_progreg_save_area;     /* 0x1324 */
 	__u32        cpu_timer_save_area[2];    /* 0x1328 */
 	__u32        clock_comp_save_area[2];   /* 0x1330 */
-	__u8         pad16[0x1340-0x1338];      /* 0x1338 */ 
+	__u8         pad17[0x1340-0x1338];      /* 0x1338 */
 	__u32        access_regs_save_area[16]; /* 0x1340 */ 
 	__u64        cregs_save_area[16];       /* 0x1380 */
 
 	/* align to the top of the prefix area */
 
-	__u8         pad17[0x2000-0x1400];      /* 0x1400 */
+	__u8         pad18[0x2000-0x1400];      /* 0x1400 */
 #endif /* !__s390x__ */
 } __attribute__((packed)); /* End structure*/

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 04/15] preparation: split sysinfo defintions for kvm use
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (2 preceding siblings ...)
  2008-03-20 16:24 ` [RFC/PATCH 03/15] preparation: address of the 64bit extint parm in lowcore Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
  2008-03-20 16:24 ` [RFC/PATCH 05/15] kvm-s390: s390 arch backend for the kvm kernel module Carsten Otte
                   ` (16 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Christian Borntraeger <borntraeger@de.ibm.com>

drivers/s390/sysinfo.c uses the store system information intruction to query
the system about information of the machine, the LPAR and additional 
hypervisors. KVM has to implement the host part for this instruction. 

To avoid code duplication, this patch splits the common definitions from
sysinfo.c into a separate header file include/asm-s390/sysinfo.h for KVM use.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 drivers/s390/sysinfo.c     |  100 ----------------------------------------
 include/asm-s390/sysinfo.h |  112 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 99 deletions(-)

Index: kvm/drivers/s390/sysinfo.c
===================================================================
--- kvm.orig/drivers/s390/sysinfo.c
+++ kvm/drivers/s390/sysinfo.c
@@ -11,111 +11,13 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
 
 /* Sigh, math-emu. Don't ask. */
 #include <asm/sfp-util.h>
 #include <math-emu/soft-fp.h>
 #include <math-emu/single.h>
 
-struct sysinfo_1_1_1 {
-	char reserved_0[32];
-	char manufacturer[16];
-	char type[4];
-	char reserved_1[12];
-	char model_capacity[16];
-	char sequence[16];
-	char plant[4];
-	char model[16];
-};
-
-struct sysinfo_1_2_1 {
-	char reserved_0[80];
-	char sequence[16];
-	char plant[4];
-	char reserved_1[2];
-	unsigned short cpu_address;
-};
-
-struct sysinfo_1_2_2 {
-	char format;
-	char reserved_0[1];
-	unsigned short acc_offset;
-	char reserved_1[24];
-	unsigned int secondary_capability;
-	unsigned int capability;
-	unsigned short cpus_total;
-	unsigned short cpus_configured;
-	unsigned short cpus_standby;
-	unsigned short cpus_reserved;
-	unsigned short adjustment[0];
-};
-
-struct sysinfo_1_2_2_extension {
-	unsigned int alt_capability;
-	unsigned short alt_adjustment[0];
-};
-
-struct sysinfo_2_2_1 {
-	char reserved_0[80];
-	char sequence[16];
-	char plant[4];
-	unsigned short cpu_id;
-	unsigned short cpu_address;
-};
-
-struct sysinfo_2_2_2 {
-	char reserved_0[32];
-	unsigned short lpar_number;
-	char reserved_1;
-	unsigned char characteristics;
-	unsigned short cpus_total;
-	unsigned short cpus_configured;
-	unsigned short cpus_standby;
-	unsigned short cpus_reserved;
-	char name[8];
-	unsigned int caf;
-	char reserved_2[16];
-	unsigned short cpus_dedicated;
-	unsigned short cpus_shared;
-};
-
-#define LPAR_CHAR_DEDICATED	(1 << 7)
-#define LPAR_CHAR_SHARED	(1 << 6)
-#define LPAR_CHAR_LIMITED	(1 << 5)
-
-struct sysinfo_3_2_2 {
-	char reserved_0[31];
-	unsigned char count;
-	struct {
-		char reserved_0[4];
-		unsigned short cpus_total;
-		unsigned short cpus_configured;
-		unsigned short cpus_standby;
-		unsigned short cpus_reserved;
-		char name[8];
-		unsigned int caf;
-		char cpi[16];
-		char reserved_1[24];
-
-	} vm[8];
-};
-
-static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
-	register int r0 asm("0") = (fc << 28) | sel1;
-	register int r1 asm("1") = sel2;
-
-	asm volatile(
-		"   stsi 0(%2)\n"
-		"0: jz   2f\n"
-		"1: lhi  %0,%3\n"
-		"2:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
-		: "cc", "memory" );
-	return r0;
-}
-
 static inline int stsi_0(void)
 {
 	int rc = stsi (NULL, 0, 0, 0);
Index: kvm/include/asm-s390/sysinfo.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/sysinfo.h
@@ -0,0 +1,112 @@
+/*
+ * definition for store system information stsi
+ *
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Ulrich Weigand <weigand@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+struct sysinfo_1_1_1 {
+	char reserved_0[32];
+	char manufacturer[16];
+	char type[4];
+	char reserved_1[12];
+	char model_capacity[16];
+	char sequence[16];
+	char plant[4];
+	char model[16];
+};
+
+struct sysinfo_1_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	char reserved_1[2];
+	unsigned short cpu_address;
+};
+
+struct sysinfo_1_2_2 {
+	char format;
+	char reserved_0[1];
+	unsigned short acc_offset;
+	char reserved_1[24];
+	unsigned int secondary_capability;
+	unsigned int capability;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	unsigned short adjustment[0];
+};
+
+struct sysinfo_1_2_2_extension {
+	unsigned int alt_capability;
+	unsigned short alt_adjustment[0];
+};
+
+struct sysinfo_2_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	unsigned short cpu_id;
+	unsigned short cpu_address;
+};
+
+struct sysinfo_2_2_2 {
+	char reserved_0[32];
+	unsigned short lpar_number;
+	char reserved_1;
+	unsigned char characteristics;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	char name[8];
+	unsigned int caf;
+	char reserved_2[16];
+	unsigned short cpus_dedicated;
+	unsigned short cpus_shared;
+};
+
+#define LPAR_CHAR_DEDICATED	(1 << 7)
+#define LPAR_CHAR_SHARED	(1 << 6)
+#define LPAR_CHAR_LIMITED	(1 << 5)
+
+struct sysinfo_3_2_2 {
+	char reserved_0[31];
+	unsigned char count;
+	struct {
+		char reserved_0[4];
+		unsigned short cpus_total;
+		unsigned short cpus_configured;
+		unsigned short cpus_standby;
+		unsigned short cpus_reserved;
+		char name[8];
+		unsigned int caf;
+		char cpi[16];
+		char reserved_1[24];
+
+	} vm[8];
+};
+
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	register int r0 asm("0") = (fc << 28) | sel1;
+	register int r1 asm("1") = sel2;
+
+	asm volatile(
+		"   stsi 0(%2)\n"
+		"0: jz   2f\n"
+		"1: lhi  %0,%3\n"
+		"2:\n"
+		EX_TABLE(0b, 1b)
+		: "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
+		: "cc", "memory");
+	return r0;
+}
+

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 05/15] kvm-s390: s390 arch backend for the kvm kernel module
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (3 preceding siblings ...)
  2008-03-20 16:24 ` [RFC/PATCH 04/15] preparation: split sysinfo defintions for kvm use Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 06/15] kvm-s390: sie intercept handling Carsten Otte
                   ` (15 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
From: Heiko Carstens <heiko.carstens@de.ibm.com>

This patch contains the port of Qumranet's kvm kernel module to IBM zSeries
 (aka s390x, mainframe) architecture. It uses the mainframe's virtualization
instruction SIE to run virtual machines with up to 64 virtual CPUs each.
This port is only usable on 64bit host kernels, and can only run 64bit guest
kernels. However, running 31bit applications in guest userspace is possible.

The following source files are introduced by this patch
arch/s390/kvm/kvm-s390.c    similar to arch/x86/kvm/x86.c, this implements all
                            arch callbacks for kvm. __vcpu_run calls back into
                            sie64a to enter the guest machine context
arch/s390/kvm/sie64a.S      assembler function sie64a, which enters guest
                            context via SIE, and switches world before and after                            that
include/asm-s390/kvm_host.h contains all vital data structures needed to run
                            virtual machines on the mainframe
include/asm-s390/kvm.h      defines kvm_regs and friends for user access to
                            guest register content
arch/s390/kvm/gaccess.h     functions similar to uaccess to access guest memory
arch/s390/kvm/kvm-s390.h    header file for kvm-s390 internals, extended by
                            later patches

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/Makefile          |    2 
 arch/s390/kernel/vtime.c    |    1 
 arch/s390/kvm/Makefile      |   14 +
 arch/s390/kvm/gaccess.h     |  280 +++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c    |  574 ++++++++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.h    |   29 ++
 arch/s390/kvm/sie64a.S      |   47 +++
 include/asm-s390/Kbuild     |    1 
 include/asm-s390/kvm.h      |   44 +++
 include/asm-s390/kvm_host.h |  119 +++++++++
 include/asm-s390/kvm_para.h |   30 ++
 include/linux/kvm.h         |   15 +
 include/linux/kvm_host.h    |    4 
 13 files changed, 1159 insertions(+), 1 deletion(-)

Index: kvm/arch/s390/Makefile
===================================================================
--- kvm.orig/arch/s390/Makefile
+++ kvm/arch/s390/Makefile
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start
 head-y		:= arch/s390/kernel/head.o arch/s390/kernel/init_task.o
 
 core-y		+= arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
-		   arch/s390/appldata/ arch/s390/hypfs/
+		   arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
 libs-y		+= arch/s390/lib/
 drivers-y	+= drivers/s390/
 drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
Index: kvm/arch/s390/kernel/vtime.c
===================================================================
--- kvm.orig/arch/s390/kernel/vtime.c
+++ kvm/arch/s390/kernel/vtime.c
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_st
 	S390_lowcore.steal_clock -= cputime << 12;
 	account_system_time(tsk, 0, cputime);
 }
+EXPORT_SYMBOL_GPL(account_system_vtime);
 
 static inline void set_vtimer(__u64 expires)
 {
Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o
+obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/gaccess.h
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/gaccess.h
@@ -0,0 +1,280 @@
+/*
+ * gaccess.h -  access guest memory
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/uaccess.h>
+
+static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
+					       u64 guestaddr)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestaddr < 2 * PAGE_SIZE)
+		guestaddr += prefix;
+	else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
+		guestaddr -= prefix;
+
+	if (guestaddr > memsize)
+		return (void __user __force *) ERR_PTR(-EFAULT);
+
+	guestaddr += origin;
+
+	return (void __user *) guestaddr;
+}
+
+static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u64 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (guestaddr & 7)
+		BUG();
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u64 __user *) uptr);
+}
+
+static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u32 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (guestaddr & 3)
+		BUG();
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u32 __user *) uptr);
+}
+
+static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u16 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (guestaddr & 1)
+		BUG();
+
+	if (IS_ERR(uptr))
+		return PTR_ERR(uptr);
+
+	return get_user(*result, (u16 __user *) uptr);
+}
+
+static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+			       u8 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u8 __user *) uptr);
+}
+
+static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u64 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (guestaddr & 7)
+		BUG();
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u64 __user *) uptr);
+}
+
+static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u32 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (guestaddr & 3)
+		BUG();
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u32 __user *) uptr);
+}
+
+static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u16 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (guestaddr & 1)
+		BUG();
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u16 __user *) uptr);
+}
+
+static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+			       u8 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u8 __user *) uptr);
+}
+
+
+static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
+				       const void *from, unsigned long n)
+{
+	int rc;
+	unsigned long i;
+	const u8 *data = from;
+
+	for (i = 0; i < n; i++) {
+		rc = put_guest_u8(vcpu, guestdest++, *(data++));
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
+				const void *from, unsigned long n)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if ((guestdest < prefix) && (guestdest + n > prefix))
+		goto slowpath;
+
+	if ((guestdest < prefix + 2 * PAGE_SIZE)
+	    && (guestdest + n > prefix + 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if (guestdest < 2 * PAGE_SIZE)
+		guestdest += prefix;
+	else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
+		guestdest -= prefix;
+
+	if (guestdest + n > memsize)
+		return -EFAULT;
+
+	if (guestdest + n < guestdest)
+		return -EFAULT;
+
+	guestdest += origin;
+
+	return copy_to_user((void __user *) guestdest, from, n);
+slowpath:
+	return __copy_to_guest_slow(vcpu, guestdest, from, n);
+}
+
+static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
+					 u64 guestsrc, unsigned long n)
+{
+	int rc;
+	unsigned long i;
+	u8 *data = to;
+
+	for (i = 0; i < n; i++) {
+		rc = get_guest_u8(vcpu, guestsrc++, data++);
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
+				  u64 guestsrc, unsigned long n)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if ((guestsrc < prefix) && (guestsrc + n > prefix))
+		goto slowpath;
+
+	if ((guestsrc < prefix + 2 * PAGE_SIZE)
+	    && (guestsrc + n > prefix + 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if (guestsrc < 2 * PAGE_SIZE)
+		guestsrc += prefix;
+	else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
+		guestsrc -= prefix;
+
+	if (guestsrc + n > memsize)
+		return -EFAULT;
+
+	if (guestsrc + n < guestsrc)
+		return -EFAULT;
+
+	guestsrc += origin;
+
+	return copy_from_user(to, (void __user *) guestsrc, n);
+slowpath:
+	return __copy_from_guest_slow(vcpu, to, guestsrc, n);
+}
+
+static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
+					 const void *from, unsigned long n)
+{
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestdest + n > memsize)
+		return -EFAULT;
+
+	if (guestdest + n < guestdest)
+		return -EFAULT;
+
+	guestdest += origin;
+
+	return copy_to_user((void __user *) guestdest, from, n);
+}
+
+static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
+					   u64 guestsrc, unsigned long n)
+{
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestsrc + n > memsize)
+		return -EFAULT;
+
+	if (guestsrc + n < guestsrc)
+		return -EFAULT;
+
+	guestsrc += origin;
+
+	return copy_from_user(to, (void __user *) guestsrc, n);
+}
+#endif
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,574 @@
+/*
+ * s390host.c --  hosting zSeries kernel virtual machines
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/lowcore.h>
+#include <asm/pgtable.h>
+
+#include "gaccess.h"
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "userspace_handled", VCPU_STAT(exit_userspace) },
+	{ NULL }
+};
+
+
+/* Section: not file related */
+void kvm_arch_hardware_enable(void *garbage)
+{
+	/* every s390 is virtualization enabled ;-) */
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+}
+
+int kvm_arch_init(void *opaque)
+{
+	return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	if (ioctl == KVM_S390_ENABLE_SIE)
+		return s390_enable_sie();
+	return -EINVAL;
+}
+
+
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+	return 0;
+}
+
+/* Section: vm related */
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+			       struct kvm_dirty_log *log)
+{
+	return 0;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	switch (ioctl) {
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+struct kvm *kvm_arch_create_vm(void)
+{
+	struct kvm *kvm;
+	int rc;
+	char debug_name[16];
+
+
+	rc = s390_enable_sie();
+	if (rc)
+		goto out_nokvm;
+
+	rc = -ENOMEM;
+	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+	if (!kvm)
+		goto out_nokvm;
+
+	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+	if (!kvm->arch.sca)
+		goto out_nosca;
+
+	sprintf(debug_name, "kvm-%u", current->pid);
+
+	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+	if (!kvm->arch.dbf)
+		goto out_nodbf;
+
+	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+	VM_EVENT(kvm, 3, "%s", "vm created");
+
+	try_module_get(THIS_MODULE);
+
+	return kvm;
+out_nodbf:
+	free_page((unsigned long)(kvm->arch.sca));
+out_nosca:
+	kfree(kvm);
+out_nokvm:
+	return ERR_PTR(rc);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	debug_unregister(kvm->arch.dbf);
+	free_page((unsigned long)(kvm->arch.sca));
+	kfree(kvm);
+	module_put(THIS_MODULE);
+}
+
+/* Section: vcpu related */
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but does'nt call it */
+	BUG();
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	save_fp_regs(&vcpu->arch.host_fpregs);
+	save_access_regs(vcpu->arch.host_acrs);
+	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
+	restore_fp_regs(&vcpu->arch.guest_fpregs);
+	restore_access_regs(vcpu->arch.guest_acrs);
+
+	if (signal_pending(current))
+		atomic_set_mask(CPUSTAT_STOP_INT,
+			&vcpu->arch.sie_block->cpuflags);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	save_fp_regs(&vcpu->arch.guest_fpregs);
+	save_access_regs(vcpu->arch.guest_acrs);
+	restore_fp_regs(&vcpu->arch.host_fpregs);
+	restore_access_regs(vcpu->arch.host_acrs);
+}
+
+static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
+{
+	/* this equals initial cpu reset in pop, but we don't switch to ESA */
+	vcpu->arch.sie_block->gpsw.mask = 0UL;
+	vcpu->arch.sie_block->gpsw.addr = 0UL;
+	vcpu->arch.sie_block->prefix    = 0UL;
+	vcpu->arch.sie_block->ihcpu     = 0xffff;
+	vcpu->arch.sie_block->cputm     = 0UL;
+	vcpu->arch.sie_block->ckc       = 0UL;
+	vcpu->arch.sie_block->todpr     = 0;
+	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
+	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
+	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
+	vcpu->arch.guest_fpregs.fpc = 0;
+	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+	vcpu->arch.sie_block->gbea = 1;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
+	vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
+	vcpu->arch.sie_block->gmsor = 0x000000000000;
+	vcpu->arch.sie_block->ecb   = 2;
+	vcpu->arch.sie_block->eca   = 0xC1002001U;
+
+	return 0;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+				      unsigned int id)
+{
+	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+	int rc = -ENOMEM;
+
+	if (!vcpu)
+		goto out_nomem;
+
+	vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
+
+	if (!vcpu->arch.sie_block)
+		goto out_free_cpu;
+
+	vcpu->arch.sie_block->icpua = id;
+	BUG_ON(!kvm->arch.sca);
+	BUG_ON(kvm->arch.sca->cpu[id].sda);
+	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
+	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+
+	rc = kvm_vcpu_init(vcpu, kvm, id);
+	if (rc)
+		goto out_free_cpu;
+	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+		 vcpu->arch.sie_block);
+
+	try_module_get(THIS_MODULE);
+
+	return vcpu;
+out_free_cpu:
+	kfree(vcpu);
+out_nomem:
+	return ERR_PTR(rc);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
+	free_page((unsigned long)(vcpu->arch.sie_block));
+	kfree(vcpu);
+	module_put(THIS_MODULE);
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but never calls it */
+	BUG();
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+	vcpu_load(vcpu);
+	kvm_s390_vcpu_initial_reset(vcpu);
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
+	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+	memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
+	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	vcpu_load(vcpu);
+	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu,
+	psw_t psw)
+{
+	int rc = 0;
+
+	vcpu_load(vcpu);
+	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
+		rc = -EBUSY;
+	else
+		vcpu->arch.sie_block->gpsw = psw;
+	vcpu_put(vcpu);
+	return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+				    struct kvm_debug_guest *dbg)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+static void __vcpu_run(struct kvm_vcpu *vcpu)
+{
+	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
+
+	if (need_resched())
+		schedule();
+
+	vcpu->arch.sie_block->icptcode = 0;
+	local_irq_disable();
+	kvm_guest_enter();
+	local_irq_enable();
+	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
+		   atomic_read(&vcpu->arch.sie_block->cpuflags));
+	sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+		   vcpu->arch.sie_block->icptcode);
+	local_irq_disable();
+	kvm_guest_exit();
+	local_irq_enable();
+
+	memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	sigset_t sigsaved;
+
+	vcpu_load(vcpu);
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+
+	__vcpu_run(vcpu);
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	vcpu_put(vcpu);
+
+	vcpu->stat.exit_userspace++;
+	return 0;
+}
+
+static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
+		       unsigned long n, int prefix)
+{
+	if (prefix)
+		return copy_to_guest(vcpu, guestdest, from, n);
+	else
+		return copy_to_guest_absolute(vcpu, guestdest, from, n);
+}
+
+/*
+ * store status at address
+ * we use have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	const unsigned char archmode = 1;
+	int prefix;
+
+	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
+		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+			return -EFAULT;
+		addr = SAVE_AREA_BASE;
+		prefix = 0;
+	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
+		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+			return -EFAULT;
+		addr = SAVE_AREA_BASE;
+		prefix = 1;
+	} else
+		prefix = 0;
+
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
+	  vcpu->arch.guest_fpregs.fprs, 128, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
+	  vcpu->arch.guest_gprs, 128, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
+	  &vcpu->arch.sie_block->gpsw, 16, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
+	  &vcpu->arch.sie_block->prefix, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu,
+	  addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
+	  &vcpu->arch.guest_fpregs.fpc, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
+	  &vcpu->arch.sie_block->todpr, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
+	  &vcpu->arch.sie_block->cputm, 8, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
+	  &vcpu->arch.sie_block->ckc, 8, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
+	  &vcpu->arch.guest_acrs, 64, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu,
+	  addr + offsetof(struct save_area_s390x, ctrl_regs),
+	  &vcpu->arch.sie_block->gcr, 128, prefix))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	int rc;
+
+	vcpu_load(vcpu);
+	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
+	vcpu_put(vcpu);
+
+	return rc;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	switch (ioctl) {
+	case KVM_S390_STORE_STATUS:
+		return kvm_s390_vcpu_store_status(vcpu, arg);
+	case KVM_S390_SET_INITIAL_PSW: {
+		psw_t psw;
+
+		if (copy_from_user(&psw, argp, sizeof(psw)))
+			return -EFAULT;
+		return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+	}
+	case KVM_S390_INITIAL_RESET:
+		return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+	default:
+		;
+	}
+	return -EINVAL;
+}
+
+/* Section: memory related */
+int kvm_arch_set_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+	/* A few sanity checks. We can have exactly one memory slot which has
+	   to start at guest virtual zero and which has to be located at a
+	   page boundary in userland and which has to end at a page boundary.
+	   The memory in userland is ok to be fragmented into various different
+	   vmas. It is okay to mmap() and munmap() stuff in this slot after
+	   doing this call at any time */
+
+	if (mem->slot != 0)
+		return -EINVAL;
+
+	if (mem->guest_phys_addr != 0)
+		return -EINVAL;
+
+	if (mem->userspace_addr % PAGE_SIZE)
+		return -EINVAL;
+
+	if (mem->memory_size % PAGE_SIZE)
+		return -EINVAL;
+
+	kvm->arch.guest_origin = mem->userspace_addr;
+	kvm->arch.guest_memsize = mem->memory_size;
+
+	/* FIXME: we do want to interrupt running CPUs and update their memory
+	   configuration now to avoid race conditions. But hey, changing the
+	   memory layout while virtual CPUs are running is usually bad
+	   programming practice. */
+
+	return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	return gfn;
+}
+
+static int __init kvm_s390_init(void)
+{
+	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvm_s390_exit(void)
+{
+	kvm_exit();
+	return;
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,29 @@
+/*
+ * kvm_s390.h -  definition for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
+	  "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
+	  d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
+	  d_args); \
+} while (0)
+#endif
Index: kvm/arch/s390/kvm/sie64a.S
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/sie64a.S
@@ -0,0 +1,47 @@
+/*
+ * sie64a.S - low level sie call
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <asm/asm-offsets.h>
+
+SP_R5 =	5 * 8	# offset into stackframe
+SP_R6 =	6 * 8
+
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+	.globl	sie64a
+sie64a:
+	lgr	%r5,%r3
+	stmg	%r5,%r14,SP_R5(%r15)	# save register on entry
+	lgr	%r14,%r2		# pointer to sie control block
+	lmg	%r0,%r13,0(%r3)		# load guest gprs 0-13
+sie_inst:
+	sie	0(%r14)
+	lg	%r14,SP_R5(%r15)
+	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	lghi	%r2,0
+	lmg	%r6,%r14,SP_R6(%r15)
+	br	%r14
+
+sie_err:
+	lg	%r14,SP_R5(%r15)
+	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	lghi	%r2,-EFAULT
+	lmg	%r6,%r14,SP_R6(%r15)
+	br	%r14
+
+	.section __ex_table,"a"
+	.quad	sie_inst,sie_err
+	.previous
Index: kvm/include/asm-s390/Kbuild
===================================================================
--- kvm.orig/include/asm-s390/Kbuild
+++ kvm/include/asm-s390/Kbuild
@@ -7,6 +7,7 @@ header-y += tape390.h
 header-y += ucontext.h
 header-y += vtoc.h
 header-y += zcrypt.h
+header-y += kvm.h
 
 unifdef-y += cmb.h
 unifdef-y += debug.h
Index: kvm/include/asm-s390/kvm.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/kvm.h
@@ -0,0 +1,44 @@
+#ifndef __LINUX_KVM_S390_H
+#define __LINUX_KVM_S390_H
+/*
+ * asm-s390/kvm.h - KVM s390 specific structures and definitions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#include <asm/types.h>
+
+/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
+struct kvm_pic_state {
+	/* no PIC for s390 */
+};
+
+struct kvm_ioapic_state {
+	/* no IOAPIC for s390 */
+};
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+	/* general purpose regs for s390 */
+	__u64 gprs[16];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+	__u32 acrs[16];
+	__u64 crs[16];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u32 fpc;
+	__u64 fprs[16];
+};
+
+#endif
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/kvm_host.h
@@ -0,0 +1,119 @@
+/*
+ * asm-s390/kvm_host.h - definition for kernel virtual machines on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+
+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+#include <linux/kvm_host.h>
+#include <asm/debug.h>
+
+struct sca_entry {
+	atomic_t scn;
+	__u64	reserved;
+	__u64	sda;
+	__u64	reserved2[2];
+} __attribute__((packed));
+
+
+struct sca_block {
+	__u64	ipte_control;
+	__u64	reserved[5];
+	__u64	mcn;
+	__u64	reserved2;
+	struct sca_entry cpu[64];
+} __attribute__((packed));
+
+#define KVM_PAGES_PER_HPAGE 256
+
+#define CPUSTAT_HOST       0x80000000
+#define CPUSTAT_WAIT       0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT   0x04000000
+#define CPUSTAT_IO_INT     0x02000000
+#define CPUSTAT_EXT_INT    0x01000000
+#define CPUSTAT_RUNNING    0x00800000
+#define CPUSTAT_RETAINED   0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB    0x00010000
+#define CPUSTAT_RRF        0x00008000
+#define CPUSTAT_SLSV       0x00004000
+#define CPUSTAT_SLSR       0x00002000
+#define CPUSTAT_ZARCH      0x00000800
+#define CPUSTAT_MCDS       0x00000100
+#define CPUSTAT_SM         0x00000080
+#define CPUSTAT_G          0x00000008
+#define CPUSTAT_J          0x00000002
+#define CPUSTAT_P          0x00000001
+
+struct sie_block {
+	atomic_t cpuflags;		/* 0x0000 */
+	__u32	prefix;			/* 0x0004 */
+	__u8	reserved8[32];		/* 0x0008 */
+	__u64	cputm;			/* 0x0028 */
+	__u64	ckc;			/* 0x0030 */
+	__u64	epoch;			/* 0x0038 */
+	__u8	reserved40[4];		/* 0x0040 */
+	__u16   lctl;			/* 0x0044 */
+	__s16	icpua;			/* 0x0046 */
+	__u32	ictl;			/* 0x0048 */
+	__u32	eca;			/* 0x004c */
+	__u8	icptcode;		/* 0x0050 */
+	__u8	reserved51;		/* 0x0051 */
+	__u16	ihcpu;			/* 0x0052 */
+	__u8	reserved54[2];		/* 0x0054 */
+	__u16	ipa;			/* 0x0056 */
+	__u32	ipb;			/* 0x0058 */
+	__u32	scaoh;			/* 0x005c */
+	__u8	reserved60;		/* 0x0060 */
+	__u8	ecb;			/* 0x0061 */
+	__u8	reserved62[2];		/* 0x0062 */
+	__u32	scaol;			/* 0x0064 */
+	__u8	reserved68[4];		/* 0x0068 */
+	__u32	todpr;			/* 0x006c */
+	__u8	reserved70[16];		/* 0x0070 */
+	__u64	gmsor;			/* 0x0080 */
+	__u64	gmslm;			/* 0x0088 */
+	psw_t	gpsw;			/* 0x0090 */
+	__u64	gg14;			/* 0x00a0 */
+	__u64	gg15;			/* 0x00a8 */
+	__u8	reservedb0[80];		/* 0x00b0 */
+	__u64	gcr[16];		/* 0x0100 */
+	__u64	gbea;			/* 0x0180 */
+	__u8	reserved188[120];	/* 0x0188 */
+} __attribute__((packed));
+
+struct kvm_vcpu_stat {
+	u32 exit_userspace;
+};
+
+struct kvm_vcpu_arch {
+	struct sie_block *sie_block;
+	unsigned long	  guest_gprs[16];
+	s390_fp_regs      host_fpregs;
+	unsigned int      host_acrs[NUM_ACRS];
+	s390_fp_regs      guest_fpregs;
+	unsigned int      guest_acrs[NUM_ACRS];
+};
+
+struct kvm_vm_stat {
+	u32 remote_tlb_flush;
+};
+
+struct kvm_arch{
+	unsigned long guest_origin;
+	unsigned long guest_memsize;
+	struct sca_block *sca;
+	debug_info_t *dbf;
+};
+
+extern int sie64a(struct sie_block *, __u64 *);
+#endif
Index: kvm/include/asm-s390/kvm_para.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/kvm_para.h
@@ -0,0 +1,30 @@
+/*
+ * asm-s390/kvm_para.h - definition for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __S390_KVM_PARA_H
+#define __S390_KVM_PARA_H
+
+/*
+ * No hypercalls for KVM on s390
+ */
+
+static inline int kvm_para_available(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
+#endif /* __S390_KVM_PARA_H */
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -205,6 +205,11 @@ struct kvm_vapic_addr {
 	__u64 vapic_addr;
 };
 
+struct kvm_s390_psw {
+	__u64 mask;
+	__u64 addr;
+};
+
 #define KVMIO 0xAE
 
 /*
@@ -213,6 +218,8 @@ struct kvm_vapic_addr {
 #define KVM_GET_API_VERSION       _IO(KVMIO,   0x00)
 #define KVM_CREATE_VM             _IO(KVMIO,   0x01) /* returns a VM fd */
 #define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 0x02, struct kvm_msr_list)
+
+#define KVM_S390_ENABLE_SIE       _IO(KVMIO,   0x06)
 /*
  * Check if a kvm extension is available.  Argument is extension number,
  * return is 1 (yes) or 0 (no, sorry).
@@ -291,5 +298,13 @@ struct kvm_vapic_addr {
 #define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO,  0x92, struct kvm_tpr_access_ctl)
 /* Available with KVM_CAP_VAPIC */
 #define KVM_SET_VAPIC_ADDR        _IOW(KVMIO,  0x93, struct kvm_vapic_addr)
+/* store status for s390 */
+#define KVM_S390_STORE_STATUS_NOADDR    (-1ul)
+#define KVM_S390_STORE_STATUS_PREFIXED  (-2ul)
+#define KVM_S390_STORE_STATUS	  _IOW(KVMIO,  0x95, unsigned long)
+/* initial ipl psw for s390 */
+#define KVM_S390_SET_INITIAL_PSW  _IOW(KVMIO,  0x96, struct kvm_s390_psw)
+/* initial reset for s390 */
+#define KVM_S390_INITIAL_RESET    _IO(KVMIO,  0x97)
 
 #endif
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -24,7 +24,11 @@
 
 #include <asm/kvm_host.h>
 
+#ifdef CONFIG_S390
+#define KVM_MAX_VCPUS 64
+#else
 #define KVM_MAX_VCPUS 16
+#endif
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 06/15] kvm-s390: sie intercept handling
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (4 preceding siblings ...)
  2008-03-20 16:24 ` [RFC/PATCH 05/15] kvm-s390: s390 arch backend for the kvm kernel module Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 07/15] kvm-s390: interrupt subsystem, cpu timer, waitpsw Carsten Otte
                   ` (14 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>

This path introduces handling of sie intercepts in three flavors: Intercepts
are either handled completely in-kernel by kvm_handle_sie_intercept(),
or passed to userspace with corresponding data in struct kvm_run in case
kvm_handle_sie_intercept() returns -ENOTSUPP.
In case of partial execution in kernel with the need of userspace support,
kvm_handle_sie_intercept() may choose to set up struct kvm_run and return
-EREMOTE.

The trivial intercept reasons are handled in this patch:
handle_noop() just does nothing for intercepts that don't require our support
  at all
handle_stop() is called when a cpu enters stopped state, and it drops out to
  userland after updating our vcpu state
handle_validity() faults in the cpu lowcore if needed, or passes the request
  to userland

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/kvm/Makefile      |    2 -
 arch/s390/kvm/intercept.c   |   83 ++++++++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c    |   46 +++++++++++++++++++++++-
 arch/s390/kvm/kvm-s390.h    |    6 +++
 include/asm-s390/kvm_host.h |    4 ++
 include/linux/kvm.h         |    9 ++++
 6 files changed, 148 insertions(+), 2 deletions(-)

Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o
 obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/intercept.c
@@ -0,0 +1,83 @@
+/*
+ * intercept.c - in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/kvm_host.h>
+
+#include "kvm-s390.h"
+
+static int handle_noop(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->icptcode) {
+	case 0x10:
+		vcpu->stat.exit_external_request++;
+		break;
+	case 0x14:
+		vcpu->stat.exit_external_interrupt++;
+		break;
+	default:
+		break; /* nothing */
+	}
+	return 0;
+}
+
+static int handle_stop(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.exit_stop_request++;
+	VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
+	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	return -ENOTSUPP;
+}
+
+static int handle_validity(struct kvm_vcpu *vcpu)
+{
+	int viwhy = vcpu->arch.sie_block->ipb >> 16;
+	vcpu->stat.exit_validity++;
+	if (viwhy == 0x37) {
+		fault_in_pages_writeable((char __user *)
+				vcpu->kvm->arch.guest_origin +
+				vcpu->arch.sie_block->prefix, PAGE_SIZE);
+		return 0;
+	}
+	VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+		viwhy);
+	return -ENOTSUPP;
+}
+
+static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+	[0x00 >> 2] = handle_noop,
+	[0x10 >> 2] = handle_noop,
+	[0x14 >> 2] = handle_noop,
+	[0x20 >> 2] = handle_validity,
+	[0x28 >> 2] = handle_stop,
+};
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t func;
+	u8 code = vcpu->arch.sie_block->icptcode;
+
+	if (code & 3 || code > 0x48)
+		return -ENOTSUPP;
+
+	func = intercept_funcs[code >> 2];
+
+	if (func)
+		return func(vcpu);
+
+	return -ENOTSUPP;
+}
+
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -23,12 +23,17 @@
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
 
+#include "kvm-s390.h"
 #include "gaccess.h"
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
+	{ "exit_validity", VCPU_STAT(exit_validity) },
+	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
+	{ "exit_external_request", VCPU_STAT(exit_external_request) },
+	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
 	{ NULL }
 };
 
@@ -384,6 +389,7 @@ static void __vcpu_run(struct kvm_vcpu *
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	int rc;
 	sigset_t sigsaved;
 
 	vcpu_load(vcpu);
@@ -393,7 +399,45 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 
-	__vcpu_run(vcpu);
+	switch (kvm_run->exit_reason) {
+	case KVM_EXIT_S390_SIEIC:
+		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
+		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
+		break;
+	case KVM_EXIT_UNKNOWN:
+	case KVM_EXIT_S390_RESET:
+		break;
+	default:
+		BUG();
+	}
+
+	might_sleep();
+
+	do {
+		__vcpu_run(vcpu);
+
+		rc = kvm_handle_sie_intercept(vcpu);
+	} while (!signal_pending(current) && !rc);
+
+	if ((rc == 0) && signal_pending(current))
+		rc = -EINTR;
+
+	if (rc == -ENOTSUPP) {
+		/* intercept cannot be handled in-kernel, prepare kvm-run */
+		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
+		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+		kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
+		kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
+		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
+		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
+		rc = 0;
+	}
+
+	if (rc == -EREMOTE) {
+		/* intercept was handled, but userspace support is needed
+		 * kvm_run has been prepared by the handler */
+		rc = 0;
+	}
 
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -13,6 +13,12 @@
 
 #ifndef ARCH_S390_KVM_S390_H
 #define ARCH_S390_KVM_S390_H
+#include <linux/kvm_host.h>
+
+typedef  int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+
+extern int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
 do { \
 	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -93,6 +93,10 @@ struct sie_block {
 
 struct kvm_vcpu_stat {
 	u32 exit_userspace;
+	u32 exit_external_request;
+	u32 exit_external_interrupt;
+	u32 exit_stop_request;
+	u32 exit_validity;
 };
 
 struct kvm_vcpu_arch {
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -74,6 +74,7 @@ struct kvm_irqchip {
 #define KVM_EXIT_INTR             10
 #define KVM_EXIT_SET_TPR          11
 #define KVM_EXIT_TPR_ACCESS       12
+#define KVM_EXIT_S390_SIEIC       13
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -138,6 +139,14 @@ struct kvm_run {
 			__u32 is_write;
 			__u32 pad;
 		} tpr_access;
+		/* KVM_EXIT_S390_SIEIC */
+		struct {
+			__u8 icptcode;
+			__u64 mask; /* psw upper half */
+			__u64 addr; /* psw lower half */
+			__u16 ipa;
+			__u32 ipb;
+		} s390_sieic;
 		/* Fix the size of the union. */
 		char padding[256];
 	};

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 07/15] kvm-s390: interrupt subsystem, cpu timer, waitpsw
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (5 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 06/15] kvm-s390: sie intercept handling Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 08/15] kvm-s390: intercepts for privileged instructions Carsten Otte
                   ` (13 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>

This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).

In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.

This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.

The following interrupts are supported:
SIGP STOP       - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
                  (stopped) remote cpu
INT EMERGENCY   - interprocessor interrupt, usually used to signal need_reshed
                  and for smp_call_function() in the guest.
PROGRAM INT     - exception during program execution such as page fault, illegal
                  instruction and friends
RESTART         - interprocessor signal that starts a stopped cpu
INT VIRTIO      - floating interrupt for virtio signalisation
INT SERVICE     - floating interrupt for signalisations from the system
                  service processor

struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carrys parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.

kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/kvm/Makefile      |    2 
 arch/s390/kvm/intercept.c   |  123 +++++++++
 arch/s390/kvm/interrupt.c   |  583 ++++++++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c    |   48 +++
 arch/s390/kvm/kvm-s390.h    |   15 +
 include/asm-s390/kvm_host.h |   75 +++++
 include/linux/kvm.h         |   17 +
 7 files changed, 860 insertions(+), 3 deletions(-)

Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o
 obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -18,6 +18,91 @@
 #include <asm/kvm_host.h>
 
 #include "kvm-s390.h"
+#include "gaccess.h"
+
+static int handle_lctg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+			((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+	u64 useraddr;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctg++;
+	if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
+		return -ENOTSUPP;
+
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+
+	reg = reg1;
+
+	VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+		   disp2);
+
+	do {
+		rc = get_guest_u64(vcpu, useraddr,
+			&vcpu->arch.sie_block->gcr[reg]);
+		if (rc == -EFAULT) {
+			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			break;
+		}
+		useraddr += 8;
+		if (reg == reg3)
+			break;
+		reg = reg + 1;
+		if (reg > 15)
+			reg = 0;
+	} while (1);
+	return 0;
+}
+
+static int handle_lctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 useraddr;
+	u32 val = 0;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctl++;
+
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+
+	reg = reg1;
+
+	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+		   disp2);
+
+	do {
+		rc = get_guest_u32(vcpu, useraddr, &val);
+		if (rc == -EFAULT) {
+			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			break;
+		}
+		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+		vcpu->arch.sie_block->gcr[reg] |= val;
+		useraddr += 4;
+		if (reg == reg3)
+			break;
+		reg = reg + 1;
+		if (reg > 15)
+			reg = 0;
+	} while (1);
+	return 0;
+}
+
+static intercept_handler_t instruction_handlers[256] = {
+	[0xb7] = handle_lctl,
+	[0xeb] = handle_lctg,
+};
 
 static int handle_noop(struct kvm_vcpu *vcpu)
 {
@@ -57,10 +142,48 @@ static int handle_validity(struct kvm_vc
 	return -ENOTSUPP;
 }
 
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler =
+		instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
+
+	vcpu->stat.exit_instruction++;
+
+	if (!handler)
+		return -ENOTSUPP;
+
+	return handler(vcpu);
+}
+
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.exit_program_interruption++;
+	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+}
+
+static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
+{
+	int rc, rc2;
+
+	vcpu->stat.exit_instr_and_program++;
+	rc = handle_instruction(vcpu);
+	rc2 = handle_prog(vcpu);
+
+	if (rc == -ENOTSUPP)
+		vcpu->arch.sie_block->icptcode = 0x04;
+	if (rc)
+		return rc;
+	return rc2;
+}
+
 static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
 	[0x00 >> 2] = handle_noop,
+	[0x04 >> 2] = handle_instruction,
+	[0x08 >> 2] = handle_prog,
+	[0x0C >> 2] = handle_instruction_and_prog,
 	[0x10 >> 2] = handle_noop,
 	[0x14 >> 2] = handle_noop,
+	[0x1C >> 2] = kvm_s390_handle_wait,
 	[0x20 >> 2] = handle_validity,
 	[0x28 >> 2] = handle_stop,
 };
Index: kvm/arch/s390/kvm/interrupt.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/interrupt.c
@@ -0,0 +1,583 @@
+/*
+ * interrupt.c - handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#include <asm/lowcore.h>
+#include <asm/uaccess.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
+}
+
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{
+	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
+	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
+	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
+		return 0;
+	return 1;
+}
+
+static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+				      struct interrupt_info *inti)
+{
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
+			return 1;
+		return 0;
+	case KVM_S390_INT_SERVICE:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+			return 1;
+		return 0;
+	case KVM_S390_INT_VIRTIO:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+			return 1; /*FIXME virtio control register bit */
+		return 0;
+	case KVM_S390_PROGRAM_INT:
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_SIGP_SET_PREFIX:
+	case KVM_S390_RESTART:
+		return 1;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+	atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(CPUSTAT_ECALL_PEND |
+		CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+		&vcpu->arch.sie_block->cpuflags);
+	vcpu->arch.sie_block->lctl = 0x0000;
+}
+
+static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
+{
+	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+}
+
+static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
+				      struct interrupt_info *inti)
+{
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_VIRTIO:
+		if (psw_extint_disabled(vcpu))
+			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+		else
+			vcpu->arch.sie_block->lctl |= LCTL_CR0;
+		break;
+	case KVM_S390_SIGP_STOP:
+		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+				   struct interrupt_info *inti)
+{
+	const unsigned short table[] = { 2, 4, 4, 6 };
+	int rc, exception = 0;
+
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+		vcpu->stat.deliver_emergency_signal++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_INT_SERVICE:
+		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+			   inti->ext.ext_params);
+		vcpu->stat.deliver_service_signal++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_INT_VIRTIO:
+		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
+			   inti->ext.ext_params, inti->ext.ext_params2);
+		vcpu->stat.deliver_virtio_interrupt++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1237);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
+			inti->ext.ext_params2);
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_SIGP_STOP:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+		vcpu->stat.deliver_stop_signal++;
+		__set_intercept_indicator(vcpu, inti);
+		break;
+
+	case KVM_S390_SIGP_SET_PREFIX:
+		VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
+			   inti->prefix.address);
+		vcpu->stat.deliver_prefix_signal++;
+		vcpu->arch.sie_block->prefix = inti->prefix.address;
+		vcpu->arch.sie_block->ihcpu = 0xffff;
+		break;
+
+	case KVM_S390_RESTART:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+		vcpu->stat.deliver_restart_signal++;
+		rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
+		  restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_PROGRAM_INT:
+		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+			   inti->pgm.code,
+			   table[vcpu->arch.sie_block->ipa >> 14]);
+		vcpu->stat.deliver_program_int++;
+		rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u16(vcpu, __LC_PGM_ILC,
+			table[vcpu->arch.sie_block->ipa >> 14]);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_PGM_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	default:
+		BUG();
+	}
+
+	if (exception) {
+		VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
+			   " interrupt");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		if (inti->type == KVM_S390_PROGRAM_INT) {
+			printk(KERN_WARNING "kvm: recursive program check\n");
+			BUG();
+		}
+	}
+}
+
+static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+{
+	int rc, exception = 0;
+
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		return 0;
+	rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
+	if (rc == -EFAULT)
+		exception = 1;
+	rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	if (rc == -EFAULT)
+		exception = 1;
+	rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+		__LC_EXT_NEW_PSW, sizeof(psw_t));
+	if (rc == -EFAULT)
+		exception = 1;
+
+	if (exception) {
+		VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
+			   " ckc interrupt");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		return 0;
+	}
+
+	return 1;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct interrupt_info  *inti;
+	int rc = 0;
+
+	if (atomic_read(&li->active)) {
+		spin_lock_bh(&li->lock);
+		list_for_each_entry(inti, &li->list, list)
+			if (__interrupt_is_deliverable(vcpu, inti)) {
+				rc = 1;
+				break;
+			}
+		spin_unlock_bh(&li->lock);
+	}
+
+	if ((!rc) && atomic_read(&fi->active)) {
+		spin_lock_bh(&fi->lock);
+		list_for_each_entry(inti, &fi->list, list)
+			if (__interrupt_is_deliverable(vcpu, inti)) {
+				rc = 1;
+				break;
+			}
+		spin_unlock_bh(&fi->lock);
+	}
+
+	if ((!rc) && (vcpu->arch.sie_block->ckc <
+		get_clock() + vcpu->arch.sie_block->epoch)) {
+		if ((!psw_extint_disabled(vcpu)) &&
+			(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+			rc = 1;
+	}
+
+	return rc;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{
+	u64 now, sltime;
+	DECLARE_WAITQUEUE(wait, current);
+
+	vcpu->stat.exit_wait_state++;
+	if (kvm_cpu_has_interrupt(vcpu))
+		return 0;
+
+	if (psw_interrupts_disabled(vcpu)) {
+		VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+		__unset_cpu_idle(vcpu);
+		return -ENOTSUPP; /* disabled wait */
+	}
+
+	if (psw_extint_disabled(vcpu) ||
+	    (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+		goto no_timer;
+	}
+
+	now = get_clock() + vcpu->arch.sie_block->epoch;
+	if (vcpu->arch.sie_block->ckc < now) {
+		__unset_cpu_idle(vcpu);
+		return 0;
+	}
+
+	sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+
+	vcpu->arch.ckc_timer.expires = jiffies + sltime;
+
+	add_timer(&vcpu->arch.ckc_timer);
+	VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
+no_timer:
+	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	__set_cpu_idle(vcpu);
+	vcpu->arch.local_int.timer_due = 0;
+	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+	while (list_empty(&vcpu->arch.local_int.list) &&
+		list_empty(&vcpu->arch.local_int.float_int->list) &&
+		(!vcpu->arch.local_int.timer_due) &&
+		!signal_pending(current)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock_bh(&vcpu->arch.local_int.lock);
+		spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+		vcpu_put(vcpu);
+		schedule();
+		vcpu_load(vcpu);
+		spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_lock_bh(&vcpu->arch.local_int.lock);
+	}
+	__unset_cpu_idle(vcpu);
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&vcpu->wq, &wait);
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+	del_timer(&vcpu->arch.ckc_timer);
+	return 0;
+}
+
+void kvm_s390_idle_wakeup(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	vcpu->arch.local_int.timer_due = 1;
+	if (waitqueue_active(&vcpu->arch.local_int.wq))
+		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+}
+
+
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct interrupt_info  *n, *inti = NULL;
+	int deliver;
+
+	__reset_intercept_indicators(vcpu);
+	if (atomic_read(&li->active)) {
+		do {
+			deliver = 0;
+			spin_lock_bh(&li->lock);
+			list_for_each_entry_safe(inti, n, &li->list, list) {
+				if (__interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&li->list))
+				atomic_set(&li->active, 0);
+			spin_unlock_bh(&li->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+
+	if ((vcpu->arch.sie_block->ckc <
+		get_clock() + vcpu->arch.sie_block->epoch))
+		__try_deliver_ckc_interrupt(vcpu);
+
+	if (atomic_read(&fi->active)) {
+		do {
+			deliver = 0;
+			spin_lock_bh(&fi->lock);
+			list_for_each_entry_safe(inti, n, &fi->list, list) {
+				if (__interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&fi->list))
+				atomic_set(&fi->active, 0);
+			spin_unlock_bh(&fi->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+}
+
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_PROGRAM_INT;;
+	inti->pgm.code = code;
+
+	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+	spin_lock_bh(&li->lock);
+	list_add(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	BUG_ON(waitqueue_active(&li->wq));
+	spin_unlock_bh(&li->lock);
+	return 0;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct local_interrupt *li;
+	struct float_interrupt *fi;
+	struct interrupt_info *inti;
+	int sigcpu;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	switch (s390int->type) {
+	case KVM_S390_INT_VIRTIO:
+		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
+			 s390int->parm, s390int->parm64);
+		inti->type = s390int->type;
+		inti->ext.ext_params = s390int->parm;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_INT_SERVICE:
+		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		inti->type = s390int->type;
+		inti->ext.ext_params = s390int->parm;
+		break;
+	case KVM_S390_PROGRAM_INT:
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_INT_EMERGENCY:
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock_bh(&fi->lock);
+	list_add_tail(&inti->list, &fi->list);
+	atomic_set(&fi->active, 1);
+	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+	if (sigcpu == KVM_MAX_VCPUS) {
+		do {
+			sigcpu = fi->next_rr_cpu++;
+			if (sigcpu == KVM_MAX_VCPUS)
+				sigcpu = fi->next_rr_cpu = 0;
+		} while (fi->local_int[sigcpu] == NULL);
+	}
+	li = fi->local_int[sigcpu];
+	spin_lock_bh(&li->lock);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	spin_unlock_bh(&fi->lock);
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+			 struct kvm_s390_interrupt *s390int)
+{
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	switch (s390int->type) {
+	case KVM_S390_PROGRAM_INT:
+		if (s390int->parm & 0xffff0000) {
+			kfree(inti);
+			return -EINVAL;
+		}
+		inti->type = s390int->type;
+		inti->pgm.code = s390int->parm;
+		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
+			   s390int->parm);
+		break;
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_RESTART:
+	case KVM_S390_SIGP_SET_PREFIX:
+	case KVM_S390_INT_EMERGENCY:
+		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
+		inti->type = s390int->type;
+		break;
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+
+	mutex_lock(&vcpu->kvm->lock);
+	li = &vcpu->arch.local_int;
+	spin_lock_bh(&li->lock);
+	if (inti->type == KVM_S390_PROGRAM_INT)
+		list_add(&inti->list, &li->list);
+	else
+		list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	if (inti->type == KVM_S390_SIGP_STOP)
+		li->action_bits |= ACTION_STOP_ON_STOP;
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	spin_unlock_bh(&li->lock);
+	mutex_unlock(&vcpu->kvm->lock);
+	return 0;
+}
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -20,6 +20,7 @@
 #include <linux/kvm_host.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/timer.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
 
@@ -34,6 +35,19 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
+	{ "exit_instruction", VCPU_STAT(exit_instruction) },
+	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
+	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+	{ "instruction_lctg", VCPU_STAT(instruction_lctg) },
+	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
+	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
+	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
+	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
+	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
 	{ NULL }
 };
 
@@ -108,6 +122,15 @@ long kvm_arch_vm_ioctl(struct file *filp
 	int r;
 
 	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			break;
+		r = kvm_s390_inject_vm(kvm, &s390int);
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -141,6 +164,9 @@ struct kvm *kvm_arch_create_vm(void)
 	if (!kvm->arch.dbf)
 		goto out_nodbf;
 
+	spin_lock_init(&kvm->arch.float_int.lock);
+	INIT_LIST_HEAD(&kvm->arch.float_int.list);
+
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
 
@@ -221,7 +247,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu 
 	vcpu->arch.sie_block->gmsor = 0x000000000000;
 	vcpu->arch.sie_block->ecb   = 2;
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
-
+	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
+		 (unsigned long) vcpu);
 	return 0;
 }
 
@@ -246,6 +273,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
 	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
 	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
+	spin_lock_init(&vcpu->arch.local_int.lock);
+	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
+	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
+	spin_lock_bh(&kvm->arch.float_int.lock);
+	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
+	init_waitqueue_head(&vcpu->arch.local_int.wq);
+	spin_unlock_bh(&kvm->arch.float_int.lock);
+
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_cpu;
@@ -399,6 +434,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 
+	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
+
 	switch (kvm_run->exit_reason) {
 	case KVM_EXIT_S390_SIEIC:
 		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
@@ -414,8 +451,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 	might_sleep();
 
 	do {
+		kvm_s390_deliver_pending_interrupts(vcpu);
 		__vcpu_run(vcpu);
-
 		rc = kvm_handle_sie_intercept(vcpu);
 	} while (!signal_pending(current) && !rc);
 
@@ -545,6 +582,13 @@ long kvm_arch_vcpu_ioctl(struct file *fi
 	void __user *argp = (void __user *)arg;
 
 	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			return -EFAULT;
+		return kvm_s390_inject_vcpu(vcpu, &s390int);
+	}
 	case KVM_S390_STORE_STATUS:
 		return kvm_s390_vcpu_store_status(vcpu, arg);
 	case KVM_S390_SET_INITIAL_PSW: {
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -13,6 +13,7 @@
 
 #ifndef ARCH_S390_KVM_S390_H
 #define ARCH_S390_KVM_S390_H
+#include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
 typedef  int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
@@ -32,4 +33,18 @@ do { \
 	  d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
 	  d_args); \
 } while (0)
+
+static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_idle_wakeup(unsigned long data);
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+int kvm_s390_inject_vm(struct kvm *kvm,
+		struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+		struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 #endif
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -62,6 +62,7 @@ struct sie_block {
 	__u64	ckc;			/* 0x0030 */
 	__u64	epoch;			/* 0x0038 */
 	__u8	reserved40[4];		/* 0x0040 */
+#define LCTL_CR0	0x8000
 	__u16   lctl;			/* 0x0044 */
 	__s16	icpua;			/* 0x0046 */
 	__u32	ictl;			/* 0x0048 */
@@ -97,8 +98,79 @@ struct kvm_vcpu_stat {
 	u32 exit_external_interrupt;
 	u32 exit_stop_request;
 	u32 exit_validity;
+	u32 exit_instruction;
+	u32 instruction_lctl;
+	u32 instruction_lctg;
+	u32 exit_program_interruption;
+	u32 exit_instr_and_program;
+	u32 deliver_emergency_signal;
+	u32 deliver_service_signal;
+	u32 deliver_virtio_interrupt;
+	u32 deliver_stop_signal;
+	u32 deliver_prefix_signal;
+	u32 deliver_restart_signal;
+	u32 deliver_program_int;
+	u32 exit_wait_state;
 };
 
+struct io_info {
+	__u16        subchannel_id;            /* 0x0b8 */
+	__u16        subchannel_nr;            /* 0x0ba */
+	__u32        io_int_parm;              /* 0x0bc */
+	__u32        io_int_word;              /* 0x0c0 */
+};
+
+struct ext_info {
+	__u32 ext_params;
+	__u64 ext_params2;
+};
+
+#define PGM_OPERATION            0x01
+#define PGM_PRIVILEGED_OPERATION 0x02
+#define PGM_EXECUTE              0x03
+#define PGM_PROTECTION           0x04
+#define PGM_ADDRESSING           0x05
+#define PGM_SPECIFICATION        0x06
+#define PGM_DATA                 0x07
+
+struct pgm_info {
+	__u16 code;
+};
+
+struct prefix_info {
+	__u32 address;
+};
+
+struct interrupt_info {
+	struct list_head list;
+	u64	type;
+	union {
+		struct io_info io;
+		struct ext_info ext;
+		struct pgm_info pgm;
+		struct prefix_info prefix;
+	};
+};
+
+struct local_interrupt {
+	spinlock_t lock;
+	struct list_head list;
+	atomic_t active;
+	struct float_interrupt *float_int;
+	int timer_due; /* event indicator for waitqueue below */
+	wait_queue_head_t wq;
+};
+
+struct float_interrupt {
+	spinlock_t lock;
+	struct list_head list;
+	atomic_t active;
+	int next_rr_cpu;
+	unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)];
+	struct local_interrupt *local_int[64];
+};
+
+
 struct kvm_vcpu_arch {
 	struct sie_block *sie_block;
 	unsigned long	  guest_gprs[16];
@@ -106,6 +178,8 @@ struct kvm_vcpu_arch {
 	unsigned int      host_acrs[NUM_ACRS];
 	s390_fp_regs      guest_fpregs;
 	unsigned int      guest_acrs[NUM_ACRS];
+	struct local_interrupt local_int;
+	struct timer_list ckc_timer;
 };
 
 struct kvm_vm_stat {
@@ -117,6 +191,7 @@ struct kvm_arch{
 	unsigned long guest_memsize;
 	struct sca_block *sca;
 	debug_info_t *dbf;
+	struct float_interrupt float_int;
 };
 
 extern int sie64a(struct sie_block *, __u64 *);
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -219,6 +219,21 @@ struct kvm_s390_psw {
 	__u64 addr;
 };
 
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP		0xfffe0000u
+#define KVM_S390_PROGRAM_INT		0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX	0xfffe0002u
+#define KVM_S390_RESTART		0xfffe0003u
+#define KVM_S390_INT_VIRTIO		0xffff1237u /*FIXME arch number */
+#define KVM_S390_INT_SERVICE		0xffff2401u
+#define KVM_S390_INT_EMERGENCY		0xffff1201u
+
+struct kvm_s390_interrupt {
+	__u32 type;
+	__u32 parm;
+	__u64 parm64;
+};
+
 #define KVMIO 0xAE
 
 /*
@@ -307,6 +322,8 @@ struct kvm_s390_psw {
 #define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO,  0x92, struct kvm_tpr_access_ctl)
 /* Available with KVM_CAP_VAPIC */
 #define KVM_SET_VAPIC_ADDR        _IOW(KVMIO,  0x93, struct kvm_vapic_addr)
+/* valid for virtual machine (for floating interrupt)_and_ vcpu */
+#define KVM_S390_INTERRUPT        _IOW(KVMIO,  0x94, struct kvm_s390_interrupt)
 /* store status for s390 */
 #define KVM_S390_STORE_STATUS_NOADDR    (-1ul)
 #define KVM_S390_STORE_STATUS_PREFIXED  (-2ul)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 08/15] kvm-s390: intercepts for privileged instructions
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (6 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 07/15] kvm-s390: interrupt subsystem, cpu timer, waitpsw Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 09/15] kvm-s390: interprocessor communication via sigp Carsten Otte
                   ` (12 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch introduces in-kernel handling of some intercepts for privileged
instructions:
handle_set_prefix()        sets the prefix register of the local cpu
handle_store_prefix()      stores the content of the prefix register to memory
handle_store_cpu_address() stores the cpu number of the current cpu to memory
handle_skey()              just decrements the instruction address and retries
handle_stsch()             delivers condition code 3 "operation not supported"
handle_chsc()              same here
handle_stfl()              stores the facility list which contains the
                           capabilities of the cpu
handle_stidp()             stores cpu type/model/revision and such
handle_stsi()              stores information about the system topology

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/kvm/Makefile      |    2 
 arch/s390/kvm/intercept.c   |    1 
 arch/s390/kvm/kvm-s390.c    |   11 +
 arch/s390/kvm/kvm-s390.h    |    3 
 arch/s390/kvm/priv.c        |  322 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-s390/kvm_host.h |   13 +
 6 files changed, 351 insertions(+), 1 deletion(-)

Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o
 obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -100,6 +100,7 @@ static int handle_lctl(struct kvm_vcpu *
 }
 
 static intercept_handler_t instruction_handlers[256] = {
+	[0xb2] = kvm_s390_handle_priv,
 	[0xb7] = handle_lctl,
 	[0xeb] = handle_lctg,
 };
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -48,6 +48,15 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
+	{ "instruction_spx", VCPU_STAT(instruction_spx) },
+	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
+	{ "instruction_stap", VCPU_STAT(instruction_stap) },
+	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
+	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
+	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
+	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
 	{ NULL }
 };
 
@@ -249,6 +258,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu 
 	vcpu->arch.sie_block->eca   = 0xC1002001U;
 	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
 		 (unsigned long) vcpu);
+	get_cpu_id(&vcpu->arch.cpu_id);
+	vcpu->arch.cpu_id.version = 0xfe;
 	return 0;
 }
 
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -47,4 +47,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		struct kvm_s390_interrupt *s390int);
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+
+/* implemented in priv.c */
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
 #endif
Index: kvm/arch/s390/kvm/priv.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/priv.c
@@ -0,0 +1,322 @@
+/*
+ * priv.c - handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/errno.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	u32 address = 0;
+	u8 tmp;
+
+	vcpu->stat.instruction_spx++;
+
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	/* must be word boundary */
+	if (operand2 & 3) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	/* get the value */
+	if (get_guest_u32(vcpu, operand2, &address)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	address = address & 0x7fffe000u;
+
+	/* make sure that the new value is valid memory */
+	if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
+	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	vcpu->arch.sie_block->prefix = address;
+	vcpu->arch.sie_block->ihcpu = 0xffff;
+
+	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
+out:
+	return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	u32 address;
+
+	vcpu->stat.instruction_stpx++;
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	/* must be word boundary */
+	if (operand2 & 3) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	address = vcpu->arch.sie_block->prefix;
+	address = address & 0x7fffe000u;
+
+	/* get the value */
+	if (put_guest_u32(vcpu, operand2, address)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+out:
+	return 0;
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 useraddr;
+	int rc;
+
+	vcpu->stat.instruction_stap++;
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+
+	if (useraddr & 1) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
+	if (rc == -EFAULT) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
+out:
+	return 0;
+}
+
+static int handle_skey(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_storage_key++;
+	vcpu->arch.sie_block->gpsw.addr -= 4;
+	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+	return 0;
+}
+
+static int handle_stsch(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_stsch++;
+	VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+	return 0;
+}
+
+static int handle_chsc(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_chsc++;
+	VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+	return 0;
+}
+
+static unsigned int stfl(void)
+{
+	asm volatile(
+		"	.insn	s,0xb2b10000,0(0)\n" /* stfl */
+		"0:\n"
+		EX_TABLE(0b, 0b));
+	return S390_lowcore.stfl_fac_list;
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+	unsigned int facility_list = stfl();
+	int rc;
+
+	vcpu->stat.instruction_stfl++;
+	facility_list &= ~(1UL<<24); /* no stfle */
+
+	rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+			   &facility_list, sizeof(facility_list));
+	if (rc == -EFAULT)
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	else
+		VCPU_EVENT(vcpu, 5, "store facility list value %x",
+			   facility_list);
+
+	return 0;
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	int rc;
+
+	vcpu->stat.instruction_stidp++;
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	if (operand2 & 7) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
+	if (rc == -EFAULT) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+out:
+	return 0;
+}
+
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	int cpus = 0;
+	int n;
+
+	spin_lock_bh(&fi->lock);
+	for (n = 0; n < KVM_MAX_VCPUS; n++)
+		if (fi->local_int[n])
+			cpus++;
+	spin_unlock_bh(&fi->lock);
+
+	/* deal with other level 3 hypervisors */
+	if (stsi(mem, 3, 2, 2) == -ENOSYS)
+		mem->count = 0;
+	if (mem->count < 8)
+		mem->count++;
+	for (n = mem->count - 1; n > 0 ; n--)
+		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+	mem->vm[0].cpus_total = cpus;
+	mem->vm[0].cpus_configured = cpus;
+	mem->vm[0].cpus_standby = 0;
+	mem->vm[0].cpus_reserved = 0;
+	mem->vm[0].caf = 1000;
+	memcpy(mem->vm[0].name, "KVMguest", 8);
+	ASCEBC(mem->vm[0].name, 8);
+	memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
+	ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+	int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
+	int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
+	int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	unsigned long mem;
+
+	vcpu->stat.instruction_stsi++;
+	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	if (operand2 & 0xfff && fc > 0)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (fc) {
+	case 0:
+		vcpu->arch.guest_gprs[0] = 3 << 28;
+		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+		return 0;
+	case 1: /* same handling for 1 and 2 */
+	case 2:
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_fail;
+		if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
+			goto out_mem;
+		break;
+	case 3:
+		if (sel1 != 2 || sel2 != 2)
+			goto out_fail;
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_fail;
+		handle_stsi_3_2_2(vcpu, (void *) mem);
+		break;
+	default:
+		goto out_fail;
+	}
+
+	if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out_mem;
+	}
+	free_page(mem);
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.guest_gprs[0] = 0;
+	return 0;
+out_mem:
+	free_page(mem);
+out_fail:
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
+	return 0;
+}
+
+static intercept_handler_t priv_handlers[256] = {
+	[0x02] = handle_stidp,
+	[0x10] = handle_set_prefix,
+	[0x11] = handle_store_prefix,
+	[0x12] = handle_store_cpu_address,
+	[0x29] = handle_skey,
+	[0x2a] = handle_skey,
+	[0x2b] = handle_skey,
+	[0x34] = handle_stsch,
+	[0x5f] = handle_chsc,
+	[0x7d] = handle_stsi,
+	[0xb1] = handle_stfl,
+};
+
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+{
+	if (priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff])
+		return priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]
+			(vcpu);
+		return -ENOTSUPP;
+}
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -111,6 +111,15 @@ struct kvm_vcpu_stat {
 	u32 deliver_restart_signal;
 	u32 deliver_program_int;
 	u32 exit_wait_state;
+	u32 instruction_stidp;
+	u32 instruction_spx;
+	u32 instruction_stpx;
+	u32 instruction_stap;
+	u32 instruction_storage_key;
+	u32 instruction_stsch;
+	u32 instruction_chsc;
+	u32 instruction_stsi;
+	u32 instruction_stfl;
 };
 
 struct io_info {
@@ -180,6 +189,10 @@ struct kvm_vcpu_arch {
 	unsigned int      guest_acrs[NUM_ACRS];
 	struct local_interrupt local_int;
 	struct timer_list ckc_timer;
+	union  {
+		cpuid_t	  cpu_id;
+		u64	  stidp_data;
+	};
 };
 
 struct kvm_vm_stat {

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 09/15] kvm-s390: interprocessor communication via sigp
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (7 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 08/15] kvm-s390: intercepts for privileged instructions Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 10/15] kvm-s390: intercepts for diagnose instructions Carsten Otte
                   ` (11 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch introduces in-kernel handling of _some_ sigp interprocessor
signals (similar to ipi).
kvm_s390_handle_sigp() decodes the sigp instruction and calls individual
handlers depending on the operation requested:
- sigp sense tries to retrieve information such as existence or running state
  of the remote cpu
- sigp emergency sends an external interrupt to the remove cpu
- sigp stop stops a remove cpu
- sigp stop store status stops a remote cpu, and stores its entire internal
  state to the cpus lowcore
- sigp set arch sets the architecture mode of the remote cpu. setting to
  ESAME (s390x 64bit) is accepted, setting to ESA/S390 (s390, 31 or 24 bit) is
  denied, all others are passed to userland
- sigp set prefix sets the prefix register of a remote cpu

For implementation of this, the stop intercept indication starts to get reused
on purpose: a set of action bits defines what to do once a cpu gets stopped:
ACTION_STOP_ON_STOP  really stops the cpu when a stop intercept is recognized
ACTION_STORE_ON_STOP stores the cpu status to lowcore when a stop intercept is
                     recognized

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/kvm/Makefile      |    2 
 arch/s390/kvm/intercept.c   |   22 +++
 arch/s390/kvm/kvm-s390.c    |    7 +
 arch/s390/kvm/kvm-s390.h    |    7 +
 arch/s390/kvm/sigp.c        |  289 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-s390/kvm_host.h |   12 +
 6 files changed, 336 insertions(+), 3 deletions(-)

Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o
 obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -100,6 +100,7 @@ static int handle_lctl(struct kvm_vcpu *
 }
 
 static intercept_handler_t instruction_handlers[256] = {
+	[0xae] = kvm_s390_handle_sigp,
 	[0xb2] = kvm_s390_handle_priv,
 	[0xb7] = handle_lctl,
 	[0xeb] = handle_lctg,
@@ -122,10 +123,27 @@ static int handle_noop(struct kvm_vcpu *
 
 static int handle_stop(struct kvm_vcpu *vcpu)
 {
+	int rc;
+
 	vcpu->stat.exit_stop_request++;
-	VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
-	return -ENOTSUPP;
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
+		rc = __kvm_s390_vcpu_store_status(vcpu,
+						  KVM_S390_STORE_STATUS_NOADDR);
+		if (rc >= 0)
+			rc = -ENOTSUPP;
+	}
+
+	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
+		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
+		rc = -ENOTSUPP;
+	} else
+		rc = 0;
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	return rc;
 }
 
 static int handle_validity(struct kvm_vcpu *vcpu)
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -57,6 +57,12 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
+	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
+	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 	{ NULL }
 };
 
@@ -290,6 +296,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
 	spin_lock_bh(&kvm->arch.float_int.lock);
 	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
 	init_waitqueue_head(&vcpu->arch.local_int.wq);
+	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 	spin_unlock_bh(&kvm->arch.float_int.lock);
 
 	rc = kvm_vcpu_init(vcpu, kvm, id);
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -50,4 +50,11 @@ int kvm_s390_inject_program_int(struct k
 
 /* implemented in priv.c */
 int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
+				 unsigned long addr);
 #endif
Index: kvm/arch/s390/kvm/sigp.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/sigp.c
@@ -0,0 +1,289 @@
+/*
+ * sigp.c - handlinge interprocessor communication
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+/* sigp order codes */
+#define SIGP_SENSE             0x01
+#define SIGP_EXTERNAL_CALL     0x02
+#define SIGP_EMERGENCY         0x03
+#define SIGP_START             0x04
+#define SIGP_STOP              0x05
+#define SIGP_RESTART           0x06
+#define SIGP_STOP_STORE_STATUS 0x09
+#define SIGP_INITIAL_CPU_RESET 0x0b
+#define SIGP_CPU_RESET         0x0c
+#define SIGP_SET_PREFIX        0x0d
+#define SIGP_STORE_STATUS_ADDR 0x0e
+#define SIGP_SET_ARCH          0x12
+
+/* cpu status bits */
+#define SIGP_STAT_EQUIPMENT_CHECK   0x80000000UL
+#define SIGP_STAT_INCORRECT_STATE   0x00000200UL
+#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
+#define SIGP_STAT_EXT_CALL_PENDING  0x00000080UL
+#define SIGP_STAT_STOPPED           0x00000040UL
+#define SIGP_STAT_OPERATOR_INTERV   0x00000020UL
+#define SIGP_STAT_CHECK_STOP        0x00000010UL
+#define SIGP_STAT_INOPERATIVE       0x00000004UL
+#define SIGP_STAT_INVALID_ORDER     0x00000002UL
+#define SIGP_STAT_RECEIVER_CHECK    0x00000001UL
+
+
+static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	spin_lock_bh(&fi->lock);
+	if (fi->local_int[cpu_addr] == NULL)
+		rc = 3; /* not operational */
+	else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
+		 & CPUSTAT_RUNNING) {
+		*reg &= 0xffffffff00000000UL;
+		rc = 1; /* status stored */
+	} else {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STAT_STOPPED;
+		rc = 1; /* status stored */
+	}
+	spin_unlock_bh(&fi->lock);
+
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+	return rc;
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_INT_EMERGENCY;
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		kfree(inti);
+		goto unlock;
+	}
+	spin_lock_bh(&li->lock);
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	rc = 0; /* order accepted */
+unlock:
+	spin_unlock_bh(&fi->lock);
+	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+	return rc;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_SIGP_STOP;
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		kfree(inti);
+		goto unlock;
+	}
+	spin_lock_bh(&li->lock);
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+	if (store)
+		li->action_bits |= ACTION_STORE_ON_STOP;
+	li->action_bits |= ACTION_STOP_ON_STOP;
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	rc = 0; /* order accepted */
+unlock:
+	spin_unlock_bh(&fi->lock);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+	return rc;
+}
+
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+{
+	int rc;
+	parameter = parameter & 0xff;
+
+	switch (parameter) {
+	case 0:
+		printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
+							" not supported");
+		rc = 3; /* not operational */
+		break;
+	case 1:
+	case 2:
+		rc = 0; /* order accepted */
+		break;
+	default:
+		rc = -ENOTSUPP;
+	}
+	return rc;
+}
+
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
+			     u64 *reg)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+	u8 tmp;
+
+	/* make sure that the new value is valid memory */
+	address = address & 0x7fffe000u;
+	if ((copy_from_guest(vcpu, &tmp,
+		(u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
+	   (copy_from_guest(vcpu, &tmp, (u64) (address +
+			vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
+		*reg |= SIGP_STAT_INVALID_PARAMETER;
+		return 1; /* invalid parameter */
+	}
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return 2; /* busy */
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+
+	if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
+		rc = 1; /* incorrect state */
+		*reg &= SIGP_STAT_INCORRECT_STATE;
+		kfree(inti);
+		goto out_fi;
+	}
+
+	spin_lock_bh(&li->lock);
+	/* cpu must be in stopped state */
+	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+		rc = 1; /* incorrect state */
+		*reg &= SIGP_STAT_INCORRECT_STATE;
+		kfree(inti);
+		goto out_li;
+	}
+
+	inti->type = KVM_S390_SIGP_SET_PREFIX;
+	inti->prefix.address = address;
+
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	rc = 0; /* order accepted */
+
+	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+out_li:
+	spin_unlock_bh(&li->lock);
+out_fi:
+	spin_unlock_bh(&fi->lock);
+	return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u32 parameter;
+	u16 cpu_addr = vcpu->arch.guest_gprs[r3];
+	u8 order_code;
+	int rc;
+
+	order_code = disp2;
+	if (base2)
+		order_code += vcpu->arch.guest_gprs[base2];
+
+	if (r1 % 2)
+		parameter = vcpu->arch.guest_gprs[r1];
+	else
+		parameter = vcpu->arch.guest_gprs[r1 + 1];
+
+	switch (order_code) {
+	case SIGP_SENSE:
+		vcpu->stat.instruction_sigp_sense++;
+		rc = __sigp_sense(vcpu, cpu_addr,
+				  &vcpu->arch.guest_gprs[r1]);
+		break;
+	case SIGP_EMERGENCY:
+		vcpu->stat.instruction_sigp_emergency++;
+		rc = __sigp_emergency(vcpu, cpu_addr);
+		break;
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, cpu_addr, 0);
+		break;
+	case SIGP_STOP_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, cpu_addr, 1);
+		break;
+	case SIGP_SET_ARCH:
+		vcpu->stat.instruction_sigp_arch++;
+		rc = __sigp_set_arch(vcpu, parameter);
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
+				       &vcpu->arch.guest_gprs[r1]);
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		/* user space must know about restart */
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (rc < 0)
+		return rc;
+
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+	return 0;
+}
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -120,6 +120,12 @@ struct kvm_vcpu_stat {
 	u32 instruction_chsc;
 	u32 instruction_stsi;
 	u32 instruction_stfl;
+	u32 instruction_sigp_sense;
+	u32 instruction_sigp_emergency;
+	u32 instruction_sigp_stop;
+	u32 instruction_sigp_arch;
+	u32 instruction_sigp_prefix;
+	u32 instruction_sigp_restart;
 };
 
 struct io_info {
@@ -161,6 +167,10 @@ struct interrupt_info {
 	};
 };
 
+/* for local_interrupt.action_flags */
+#define ACTION_STORE_ON_STOP 1
+#define ACTION_STOP_ON_STOP  2
+
 struct local_interrupt {
 	spinlock_t lock;
 	struct list_head list;
@@ -168,6 +178,8 @@ struct local_interrupt {
 	struct float_interrupt *float_int;
 	int timer_due; /* event indicator for waitqueue below */
 	wait_queue_head_t wq;
+	atomic_t *cpuflags;
+	unsigned int action_bits;
 };
 
 struct float_interrupt {

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 10/15] kvm-s390: intercepts for diagnose instructions
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (8 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 09/15] kvm-s390: interprocessor communication via sigp Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 11/15] kvm-s390: add kvm to kconfig on s390 Carsten Otte
                   ` (10 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch introduces interpretation of some diagnose instruction intercepts.
Diagnose is our classic architected way of doing a hypercall. This patch
features the following diagnose codes:
- vm storage size, that tells the guest about its memory layout
- time slice end, which is used by the guest to indicate that it waits
  for a lock and thus cannot use up its time slice in a useful way
- ipl functions, which a guest can use to reset and reboot itself

In order to implement ipl functions, we also introduce an exit reason that
causes userspace to perform various resets on the virtual machine. All resets
are described in the principles of operation book, except KVM_S390_RESET_IPL
which causes a reboot of the machine.

Acked-by: Martin Schwidefsky <martin.schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/kvm/Makefile      |    2 -
 arch/s390/kvm/diag.c        |   67 ++++++++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/intercept.c   |    1 
 arch/s390/kvm/kvm-s390.c    |    1 
 arch/s390/kvm/kvm-s390.h    |    2 +
 include/asm-s390/kvm_host.h |    5 ++-
 include/linux/kvm.h         |    8 +++++
 7 files changed, 84 insertions(+), 2 deletions(-)

Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
 obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/diag.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/diag.c
@@ -0,0 +1,67 @@
+/*
+ * diag.c - handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+	vcpu->stat.diagnose_44++;
+	vcpu_put(vcpu);
+	schedule();
+	vcpu_load(vcpu);
+	return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+	unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
+
+	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+	switch (subcode) {
+	case 3:
+		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+		break;
+	case 4:
+		vcpu->run->s390_reset_flags = 0;
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
+	vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
+	VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
+	  vcpu->run->s390_reset_flags);
+	return -EREMOTE;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+
+	switch (code) {
+	case 0x44:
+		return __diag_time_slice_end(vcpu);
+	case 0x308:
+		return __diag_ipl_functions(vcpu);
+	default:
+		return -ENOTSUPP;
+	}
+}
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -100,6 +100,7 @@ static int handle_lctl(struct kvm_vcpu *
 }
 
 static intercept_handler_t instruction_handlers[256] = {
+	[0x83] = kvm_s390_handle_diag,
 	[0xae] = kvm_s390_handle_sigp,
 	[0xb2] = kvm_s390_handle_priv,
 	[0xb7] = handle_lctl,
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ NULL }
 };
 
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -57,4 +57,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu
 /* implemented in kvm-s390.c */
 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
 				 unsigned long addr);
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 #endif
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -86,7 +86,9 @@ struct sie_block {
 	psw_t	gpsw;			/* 0x0090 */
 	__u64	gg14;			/* 0x00a0 */
 	__u64	gg15;			/* 0x00a8 */
-	__u8	reservedb0[80];		/* 0x00b0 */
+	__u8	reservedb0[30];		/* 0x00b0 */
+	__u16   iprcc;			/* 0x00ce */
+	__u8	reservedd0[48];		/* 0x00d0 */
 	__u64	gcr[16];		/* 0x0100 */
 	__u64	gbea;			/* 0x0180 */
 	__u8	reserved188[120];	/* 0x0188 */
@@ -126,6 +128,7 @@ struct kvm_vcpu_stat {
 	u32 instruction_sigp_arch;
 	u32 instruction_sigp_prefix;
 	u32 instruction_sigp_restart;
+	u32 diagnose_44;
 };
 
 struct io_info {
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -75,6 +75,7 @@ struct kvm_irqchip {
 #define KVM_EXIT_SET_TPR          11
 #define KVM_EXIT_TPR_ACCESS       12
 #define KVM_EXIT_S390_SIEIC       13
+#define KVM_EXIT_S390_RESET       14
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -147,6 +148,13 @@ struct kvm_run {
 			__u16 ipa;
 			__u32 ipb;
 		} s390_sieic;
+		/* KVM_EXIT_S390_RESET */
+#define KVM_S390_RESET_POR       1
+#define KVM_S390_RESET_CLEAR     2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT  8
+#define KVM_S390_RESET_IPL       16
+		__u64 s390_reset_flags;
 		/* Fix the size of the union. */
 		char padding[256];
 	};

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 11/15] kvm-s390: add kvm to kconfig on s390
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (9 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 10/15] kvm-s390: intercepts for diagnose instructions Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 12/15] kvm-s390: API documentation Carsten Otte
                   ` (9 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch adds the virtualization submenu and the kvm option to the kernel
config. It also defines HAVE_KVM for 64bit kernels.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/Kconfig     |    3 +++
 arch/s390/kvm/Kconfig |   43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

Index: kvm/arch/s390/Kconfig
===================================================================
--- kvm.orig/arch/s390/Kconfig
+++ kvm/arch/s390/Kconfig
@@ -66,6 +66,7 @@ config S390
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_KVM if 64BIT
 
 source "init/Kconfig"
 
@@ -553,3 +554,5 @@ source "security/Kconfig"
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "arch/s390/kvm/Kconfig"
Index: kvm/arch/s390/kvm/Kconfig
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/Kconfig
@@ -0,0 +1,43 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+       bool
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	default y
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select S390_SWITCH_AMODE
+	select PREEMPT
+	---help---
+	  Support hosting paravirtualized guest machines using the SIE
+	  virtualization capability on the mainframe. This should work
+	  on any 64bit machine.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 12/15] kvm-s390: API documentation
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (10 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 11/15] kvm-s390: add kvm to kconfig on s390 Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 13/15] kvm-s390: update maintainers Carsten Otte
                   ` (8 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Carsten Otte <cotte@de.ibm.com>

This patch adds Documentation/s390/kvm.txt, which describes specifics of kvm's
user interface that are unique to s390 architecture.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 Documentation/s390/kvm.txt |  125 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)

Index: kvm/Documentation/s390/kvm.txt
===================================================================
--- /dev/null
+++ kvm/Documentation/s390/kvm.txt
@@ -0,0 +1,125 @@
+*** BIG FAT WARNING ***
+The kvm module is currently in EXPERIMENTAL state for s390. This means, that
+the interface to the module is not yet considered to remain stable. Thus, be
+prepared that we keep breaking your userspace application and guest
+compatibility over and over again until we feel happy with the result. Make sure
+your guest kernel, your host kernel, and your userspace launcher are in a
+consistent state.
+
+This Documentation describes the unique ioctl calls to /dev/kvm, the resulting
+kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.
+
+1. ioctl calls to /dev/kvm
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_GET_API_VERSION
+KVM_CREATE_VM		(*) see note
+KVM_CHECK_EXTENSION
+KVM_GET_VCPU_MMAP_SIZE
+
+Notes:
+* KVM_CREATE_VM may fail on s390, if the calling process has multiple
+threads and has not called KVM_S390_ENABLE_SIE before.
+
+In addition, on s390 the following architecture specific ioctls are supported:
+ioctl:		KVM_S390_ENABLE_SIE
+args:		none
+see also:	include/linux/kvm.h
+This call causes the kernel to switch on PGSTE in the user page table. This
+operation is needed in order to run a virtual machine, and it requires the
+calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
+will implicitly try to switch on PGSTE if the user process has not called
+KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
+before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will
+observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
+operation, is not reversible, and will persist over the entire lifetime of
+the calling process. It does not have any user-visibe effect other than a small
+performance penalty.
+
+2. ioctl calls to the kvm-vm file descriptor
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_CREATE_VCPU
+KVM_SET_USER_MEMORY_REGION      (*) see note
+KVM_GET_DIRTY_LOG		(**) see note
+
+Notes:
+*  kvm does only allow exactly one memory slot on s390, which has to start
+   at guest absolute address zero and at a user address that is aligned on any
+   page boundary. This hardware "limitation" allows us to have a few unique
+   optimizations. The memory slot does'nt have to be filled
+   with memory actually, it may contain sparse holes. That said, with different
+   user memory layout this does still allow a large flexibility when
+   doing the guest memory setup.
+** KVM_GET_DIRTY_LOG does'nt work proper yet. The user will receive an empty
+log. This ioctl call is only needed for guest migration, and we intend to
+implement this one in the future.
+
+In addition, on s390 the following architecture specific ioctls for the kvm-vm
+file descriptor are supported:
+ioctl:		KVM_S390_INTERRUPT
+args:		struct kvm_s390_interrupt *
+see also:	include/linux/kvm.h
+This ioctl is used to submit a floating interrupt for a virtual machine.
+Floating interrupts may be delivered to any virtual cpu in the configuration.
+Only some interrupt types defined in include/linux/kvm.h make sense when
+submitted as floating interrupt. The following interrupts are not considered
+to be useful as floating interrupt, and a call to inject them will result in
+-EINVAL error code: program interrupts, and interprocessor signals. Valid
+floating interrupts are:
+KVM_S390_INT_VIRTIO
+KVM_S390_INT_SERVICE
+
+3. ioctl calls to the kvm-vcpu file descriptor
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_RUN
+KVM_GET_REGS
+KVM_SET_REGS
+KVM_GET_SREGS
+KVM_SET_SREGS
+KVM_GET_FPU
+KVM_SET_FPU
+
+In addition, on s390 the following architecture specific ioctls for the
+kvm-vcpu file descriptor are supported:
+ioctl:		KVM_S390_INTERRUPT
+args:		struct kvm_s390_interrupt *
+see also:	include/linux/kvm.h
+This ioctl is used to submit an interrupt for a specific virtual cpu.
+Only some interrupt types defined in include/linux/kvm.h make sense when
+submitted for a specific cpu. The following interrupts are not considered
+to be useful, and a call to inject them will result in -EINVAL error code:
+service processor calls, and virtio interrupts. Valid interrupt types are:
+KVM_S390_PROGRAM_INT
+KVM_S390_SIGP_STOP
+KVM_S390_RESTART
+KVM_S390_SIGP_SET_PREFIX
+KVM_S390_INT_EMERGENCY
+
+ioctl:		KVM_S390_STORE_STATUS
+args:		unsigned long
+see also:	include/linux/kvm.h
+This ioctl stores the state of the cpu at the guest real address given as
+argument, unless one of the following values defined in include/linux/kvm.h
+is given as arguement:
+KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
+absolute lowcore as defined by the principles of operation
+KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
+its prefix page just like the dump tool that comes with zipl. This is useful
+to create a system dump for use with lkcdutils or crash.
+
+ioctl:		KVM_S390_SET_INITIAL_PSW
+args:		struct kvm_s390_psw *
+see also:	include/linux/kvm.h
+This ioctl can be used to set the processor status word (psw) of a stopped cpu
+prior to running it with KVM_RUN. Note that this call is not required to modify
+the psw during sie intercepts that fall back to userspace because struct kvm_run
+does contain the psw, and this value is evaluated during reentry of KVM_RUN
+after the intercept exit was recognized.
+
+ioctl:		KVM_S390_INITIAL_RESET
+args:		none
+see also:	include/linux/kvm.h
+This ioctl can be used to perform an initial cpu reset as defined by the
+principles of operation. The target cpu has to be in stopped state.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 13/15] kvm-s390: update maintainers
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (11 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 12/15] kvm-s390: API documentation Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 14/15] guest: detect when running on kvm Carsten Otte
                   ` (7 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch adds an entry for kvm on s390 to the MAINTAINERS file :-). We intend
to push all patches regarding this via Avi's kvm.git.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 MAINTAINERS |   10 ++++++++++
 1 file changed, 10 insertions(+)

Index: kvm/MAINTAINERS
===================================================================
--- kvm.orig/MAINTAINERS
+++ kvm/MAINTAINERS
@@ -2296,6 +2296,16 @@ L:	kvm-ia64-devel@lists.sourceforge.net
 W:	kvm.sourceforge.net
 S:	Supported
 
+KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
+P:	Carsten Otte
+M:	cotte@de.ibm.com
+P:	Christian Borntraeger
+M:	borntraeger@de.ibm.com
+M:	linux390@de.ibm.com
+L:	linux-s390@vger.kernel.org
+W:	http://www.ibm.com/developerworks/linux/linux390/
+S:	Supported
+
 KEXEC
 P:	Eric Biederman
 M:	ebiederm@xmission.com

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (12 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 13/15] kvm-s390: update maintainers Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
  2008-03-20 16:25 ` [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls Carsten Otte
                   ` (6 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Christian Borntraeger <borntraeger@de.ibm.com>
From: Carsten Otte <cotte@de.ibm.com>

This patch adds functionality to detect if the kernel runs under the KVM
hypervisor. A macro MACHINE_IS_KVM is exported for device drivers. This
allows drivers to skip device detection if the systems runs non-virtualized.
We also define a preferred console to avoid having the ttyS0, which is a line
mode only console.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 arch/s390/Kconfig        |    7 +++++++
 arch/s390/kernel/early.c |    4 ++++
 arch/s390/kernel/setup.c |   10 +++++++---
 include/asm-s390/setup.h |    1 +
 4 files changed, 19 insertions(+), 3 deletions(-)

Index: kvm/arch/s390/Kconfig
===================================================================
--- kvm.orig/arch/s390/Kconfig
+++ kvm/arch/s390/Kconfig
@@ -533,6 +533,13 @@ config ZFCPDUMP
 	  Select this option if you want to build an zfcpdump enabled kernel.
 	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
 
+config S390_GUEST
+bool "s390 guest support (EXPERIMENTAL)"
+	depends on 64BIT && EXPERIMENTAL
+	select VIRTIO
+	select VIRTIO_RING
+	help
+	  Select this option if you want to run the kernel under s390 linux
 endmenu
 
 source "net/Kconfig"
Index: kvm/arch/s390/kernel/early.c
===================================================================
--- kvm.orig/arch/s390/kernel/early.c
+++ kvm/arch/s390/kernel/early.c
@@ -143,6 +143,10 @@ static noinline __init void detect_machi
 	/* Running on a P/390 ? */
 	if (cpuinfo->cpu_id.machine == 0x7490)
 		machine_flags |= 4;
+
+	/* Running under KVM ? */
+	if (cpuinfo->cpu_id.version == 0xfe)
+		machine_flags |= 64;
 }
 
 #ifdef CONFIG_64BIT
Index: kvm/arch/s390/kernel/setup.c
===================================================================
--- kvm.orig/arch/s390/kernel/setup.c
+++ kvm/arch/s390/kernel/setup.c
@@ -793,9 +793,13 @@ setup_arch(char **cmdline_p)
 	       "This machine has an IEEE fpu\n" :
 	       "This machine has no IEEE fpu\n");
 #else /* CONFIG_64BIT */
-	printk((MACHINE_IS_VM) ?
-	       "We are running under VM (64 bit mode)\n" :
-	       "We are running native (64 bit mode)\n");
+	if (MACHINE_IS_VM)
+		printk("We are running under VM (64 bit mode)\n");
+	else if (MACHINE_IS_KVM) {
+		printk("We are running under KVM (64 bit mode)\n");
+		add_preferred_console("ttyS", 1, NULL);
+	} else
+		printk("We are running native (64 bit mode)\n");
 #endif /* CONFIG_64BIT */
 
 	/* Save unparsed command line copy for /proc/cmdline */
Index: kvm/include/asm-s390/setup.h
===================================================================
--- kvm.orig/include/asm-s390/setup.h
+++ kvm/include/asm-s390/setup.h
@@ -62,6 +62,7 @@ extern unsigned long machine_flags;
 #define MACHINE_IS_VM		(machine_flags & 1)
 #define MACHINE_IS_P390		(machine_flags & 4)
 #define MACHINE_HAS_MVPG	(machine_flags & 16)
+#define MACHINE_IS_KVM		(machine_flags & 64)
 #define MACHINE_HAS_IDTE	(machine_flags & 128)
 #define MACHINE_HAS_DIAG9C	(machine_flags & 256)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
       [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
                   ` (13 preceding siblings ...)
  2008-03-20 16:25 ` [RFC/PATCH 14/15] guest: detect when running on kvm Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
       [not found] ` <1206030298.6690.56.camel@cotte.boeblingen.de.ibm.com>
                   ` (5 subsequent siblings)
  20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
  To: virtualization, kvm-devel, Avi Kivity
  Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
	borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao

From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch implements kvm guest kernel support for paravirtualized devices
and contains two parts:
o a basic virtio stub using virtio_ring and external interrupts and hypercalls
o full hypercall implementation in kvm_para.h

Currently we dont have PCI on s390. Making virtio_pci usable for s390 seems
more complicated that providing an own stub. This virtio stub is similar to
the lguest one, the memory for the descriptors and the device detection is made
via additional mapped memory on top of the guest storage. We use an external
interrupt with extint code 1237 for host->guest notification. 

The hypercall definition uses the diag instruction for issuing a hypercall. The
parameters are written in R2-R7, the hypercall number is written in R1. This is
similar to the system call ABI (svc) which can use R1 for the number and R2-R6 
for the parameters.


Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
 drivers/s390/Makefile         |    2 
 drivers/s390/kvm/Makefile     |    9 +
 drivers/s390/kvm/kvm_virtio.c |  326 ++++++++++++++++++++++++++++++++++++++++++
 drivers/s390/kvm/kvm_virtio.h |   47 ++++++
 include/asm-s390/kvm_para.h   |  124 +++++++++++++++
 5 files changed, 505 insertions(+), 3 deletions(-)

Index: kvm/drivers/s390/Makefile
===================================================================
--- kvm.orig/drivers/s390/Makefile
+++ kvm/drivers/s390/Makefile
@@ -5,7 +5,7 @@
 CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
 
 obj-y += s390mach.o sysinfo.o s390_rdev.o
-obj-y += cio/ block/ char/ crypto/ net/ scsi/
+obj-y += cio/ block/ char/ crypto/ net/ scsi/ kvm/
 
 drivers-y += drivers/s390/built-in.o
 
Index: kvm/drivers/s390/kvm/Makefile
===================================================================
--- /dev/null
+++ kvm/drivers/s390/kvm/Makefile
@@ -0,0 +1,9 @@
+# Makefile for kvm guest drivers on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+obj-$(CONFIG_VIRTIO) += kvm_virtio.o
Index: kvm/drivers/s390/kvm/kvm_virtio.c
===================================================================
--- /dev/null
+++ kvm/drivers/s390/kvm/kvm_virtio.c
@@ -0,0 +1,326 @@
+/*
+ * kvm_virtio.c - virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/err.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/interrupt.h>
+#include <linux/virtio_ring.h>
+#include <asm/io.h>
+#include <asm/kvm_para.h>
+#include <asm/setup.h>
+#include <asm/s390_ext.h>
+
+#include "kvm_virtio.h"
+
+/*
+ * The pointer to our (page) of device descriptions.
+ */
+static void *kvm_devices;
+
+/*
+ * Unique numbering for kvm devices.
+ */
+static unsigned int dev_index;
+
+struct kvm_device {
+	struct virtio_device vdev;
+	struct kvm_device_desc *desc;
+};
+
+#define to_kvmdev(vd) container_of(vd, struct kvm_device, vdev)
+
+/*
+ * memory layout:
+ * - kvm_device_descriptor
+ *        struct kvm_device_desc
+ * - configuration
+ *        struct kvm_vqconfig
+ * - feature bits
+ * - config space
+ */
+static struct kvm_vqconfig *kvm_vq_config(const struct kvm_device_desc *desc)
+{
+	return (struct kvm_vqconfig *)(desc + 1);
+}
+
+static u8 *kvm_vq_features(const struct kvm_device_desc *desc)
+{
+	return (u8 *)(kvm_vq_config(desc) + desc->num_vq);
+}
+
+static u8 *kvm_vq_configspace(const struct kvm_device_desc *desc)
+{
+	return kvm_vq_features(desc) + desc->feature_len * 2;
+}
+
+/*
+ * The total size of the config page used by this device (incl. desc)
+ */
+static unsigned desc_size(const struct kvm_device_desc *desc)
+{
+	return sizeof(*desc)
+		+ desc->num_vq * sizeof(struct kvm_vqconfig)
+		+ desc->feature_len * 2
+		+ desc->config_len;
+}
+
+/*
+ * This tests (and acknowleges) a feature bit.
+ */
+static bool kvm_feature(struct virtio_device *vdev, unsigned fbit)
+{
+	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
+	u8 *features;
+
+	if (fbit / 8 > desc->feature_len)
+		return false;
+
+	features = kvm_vq_features(desc);
+	if (!(features[fbit / 8] & (1 << (fbit % 8))))
+		return false;
+
+	/*
+	 * We set the matching bit in the other half of the bitmap to tell the
+	 * Host we want to use this feature.
+	 */
+	features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
+	return true;
+}
+
+/*
+ * Reading and writing elements in config space
+ */
+static void kvm_get(struct virtio_device *vdev, unsigned int offset,
+		   void *buf, unsigned len)
+{
+	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
+
+	BUG_ON(offset + len > desc->config_len);
+	memcpy(buf, kvm_vq_configspace(desc) + offset, len);
+}
+
+static void kvm_set(struct virtio_device *vdev, unsigned int offset,
+		   const void *buf, unsigned len)
+{
+	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
+
+	BUG_ON(offset + len > desc->config_len);
+	memcpy(kvm_vq_configspace(desc) + offset, buf, len);
+}
+
+/*
+ * The operations to get and set the status word just access
+ * the status field of the device descriptor.
+ */
+static u8 kvm_get_status(struct virtio_device *vdev)
+{
+	return to_kvmdev(vdev)->desc->status;
+}
+
+static void kvm_set_status(struct virtio_device *vdev, u8 status)
+{
+	BUG_ON(!status);
+	to_kvmdev(vdev)->desc->status = status;
+}
+
+/*
+ * To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
+ * address of the device.  The Host will zero the status and all the
+ * features.
+ */
+static void kvm_reset(struct virtio_device *vdev)
+{
+	unsigned long offset = (void *)to_kvmdev(vdev)->desc - kvm_devices;
+
+	kvm_hypercall1(1237, (max_pfn<<PAGE_SHIFT) + offset);
+}
+
+/*
+ * When the virtio_ring code wants to notify the Host, it calls us here and we
+ * make a hypercall.  We hand the address  of the virtqueue so the Host
+ * knows which virtqueue we're talking about.
+ */
+static void kvm_notify(struct virtqueue *vq)
+{
+	struct kvm_vqconfig *config = vq->priv;
+
+	kvm_hypercall1(1237, config->address);
+}
+
+/*
+ * This routine finds the first virtqueue described in the configuration of
+ * this device and sets it up.
+ */
+static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
+				    unsigned index,
+				    void (*callback)(struct virtqueue *vq))
+{
+	struct kvm_device *kdev = to_kvmdev(vdev);
+	struct kvm_vqconfig *config;
+	struct virtqueue *vq;
+	int err;
+
+	if (index >= kdev->desc->num_vq)
+		return ERR_PTR(-ENOENT);
+
+	config = kvm_vq_config(kdev->desc)+index;
+
+	if (add_shared_memory(config->address,
+				vring_size(config->num, PAGE_SIZE))) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vq = vring_new_virtqueue(config->num, vdev, (void *) config->address,
+				 kvm_notify, callback);
+	if (!vq) {
+		err = -ENOMEM;
+		goto unmap;
+	}
+
+	/*
+	 * register a callback token
+	 * The host will sent this via the external interrupt parameter
+	 */
+	config->token = (u64) vq;
+
+	vq->priv = config;
+	return vq;
+unmap:
+	remove_shared_memory(config->address, vring_size(config->num,
+			     PAGE_SIZE));
+out:
+	return ERR_PTR(err);
+}
+
+static void kvm_del_vq(struct virtqueue *vq)
+{
+	struct kvm_vqconfig *config = vq->priv;
+
+	vring_del_virtqueue(vq);
+	remove_shared_memory(config->address,
+			     vring_size(config->num, PAGE_SIZE));
+}
+
+/*
+ * The config ops structure as defined by virtio config
+ */
+static struct virtio_config_ops kvm_vq_configspace_ops = {
+	.feature = kvm_feature,
+	.get = kvm_get,
+	.set = kvm_set,
+	.get_status = kvm_get_status,
+	.set_status = kvm_set_status,
+	.reset = kvm_reset,
+	.find_vq = kvm_find_vq,
+	.del_vq = kvm_del_vq,
+};
+
+/*
+ * The root device for the kvm virtio devices.
+ * This makes them appear as /sys/devices/kvm/0,1,2 not /sys/devices/0,1,2.
+ */
+static struct device kvm_root = {
+	.parent = NULL,
+	.bus_id = "kvm_s390",
+};
+
+/*
+ * adds a new device and register it with virtio
+ * appropriate drivers are loaded by the device model
+ */
+static void add_kvm_device(struct kvm_device_desc *d)
+{
+	struct kvm_device *kdev;
+
+	kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
+	if (!kdev) {
+		printk(KERN_EMERG "Cannot allocate kvm dev %u\n",
+		       dev_index++);
+		return;
+	}
+
+	kdev->vdev.dev.parent = &kvm_root;
+	kdev->vdev.index = dev_index++;
+	kdev->vdev.id.device = d->type;
+	kdev->vdev.config = &kvm_vq_configspace_ops;
+	kdev->desc = d;
+
+	if (register_virtio_device(&kdev->vdev) != 0) {
+		printk(KERN_ERR "Failed to register kvm device %u\n",
+		       kdev->vdev.index);
+		kfree(kdev);
+	}
+}
+
+/*
+ * scan_devices() simply iterates through the device page.
+ * The type 0 is reserved to mean "end of devices".
+ */
+static void scan_devices(void)
+{
+	unsigned int i;
+	struct kvm_device_desc *d;
+
+	for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
+		d = kvm_devices + i;
+
+		if (d->type == 0)
+			break;
+
+		add_kvm_device(d);
+	}
+}
+
+/*
+ * we emulate the request_irq behaviour on top of s390 extints
+ */
+static void kvm_extint_handler(u16 code)
+{
+	void *data = (void *) *(long *) __LC_PFAULT_INTPARM;
+
+	vring_interrupt(0, data);
+}
+
+/*
+ * Init function for virtio
+ * devices are in a single page above top of "normal" mem
+ */
+static int __init kvm_devices_init(void)
+{
+	if (!MACHINE_IS_KVM)
+		return -ENODEV;
+
+	if (device_register(&kvm_root) != 0)
+		panic("Could not register kvm root");
+
+	if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
+		device_unregister(&kvm_root);
+		return -ENOMEM;
+	}
+
+	kvm_devices  = (void *) (max_pfn << PAGE_SHIFT);
+
+	register_external_interrupt(0x1237, kvm_extint_handler);
+	ctl_set_bit(0, 9);
+
+	scan_devices();
+	return 0;
+}
+
+/*
+ * We do this after core stuff, but before the drivers.
+ */
+postcore_initcall(kvm_devices_init);
Index: kvm/drivers/s390/kvm/kvm_virtio.h
===================================================================
--- /dev/null
+++ kvm/drivers/s390/kvm/kvm_virtio.h
@@ -0,0 +1,47 @@
+/*
+ * kvm_virtio.h - definition for virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_VIRTIO_H
+#define __KVM_S390_VIRTIO_H
+
+struct kvm_device_desc {
+	/* The device type: console, network, disk etc.  Type 0 terminates. */
+	__u8 type;
+	/* The number of virtqueues (first in config array) */
+	__u8 num_vq;
+	/*
+	 * The number of bytes of feature bits.  Multiply by 2: one for host
+	 * features and one for guest acknowledgements.
+	 */
+	__u8 feature_len;
+	/* The number of bytes of the config array after virtqueues. */
+	__u8 config_len;
+	/* A status byte, written by the Guest. */
+	__u8 status;
+	__u8 config[0];
+};
+
+/*
+ * This is how we expect the device configuration field for a virtqueue
+ * to be laid out in config space.
+ */
+struct kvm_vqconfig {
+	/* The token returned with an interrupt. Set by the guest */
+	__u64 token;
+	/* The address of the virtio ring */
+	__u64 address;
+	/* The number of entries in the virtio_ring */
+	__u16 num;
+
+};
+#endif
+
Index: kvm/include/asm-s390/kvm_para.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_para.h
+++ kvm/include/asm-s390/kvm_para.h
@@ -14,14 +14,134 @@
 #define __S390_KVM_PARA_H
 
 /*
- * No hypercalls for KVM on s390
+ * Hypercalls for KVM on s390. The calling convention is similar to the
+ * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
+ * as hypercall number and R7 as parameter 6. The return value is
+ * written to R2. We use the diagnose instruction as hypercall. To avoid
+ * conflicts with existing diagnoses for LPAR and z/VM, we do not use
+ * the instruction encoded number, but specify the number in R1 and
+ * use 0x500 as KVM hypercall
+ *
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
  */
 
+static inline long kvm_hypercall0(unsigned long nr)
+{
+	register unsigned long __nr asm("1") = nr;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr): "memory", "cc");
+	return __rc;
+}
+
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc");
+	return __rc;
+}
+
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+			       unsigned long p2)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2)
+		      : "memory", "cc");
+	return __rc;
+}
+
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3) : "memory", "cc");
+	return __rc;
+}
+
+
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register unsigned long __p4 asm("5") = p4;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3), "d" (__p4) : "memory", "cc");
+	return __rc;
+}
+
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register unsigned long __p4 asm("5") = p4;
+	register unsigned long __p5 asm("6") = p5;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3), "d" (__p4), "d" (__p5)  : "memory", "cc");
+	return __rc;
+}
+
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5,
+			       unsigned long p6)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register unsigned long __p4 asm("5") = p4;
+	register unsigned long __p5 asm("6") = p5;
+	register unsigned long __p6 asm("7") = p6;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6)
+		      : "memory", "cc");
+	return __rc;
+}
+
+/* kvm on s390 is always paravirtualization enabled */
 static inline int kvm_para_available(void)
 {
-	return 0;
+	return 1;
 }
 
+/* No feature bits are currently assigned for kvm on s390 */
 static inline unsigned int kvm_arch_para_features(void)
 {
 	return 0;

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 05/15] KVM_MAX_VCPUS
       [not found] ` <1206030298.6690.56.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 16:43   ` Hollis Blanchard
       [not found]   ` <1206031383.6356.13.camel@basalt>
  1 sibling, 0 replies; 52+ messages in thread
From: Hollis Blanchard @ 2008-03-20 16:43 UTC (permalink / raw)
  To: Carsten Otte
  Cc: aliguori, EHRHARDT, arnd, kvm-devel, heiko.carstens, jeroney,
	virtualization, borntraeger, schwidefsky, rvdheij, os, jblunck,
	Zhang, Xiantao

On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
> Index: kvm/include/linux/kvm_host.h
> ===================================================================
> --- kvm.orig/include/linux/kvm_host.h
> +++ kvm/include/linux/kvm_host.h
> @@ -24,7 +24,11 @@
> 
>  #include <asm/kvm_host.h>
> 
> +#ifdef CONFIG_S390
> +#define KVM_MAX_VCPUS 64
> +#else
>  #define KVM_MAX_VCPUS 16
> +#endif
>  #define KVM_MEMORY_SLOTS 32
>  /* memory slots that does not exposed to userspace */
>  #define KVM_PRIVATE_MEM_SLOTS 4
> 
Why don't we just define this in <asm/kvm_host.h> ?

-- 
Hollis Blanchard
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 05/15] KVM_MAX_VCPUS
       [not found]   ` <1206031383.6356.13.camel@basalt>
@ 2008-03-20 16:48     ` Carsten Otte
       [not found]     ` <47E2954B.7090903@de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:48 UTC (permalink / raw)
  To: Hollis Blanchard
  Cc: aliguori, Christian Ehrhardt, arnd, kvm-devel, mschwid2, heicars2,
	jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
	jblunck, Zhang, Xiantao

Hollis Blanchard wrote:
> On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
>> Index: kvm/include/linux/kvm_host.h
>> ===================================================================
>> --- kvm.orig/include/linux/kvm_host.h
>> +++ kvm/include/linux/kvm_host.h
>> @@ -24,7 +24,11 @@
>>
>>  #include <asm/kvm_host.h>
>>
>> +#ifdef CONFIG_S390
>> +#define KVM_MAX_VCPUS 64
>> +#else
>>  #define KVM_MAX_VCPUS 16
>> +#endif
>>  #define KVM_MEMORY_SLOTS 32
>>  /* memory slots that does not exposed to userspace */
>>  #define KVM_PRIVATE_MEM_SLOTS 4
>>
> Why don't we just define this in <asm/kvm_host.h> ?
No problem with that, I just wanted to keep impact on common code very 
low and things like this seperated from the actual port. I have a few 
things like this that can safely be taken care about later.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found] ` <1206030326.6690.65.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 17:16   ` Randy Dunlap
       [not found]   ` <20080320101602.f2737c94.randy.dunlap@oracle.com>
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 52+ messages in thread
From: Randy Dunlap @ 2008-03-20 17:16 UTC (permalink / raw)
  To: Carsten Otte
  Cc: aliguori, EHRHARDT, hollisb, arnd, kvm-devel, heiko.carstens,
	jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
	jblunck, Zhang, Xiantao

On Thu, 20 Mar 2008 17:25:26 +0100 Carsten Otte wrote:

> From: Christian Borntraeger <borntraeger@de.ibm.com>
> From: Carsten Otte <cotte@de.ibm.com>
> 
> This patch adds functionality to detect if the kernel runs under the KVM
> hypervisor. A macro MACHINE_IS_KVM is exported for device drivers. This
> allows drivers to skip device detection if the systems runs non-virtualized.
> We also define a preferred console to avoid having the ttyS0, which is a line
> mode only console.
> 
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
> Signed-off-by: Carsten Otte <cotte@de.ibm.com>
> ---
>  arch/s390/Kconfig        |    7 +++++++
>  arch/s390/kernel/early.c |    4 ++++
>  arch/s390/kernel/setup.c |   10 +++++++---
>  include/asm-s390/setup.h |    1 +
>  4 files changed, 19 insertions(+), 3 deletions(-)
> 
> Index: kvm/arch/s390/kernel/early.c
> ===================================================================
> --- kvm.orig/arch/s390/kernel/early.c
> +++ kvm/arch/s390/kernel/early.c
> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>  	/* Running on a P/390 ? */
>  	if (cpuinfo->cpu_id.machine == 0x7490)
>  		machine_flags |= 4;
> +
> +	/* Running under KVM ? */
> +	if (cpuinfo->cpu_id.version == 0xfe)

Hi,

Where are these magic numbers documented?  (0x7490, 0xfe, etc.)


> +		machine_flags |= 64;
>  }
>  
>  #ifdef CONFIG_64BIT

---
~Randy

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 12/15] kvm-s390: API documentation
       [not found] ` <1206030320.6690.63.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 17:22   ` Randy Dunlap
       [not found]   ` <20080320102200.3718e573.randy.dunlap@oracle.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Randy Dunlap @ 2008-03-20 17:22 UTC (permalink / raw)
  To: Carsten Otte
  Cc: aliguori, EHRHARDT, hollisb, arnd, kvm-devel, heiko.carstens,
	jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
	jblunck, Zhang, Xiantao

On Thu, 20 Mar 2008 17:25:20 +0100 Carsten Otte wrote:

> This patch adds Documentation/s390/kvm.txt, which describes specifics of kvm's
> user interface that are unique to s390 architecture.
> 
> Signed-off-by: Carsten Otte <cotte@de.ibm.com>
> ---
>  Documentation/s390/kvm.txt |  125 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 125 insertions(+)
> 
> Index: kvm/Documentation/s390/kvm.txt
> ===================================================================
> --- /dev/null
> +++ kvm/Documentation/s390/kvm.txt
> @@ -0,0 +1,125 @@
> +*** BIG FAT WARNING ***
> +The kvm module is currently in EXPERIMENTAL state for s390. This means, that

This means that  [no comma]

> +the interface to the module is not yet considered to remain stable. Thus, be
> +prepared that we keep breaking your userspace application and guest
> +compatibility over and over again until we feel happy with the result. Make sure
> +your guest kernel, your host kernel, and your userspace launcher are in a
> +consistent state.
> +
> +This Documentation describes the unique ioctl calls to /dev/kvm, the resulting
> +kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.
> +
> +1. ioctl calls to /dev/kvm
> +KVM does support the following ioctls on s390 that are common with other
> +architectures and do behave the same:
> +KVM_GET_API_VERSION
> +KVM_CREATE_VM		(*) see note
> +KVM_CHECK_EXTENSION
> +KVM_GET_VCPU_MMAP_SIZE
> +
> +Notes:
> +* KVM_CREATE_VM may fail on s390, if the calling process has multiple
> +threads and has not called KVM_S390_ENABLE_SIE before.
> +
> +In addition, on s390 the following architecture specific ioctls are supported:
> +ioctl:		KVM_S390_ENABLE_SIE
> +args:		none
> +see also:	include/linux/kvm.h
> +This call causes the kernel to switch on PGSTE in the user page table. This
> +operation is needed in order to run a virtual machine, and it requires the
> +calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
> +will implicitly try to switch on PGSTE if the user process has not called
> +KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
> +before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will
> +observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
> +operation, is not reversible, and will persist over the entire lifetime of
> +the calling process. It does not have any user-visibe effect other than a small

                                             user-visible

> +performance penalty.
> +
> +2. ioctl calls to the kvm-vm file descriptor
> +KVM does support the following ioctls on s390 that are common with other
> +architectures and do behave the same:
> +KVM_CREATE_VCPU
> +KVM_SET_USER_MEMORY_REGION      (*) see note
> +KVM_GET_DIRTY_LOG		(**) see note
> +
> +Notes:
> +*  kvm does only allow exactly one memory slot on s390, which has to start
> +   at guest absolute address zero and at a user address that is aligned on any
> +   page boundary. This hardware "limitation" allows us to have a few unique
> +   optimizations. The memory slot does'nt have to be filled

                                     doesn't

> +   with memory actually, it may contain sparse holes. That said, with different
> +   user memory layout this does still allow a large flexibility when
> +   doing the guest memory setup.
> +** KVM_GET_DIRTY_LOG does'nt work proper yet. The user will receive an empty

                        doesn't work properly

> +log. This ioctl call is only needed for guest migration, and we intend to
> +implement this one in the future.
> +
> +In addition, on s390 the following architecture specific ioctls for the kvm-vm
> +file descriptor are supported:
> +ioctl:		KVM_S390_INTERRUPT
> +args:		struct kvm_s390_interrupt *
> +see also:	include/linux/kvm.h
> +This ioctl is used to submit a floating interrupt for a virtual machine.
> +Floating interrupts may be delivered to any virtual cpu in the configuration.
> +Only some interrupt types defined in include/linux/kvm.h make sense when
> +submitted as floating interrupt. The following interrupts are not considered

                         interrupts.

> +to be useful as floating interrupt, and a call to inject them will result in

                            interrupts,

> +-EINVAL error code: program interrupts, and interprocessor signals. Valid

no comma

> +floating interrupts are:
> +KVM_S390_INT_VIRTIO
> +KVM_S390_INT_SERVICE
> +
> +3. ioctl calls to the kvm-vcpu file descriptor
> +KVM does support the following ioctls on s390 that are common with other
> +architectures and do behave the same:
> +KVM_RUN
> +KVM_GET_REGS
> +KVM_SET_REGS
> +KVM_GET_SREGS
> +KVM_SET_SREGS
> +KVM_GET_FPU
> +KVM_SET_FPU
> +
> +In addition, on s390 the following architecture specific ioctls for the
> +kvm-vcpu file descriptor are supported:
> +ioctl:		KVM_S390_INTERRUPT
> +args:		struct kvm_s390_interrupt *
> +see also:	include/linux/kvm.h
> +This ioctl is used to submit an interrupt for a specific virtual cpu.
> +Only some interrupt types defined in include/linux/kvm.h make sense when
> +submitted for a specific cpu. The following interrupts are not considered
> +to be useful, and a call to inject them will result in -EINVAL error code:
> +service processor calls, and virtio interrupts. Valid interrupt types are:

no comma

> +KVM_S390_PROGRAM_INT
> +KVM_S390_SIGP_STOP
> +KVM_S390_RESTART
> +KVM_S390_SIGP_SET_PREFIX
> +KVM_S390_INT_EMERGENCY
> +
> +ioctl:		KVM_S390_STORE_STATUS
> +args:		unsigned long
> +see also:	include/linux/kvm.h
> +This ioctl stores the state of the cpu at the guest real address given as
> +argument, unless one of the following values defined in include/linux/kvm.h
> +is given as arguement:
> +KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
> +absolute lowcore as defined by the principles of operation
> +KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
> +its prefix page just like the dump tool that comes with zipl. This is useful
> +to create a system dump for use with lkcdutils or crash.
> +
> +ioctl:		KVM_S390_SET_INITIAL_PSW
> +args:		struct kvm_s390_psw *
> +see also:	include/linux/kvm.h
> +This ioctl can be used to set the processor status word (psw) of a stopped cpu
> +prior to running it with KVM_RUN. Note that this call is not required to modify
> +the psw during sie intercepts that fall back to userspace because struct kvm_run
> +does contain the psw, and this value is evaluated during reentry of KVM_RUN
> +after the intercept exit was recognized.
> +
> +ioctl:		KVM_S390_INITIAL_RESET
> +args:		none
> +see also:	include/linux/kvm.h
> +This ioctl can be used to perform an initial cpu reset as defined by the
> +principles of operation. The target cpu has to be in stopped state.


---
~Randy

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]   ` <20080320101602.f2737c94.randy.dunlap@oracle.com>
@ 2008-03-20 17:27     ` Carsten Otte
  0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 17:27 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: aliguori, Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2,
	heicars2, jeroney, virtualization, borntrae, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao

Randy Dunlap wrote:
>> Index: kvm/arch/s390/kernel/early.c
>> ===================================================================
>> --- kvm.orig/arch/s390/kernel/early.c
>> +++ kvm/arch/s390/kernel/early.c
>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>>  	/* Running on a P/390 ? */
>>  	if (cpuinfo->cpu_id.machine == 0x7490)
>>  		machine_flags |= 4;
>> +
>> +	/* Running under KVM ? */
>> +	if (cpuinfo->cpu_id.version == 0xfe)
> 
> Hi,
> 
> Where are these magic numbers documented?  (0x7490, 0xfe, etc.)
> 
> 
>> +		machine_flags |= 64;
>>  }
>>  
>>  #ifdef CONFIG_64BIT
The cpuid (and most other things about s390 arch) are documented in 
the principles of operation:
http://publibz.boulder.ibm.com/epubs/pdf/a2278324.pdf
http://publibz.boulder.ibm.com/epubs/pdf/dz9zs001.pdf

(see chapter "control instructions" - store cpu id)

The 0xfe however is convention, the kvm arch code sets this value 
where it implements that instruction. See "privileged instructions" patch.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found] ` <1206030278.6690.52.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 17:28   ` Jeremy Fitzhardinge
       [not found]   ` <47E29EC6.5050403@goop.org>
  1 sibling, 0 replies; 52+ messages in thread
From: Jeremy Fitzhardinge @ 2008-03-20 17:28 UTC (permalink / raw)
  To: Carsten Otte
  Cc: aliguori, EHRHARDT, hollisb, arnd, borntraeger, kvm-devel,
	heiko.carstens, jeroney, virtualization,
	Linux Memory Management List, schwidefsky, rvdheij, os, jblunck,
	Zhang, Xiantao

Carsten Otte wrote:
> +struct mm_struct *dup_mm(struct task_struct *tsk);
>   

No prototypes in .c files.  Put this in an appropriate header.

    J

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found] ` <1206030326.6690.65.camel@cotte.boeblingen.de.ibm.com>
  2008-03-20 17:16   ` [RFC/PATCH 14/15] guest: detect when running on kvm Randy Dunlap
       [not found]   ` <20080320101602.f2737c94.randy.dunlap@oracle.com>
@ 2008-03-20 17:53   ` Christoph Hellwig
       [not found]   ` <20080320175357.GA30959@infradead.org>
  3 siblings, 0 replies; 52+ messages in thread
From: Christoph Hellwig @ 2008-03-20 17:53 UTC (permalink / raw)
  To: Carsten Otte
  Cc: EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens, jeroney,
	virtualization, borntraeger, schwidefsky, rvdheij, os, jblunck,
	Zhang, Xiantao

On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>  	/* Running on a P/390 ? */
>  	if (cpuinfo->cpu_id.machine == 0x7490)
>  		machine_flags |= 4;
> +
> +	/* Running under KVM ? */
> +	if (cpuinfo->cpu_id.version == 0xfe)
> +		machine_flags |= 64;

Shouldn't these have symbolic names?

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found]   ` <47E29EC6.5050403@goop.org>
@ 2008-03-20 19:13     ` Dave Hansen
       [not found]     ` <1206040405.8232.24.camel@nimitz.home.sr71.net>
  1 sibling, 0 replies; 52+ messages in thread
From: Dave Hansen @ 2008-03-20 19:13 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Carsten Otte, aliguori, EHRHARDT, hollisb, arnd, borntraeger,
	kvm-devel, heiko.carstens, jeroney, virtualization,
	Linux Memory Management List, schwidefsky, rvdheij, os, jblunck,
	Zhang, Xiantao

On Thu, 2008-03-20 at 10:28 -0700, Jeremy Fitzhardinge wrote:
> Carsten Otte wrote:
> > +struct mm_struct *dup_mm(struct task_struct *tsk);
> 
> No prototypes in .c files.  Put this in an appropriate header.

Well, and more fundamentally: do we really want dup_mm() able to be
called from other code?

Maybe we need a bit more detailed justification why fork() itself isn't
good enough.  It looks to me like they basically need an arch-specific
argument to fork, telling the new process's page tables to take the
fancy new bit.

I'm really curious how this new stuff is going to get used.  Are you
basically replacing fork() when creating kvm guests?

-- Dave

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]     ` <47E2CAFF.3070203@de.ibm.com>
@ 2008-03-20 19:41       ` Christoph Hellwig
       [not found]       ` <20080320194137.GA9975@infradead.org>
  1 sibling, 0 replies; 52+ messages in thread
From: Christoph Hellwig @ 2008-03-20 19:41 UTC (permalink / raw)
  To: carsteno
  Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
	jeroney, virtualization, Christoph Hellwig, borntrae, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao

On Thu, Mar 20, 2008 at 09:37:19PM +0100, Carsten Otte wrote:
> Christoph Hellwig wrote:
>> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
>>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>>>  	/* Running on a P/390 ? */
>>>  	if (cpuinfo->cpu_id.machine == 0x7490)
>>>  		machine_flags |= 4;
>>> +
>>> +	/* Running under KVM ? */
>>> +	if (cpuinfo->cpu_id.version == 0xfe)
>>> +		machine_flags |= 64;
>>
>> Shouldn't these have symbolic names?
> You mean symbolics for machine_flags? Or symbolics for cpu ids?

Either.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found]     ` <1206040405.8232.24.camel@nimitz.home.sr71.net>
@ 2008-03-20 20:35       ` Carsten Otte
       [not found]       ` <47E2CAAC.6020903@de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 20:35 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2, heicars2,
	jeroney, borntrae, virtualization, Linux Memory Management List,
	rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

Dave Hansen wrote:
> Well, and more fundamentally: do we really want dup_mm() able to be
> called from other code?
> 
> Maybe we need a bit more detailed justification why fork() itself isn't
> good enough.  It looks to me like they basically need an arch-specific
> argument to fork, telling the new process's page tables to take the
> fancy new bit.
> 
> I'm really curious how this new stuff is going to get used.  Are you
> basically replacing fork() when creating kvm guests?
No. The trick is, that we do need bigger page tables when running 
guests: our page tables are usually 2k, but when running a guest 
they're 4k to track both guest and host dirty&reference information. 
This looks like this:
*----------*
*2k PTE's  *
*----------*
*2k PGSTE  *
*----------*
We don't want to waste precious memory for all page tables. We'd like 
to have one kernel image that runs regular server workload _and_ 
guests. Therefore, we need to reallocate the page table after fork() 
once we know that task is going to be a hypervisor. That's what this 
code does: reallocate a bigger page table to accomondate the extra 
information. The task needs to be single-threaded when calling for 
extended page tables.

Btw: at fork() time, we cannot tell whether or not the user's going to 
be a hypervisor. Therefore we cannot do this in fork.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]   ` <20080320175357.GA30959@infradead.org>
       [not found]     ` <47E2CAFF.3070203@de.ibm.com>
@ 2008-03-20 20:37     ` Carsten Otte
  1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 20:37 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
	jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
	jblunck, Zhang, Xiantao

Christoph Hellwig wrote:
> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>>  	/* Running on a P/390 ? */
>>  	if (cpuinfo->cpu_id.machine == 0x7490)
>>  		machine_flags |= 4;
>> +
>> +	/* Running under KVM ? */
>> +	if (cpuinfo->cpu_id.version == 0xfe)
>> +		machine_flags |= 64;
> 
> Shouldn't these have symbolic names?
You mean symbolics for machine_flags? Or symbolics for cpu ids?

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]       ` <20080320194137.GA9975@infradead.org>
@ 2008-03-20 20:59         ` Carsten Otte
       [not found]         ` <47E2D034.4090600@de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 20:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christian Ehrhardt, arnd, hollisb, carsteno, mschwid2, heicars2,
	jeroney, virtualization, kvm-devel, borntrae, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao

Christoph Hellwig wrote:
> On Thu, Mar 20, 2008 at 09:37:19PM +0100, Carsten Otte wrote:
>> Christoph Hellwig wrote:
>>> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
>>>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>>>>  	/* Running on a P/390 ? */
>>>>  	if (cpuinfo->cpu_id.machine == 0x7490)
>>>>  		machine_flags |= 4;
>>>> +
>>>> +	/* Running under KVM ? */
>>>> +	if (cpuinfo->cpu_id.version == 0xfe)
>>>> +		machine_flags |= 64;
>>> Shouldn't these have symbolic names?
>> You mean symbolics for machine_flags? Or symbolics for cpu ids?
> 
> Either.
Hmmh. For cpu id's did'nt make sense probably until now that kvm also 
uses them. Before, this was the only one place that uses them.

With kvm and 0xfe, this one is sort of temporary one. We intend to 
rework this code to use "store system information", which would give 
us way more information about the machine and it's hypervisor 
topology. Up until my todo list gets to that point, I think we'll have 
to cope with a temporary number. We'll aim for making that change 
before 2.6.26 gets released.

The machine flags do have symbolic names, defined in 
include/asm-s390/setup.h. And yea, they should be used here. Will 
change that.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]         ` <47E2D034.4090600@de.ibm.com>
@ 2008-03-20 21:22           ` Heiko Carstens
       [not found]           ` <20080320212255.GA4225@osiris.boeblingen.de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-20 21:22 UTC (permalink / raw)
  To: carsteno
  Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
	jeroney, virtualization, Christoph Hellwig, borntrae, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao

On Thu, Mar 20, 2008 at 09:59:32PM +0100, Carsten Otte wrote:
> Christoph Hellwig wrote:
> > On Thu, Mar 20, 2008 at 09:37:19PM +0100, Carsten Otte wrote:
> >> Christoph Hellwig wrote:
> >>> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
> >>>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
> >>>>  	/* Running on a P/390 ? */
> >>>>  	if (cpuinfo->cpu_id.machine == 0x7490)
> >>>>  		machine_flags |= 4;
> >>>> +
> >>>> +	/* Running under KVM ? */
> >>>> +	if (cpuinfo->cpu_id.version == 0xfe)
> >>>> +		machine_flags |= 64;
> >>> Shouldn't these have symbolic names?
> >> You mean symbolics for machine_flags? Or symbolics for cpu ids?
> > 
> > Either.
> [...]
> The machine flags do have symbolic names, defined in 
> include/asm-s390/setup.h. And yea, they should be used here. Will 
> change that.

Since when do we have symbolic names for the bits?
It was always on my todo list to do a cleanup and replace the numbers
we use everywhere with names. Especially since we have clashes from time
to time... but that didn't hurt enough yet, obviously.
But now that you volunteered to take care of this... :)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
       [not found] ` <1206030328.6690.66.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-21  0:24   ` Rusty Russell
       [not found]   ` <200803211124.49829.rusty@rustcorp.com.au>
  2008-03-21 10:44   ` Avi Kivity
  2 siblings, 0 replies; 52+ messages in thread
From: Rusty Russell @ 2008-03-21  0:24 UTC (permalink / raw)
  To: Carsten Otte
  Cc: aliguori, EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens,
	jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
	jblunck, Zhang, Xiantao

On Friday 21 March 2008 03:25:28 Carsten Otte wrote:
> +static void kvm_set_status(struct virtio_device *vdev, u8 status)
> +{
> +	BUG_ON(!status);
> +	to_kvmdev(vdev)->desc->status = status;
> +}
> +
> +/*
> + * To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
> + * address of the device.  The Host will zero the status and all the 
> + * features. 
> + */
> +static void kvm_reset(struct virtio_device *vdev)
> +{
> +	unsigned long offset = (void *)to_kvmdev(vdev)->desc - kvm_devices;
> +
> +	kvm_hypercall1(1237, (max_pfn<<PAGE_SHIFT) + offset);
> +}

I'd recommend a hypercall after set_status, as well as reset.  The
reason lguest doesn't do this is that we don't do feature negotiation
(assuming guest kernel matches host kernel).  In general, the host
needs to know when the VIRTIO_CONFIG_S_DRIVER_OK is set so it can see
what features the guest driver accepted.

Overloading the notify hypercall is kind of a hack too, but it works so
no real need to change that.

> + * The root device for the kvm virtio devices.
> + * This makes them appear as /sys/devices/kvm/0,1,2 not /sys/devices/0,1,2.
> + */ 
> +static struct device kvm_root = {
> +	.parent = NULL,
> +	.bus_id = "kvm_s390",
> +};

You mean /sys/devices/kvm_s390/0,1,2?

> +static int __init kvm_devices_init(void)
> +{
> +	if (!MACHINE_IS_KVM)
> +		return -ENODEV;
> +
> +	if (device_register(&kvm_root) != 0)
> +		panic("Could not register kvm root");
> +
> +	if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
> +		device_unregister(&kvm_root);
> +		return -ENOMEM;
> +	}

Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory fail?
My theory was that since this is boot time, panic() is the right thing.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
       [not found]   ` <200803211124.49829.rusty@rustcorp.com.au>
@ 2008-03-21  7:12     ` Carsten Otte
  2008-03-21  8:15     ` Christian Borntraeger
       [not found]     ` <200803210915.48029.borntraeger@de.ibm.com>
  2 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21  7:12 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
	jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
	jblunck, Zhang, Xiantao

Rusty Russell wrote:
>> +static int __init kvm_devices_init(void)
>> +{
>> +	if (!MACHINE_IS_KVM)
>> +		return -ENODEV;
>> +
>> +	if (device_register(&kvm_root) != 0)
>> +		panic("Could not register kvm root");
>> +
>> +	if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
>> +		device_unregister(&kvm_root);
>> +		return -ENOMEM;
>> +	}
> 
> Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory fail?
> My theory was that since this is boot time, panic() is the right thing.
We can't tell whether or not this is an important device or not. Maybe 
the guest is running with ramdisk as rootfs and can have a happy life 
if we don't kill it here. Return the rc from device register seems to 
be the right thing to me, if it was an important device we'll see 
"panic: cannot mount rootfs" or something later.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
       [not found]   ` <200803211124.49829.rusty@rustcorp.com.au>
  2008-03-21  7:12     ` [kvm-devel] " Carsten Otte
@ 2008-03-21  8:15     ` Christian Borntraeger
       [not found]     ` <200803210915.48029.borntraeger@de.ibm.com>
  2 siblings, 0 replies; 52+ messages in thread
From: Christian Borntraeger @ 2008-03-21  8:15 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Carsten Otte, aliguori, EHRHARDT, arnd, hollisb, kvm-devel,
	heiko.carstens, jeroney, virtualization, schwidefsky, rvdheij, os,
	jblunck, Zhang, Xiantao

Am Freitag, 21. März 2008 schrieb Rusty Russell:
> I'd recommend a hypercall after set_status, as well as reset.  The
> reason lguest doesn't do this is that we don't do feature negotiation
> (assuming guest kernel matches host kernel).  In general, the host
> needs to know when the VIRTIO_CONFIG_S_DRIVER_OK is set so it can see
> what features the guest driver accepted.

Right. Will have a look.
> 
> Overloading the notify hypercall is kind of a hack too, but it works so
> no real need to change that.
> 
> > + * The root device for the kvm virtio devices.
> > + * This makes them appear as /sys/devices/kvm/0,1,2 
not /sys/devices/0,1,2.
> > + */ 
> > +static struct device kvm_root = {
> > +	.parent = NULL,
> > +	.bus_id = "kvm_s390",
> > +};
> 
> You mean /sys/devices/kvm_s390/0,1,2?

Yes, thanks.
> 
> > +static int __init kvm_devices_init(void)
> > +{
> > +	if (!MACHINE_IS_KVM)
> > +		return -ENODEV;
> > +
> > +	if (device_register(&kvm_root) != 0)
> > +		panic("Could not register kvm root");
> > +
> > +	if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
> > +		device_unregister(&kvm_root);
> > +		return -ENOMEM;
> > +	}
> 
> Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory fail?
> My theory was that since this is boot time, panic() is the right thing.

Good spot, but I agree with Carsten. Drivers should not panic. I have module 
load/unload capability on my long term todo list, but I can change the 
panic now.

Christian


_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 12/15] kvm-s390: API documentation
       [not found]   ` <20080320102200.3718e573.randy.dunlap@oracle.com>
@ 2008-03-21 10:33     ` Carsten Otte
  0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 10:33 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2, heicars2,
	jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
	jblunck, Zhang, Xiantao

Randy Dunlap wrote:
> This means that  [no comma]
<snip>
Being a native speaker is cheating ;-). I've integrated your feedback, 
for the next round of sending out these patches. Thank you :-).

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 05/15] KVM_MAX_VCPUS
       [not found]     ` <47E2954B.7090903@de.ibm.com>
@ 2008-03-21 10:41       ` Avi Kivity
  2008-03-21 11:13         ` Carsten Otte
  0 siblings, 1 reply; 52+ messages in thread
From: Avi Kivity @ 2008-03-21 10:41 UTC (permalink / raw)
  To: carsteno
  Cc: Christian Ehrhardt, Hollis Blanchard, arnd, kvm-devel, mschwid2,
	heicars2, jeroney, borntrae, virtualization, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao

Carsten Otte wrote:
> Hollis Blanchard wrote:
>   
>> On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
>>     
>>> Index: kvm/include/linux/kvm_host.h
>>> ===================================================================
>>> --- kvm.orig/include/linux/kvm_host.h
>>> +++ kvm/include/linux/kvm_host.h
>>> @@ -24,7 +24,11 @@
>>>
>>>  #include <asm/kvm_host.h>
>>>
>>> +#ifdef CONFIG_S390
>>> +#define KVM_MAX_VCPUS 64
>>> +#else
>>>  #define KVM_MAX_VCPUS 16
>>> +#endif
>>>  #define KVM_MEMORY_SLOTS 32
>>>  /* memory slots that does not exposed to userspace */
>>>  #define KVM_PRIVATE_MEM_SLOTS 4
>>>
>>>       
>> Why don't we just define this in <asm/kvm_host.h> ?
>>     
> No problem with that, I just wanted to keep impact on common code very 
> low and things like this seperated from the actual port. I have a few 
> things like this that can safely be taken care about later.
>
>   

Since there were a few other comments, I went ahead and moved those 
#defines to asm-x86.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
       [not found] ` <1206030328.6690.66.camel@cotte.boeblingen.de.ibm.com>
  2008-03-21  0:24   ` [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls Rusty Russell
       [not found]   ` <200803211124.49829.rusty@rustcorp.com.au>
@ 2008-03-21 10:44   ` Avi Kivity
  2 siblings, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-21 10:44 UTC (permalink / raw)
  To: Carsten Otte
  Cc: aliguori, EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens,
	jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
	jblunck, Zhang, Xiantao

Carsten Otte wrote:
> Currently we dont have PCI on s390. Making virtio_pci usable for s390 seems
> more complicated that providing an own stub. This virtio stub is similar to
> the lguest one, the memory for the descriptors and the device detection is made
> via additional mapped memory on top of the guest storage. We use an external
> interrupt with extint code 1237 for host->guest notification. 
>   

So, sanity won in the end.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 06/15] kvm-s390: sie intercept handling
       [not found] ` <1206030302.6690.57.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-21 10:53   ` Avi Kivity
  2008-03-21 11:26     ` Carsten Otte
  0 siblings, 1 reply; 52+ messages in thread
From: Avi Kivity @ 2008-03-21 10:53 UTC (permalink / raw)
  To: Carsten Otte
  Cc: EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens, jeroney,
	virtualization, borntraeger, schwidefsky, rvdheij, os, jblunck,
	Zhang, Xiantao

Carsten Otte wrote:
>  
>  /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
>  struct kvm_run {
> @@ -138,6 +139,14 @@ struct kvm_run {
>  			__u32 is_write;
>  			__u32 pad;
>  		} tpr_access;
> +		/* KVM_EXIT_S390_SIEIC */
> +		struct {
> +			__u8 icptcode;
> +			__u64 mask; /* psw upper half */
> +			__u64 addr; /* psw lower half */
> +			__u16 ipa;
> +			__u32 ipb;
> +		} s390_sieic;
>  		/* Fix the size of the union. */
>  		char padding[256];
>  	};
>
>   

Do you support 32-bit userspace on 64-bit kernel?  If so, this is likely 
badly aligned.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]           ` <20080320212255.GA4225@osiris.boeblingen.de.ibm.com>
@ 2008-03-21 11:12             ` Carsten Otte
       [not found]             ` <47E39804.4030605@de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 11:12 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: carsteno, Christian Ehrhardt, arnd, hollisb, kvm-devel, heicars2,
	mschwid2, jeroney, virtualization, Christoph Hellwig, borntrae,
	rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

MAILER-DAEMON@linux.ibm.com wrote:
> Since when do we have symbolic names for the bits?
> It was always on my todo list to do a cleanup and replace the numbers
> we use everywhere with names. Especially since we have clashes from time
> to time... but that didn't hurt enough yet, obviously.
> But now that you volunteered to take care of this... :)
Right. We only have defines for (machine_flags & bit). Looks to me 
like the bits really should have a name on them. I've created a patch 
that does this, but I want to talk it over with Martin before sending 
that one out.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 05/15] KVM_MAX_VCPUS
  2008-03-21 10:41       ` [kvm-devel] " Avi Kivity
@ 2008-03-21 11:13         ` Carsten Otte
  0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 11:13 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Christian Ehrhardt, Hollis Blanchard, arnd, carsteno, heicars2,
	mschwid2, jeroney, borntrae, virtualization, kvm-devel, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao

Avi Kivity wrote:
> Carsten Otte wrote:
>> Hollis Blanchard wrote:
>>  
>>> On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
>>>    
>>>> Index: kvm/include/linux/kvm_host.h
>>>> ===================================================================
>>>> --- kvm.orig/include/linux/kvm_host.h
>>>> +++ kvm/include/linux/kvm_host.h
>>>> @@ -24,7 +24,11 @@
>>>>
>>>>  #include <asm/kvm_host.h>
>>>>
>>>> +#ifdef CONFIG_S390
>>>> +#define KVM_MAX_VCPUS 64
>>>> +#else
>>>>  #define KVM_MAX_VCPUS 16
>>>> +#endif
>>>>  #define KVM_MEMORY_SLOTS 32
>>>>  /* memory slots that does not exposed to userspace */
>>>>  #define KVM_PRIVATE_MEM_SLOTS 4
>>>>
>>>>       
>>> Why don't we just define this in <asm/kvm_host.h> ?
>>>     
>> No problem with that, I just wanted to keep impact on common code very 
>> low and things like this seperated from the actual port. I have a few 
>> things like this that can safely be taken care about later.
>>
>>   
> 
> Since there were a few other comments, I went ahead and moved those 
> #defines to asm-x86.
Great! I will rebase the patch series.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 06/15] kvm-s390: sie intercept handling
  2008-03-21 10:53   ` [kvm-devel] [RFC/PATCH 06/15] kvm-s390: sie intercept handling Avi Kivity
@ 2008-03-21 11:26     ` Carsten Otte
  0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 11:26 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
	jeroney, borntrae, virtualization, rvdheij, Olaf Schnapper,
	jblunck, Zhang, Xiantao

Avi Kivity wrote:
> Carsten Otte wrote:
>>  
>>  /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
>>  struct kvm_run {
>> @@ -138,6 +139,14 @@ struct kvm_run {
>>              __u32 is_write;
>>              __u32 pad;
>>          } tpr_access;
>> +        /* KVM_EXIT_S390_SIEIC */
>> +        struct {
>> +            __u8 icptcode;
>> +            __u64 mask; /* psw upper half */
>> +            __u64 addr; /* psw lower half */
>> +            __u16 ipa;
>> +            __u32 ipb;
>> +        } s390_sieic;
>>          /* Fix the size of the union. */
>>          char padding[256];
>>      };
>>
>>   
> 
> Do you support 32-bit userspace on 64-bit kernel?  If so, this is likely 
> badly aligned.
32bit userspace is not pracitcal, current enterprise distributions 
come with 64bit only on s390. Nevertheless, I don't get your point on 
allignment. What is the problem caused by the struct, and how can I 
solve it?

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]             ` <47E39804.4030605@de.ibm.com>
@ 2008-03-21 14:06               ` Heiko Carstens
       [not found]               ` <20080321140603.GC4128@osiris.boeblingen.de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-21 14:06 UTC (permalink / raw)
  To: carsteno
  Cc: carsteno, Christian Ehrhardt, arnd, hollisb, kvm-devel, heicars2,
	mschwid2, jeroney, virtualization, Christoph Hellwig, borntrae,
	rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

On Fri, Mar 21, 2008 at 12:12:04PM +0100, Carsten Otte wrote:
> MAILER-DAEMON@linux.ibm.com wrote:
>> Since when do we have symbolic names for the bits?
>> It was always on my todo list to do a cleanup and replace the numbers
>> we use everywhere with names. Especially since we have clashes from time
>> to time... but that didn't hurt enough yet, obviously.
>> But now that you volunteered to take care of this... :)
> Right. We only have defines for (machine_flags & bit). Looks to me like 
> the bits really should have a name on them. I've created a patch that 
> does this, but I want to talk it over with Martin before sending that one 
> out.

Just introduce something like MACHINE_FLAG_KVM. The rest can be converted
later. Unless you're bored and feel like fiddling around with assembly code :)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]               ` <20080321140603.GC4128@osiris.boeblingen.de.ibm.com>
@ 2008-03-21 14:33                 ` Carsten Otte
       [not found]                 ` <1206110009.8363.3.camel@cotte.boeblingen.de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 14:33 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: carsteno, Christian Ehrhardt, arnd, hollisb, carsteno, heicars2,
	mschwid2, jeroney, virtualization, Christoph Hellwig, kvm-devel,
	borntrae, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

Am Freitag, den 21.03.2008, 15:06 +0100 schrieb Heiko Carstens:
> Just introduce something like MACHINE_FLAG_KVM. The rest can be converted
> later. Unless you're bored and feel like fiddling around with assembly code :)
I've done that patch this morning already, see below. I agree with HCH
that we should do that, but after the kvm merge. I don't want kvm-s390
conflict with Martin's patches. This is just a beautification, and can
safely wait a release cycle.
---
 arch/s390/kernel/early.c |    6 +++---
 include/asm-s390/setup.h |   32 ++++++++++++++++++++++----------
 2 files changed, 25 insertions(+), 13 deletions(-)

Index: linux-host/arch/s390/kernel/early.c
===================================================================
--- linux-host.orig/arch/s390/kernel/early.c
+++ linux-host/arch/s390/kernel/early.c
@@ -138,15 +138,15 @@ static noinline __init void detect_machi
 
 	/* Running under z/VM ? */
 	if (cpuinfo->cpu_id.version == 0xff)
-		machine_flags |= 1;
+		machine_flags |= MACHINE_FLAG_VM;
 
 	/* Running on a P/390 ? */
 	if (cpuinfo->cpu_id.machine == 0x7490)
-		machine_flags |= 4;
+		machine_flags |= MACHINE_FLAG_P390;
 
 	/* Running under KVM ? */
 	if (cpuinfo->cpu_id.version == 0xfe)
-		machine_flags |= 64;
+		machine_flags |= MACHINE_FLAG_KVM;
 }
 
 #ifdef CONFIG_64BIT
Index: linux-host/include/asm-s390/setup.h
===================================================================
--- linux-host.orig/include/asm-s390/setup.h
+++ linux-host/include/asm-s390/setup.h
@@ -59,23 +59,35 @@ extern unsigned int s390_noexec;
  */
 extern unsigned long machine_flags;
 
-#define MACHINE_IS_VM		(machine_flags & 1)
-#define MACHINE_IS_P390		(machine_flags & 4)
-#define MACHINE_HAS_MVPG	(machine_flags & 16)
-#define MACHINE_IS_KVM		(machine_flags & 64)
-#define MACHINE_HAS_IDTE	(machine_flags & 128)
-#define MACHINE_HAS_DIAG9C	(machine_flags & 256)
+#define MACHINE_FLAG_VM		1
+#define MACHINE_FLAG_IEEE	2
+#define MACHINE_FLAG_P390	4
+#define MACHINE_FLAG_CSP	8
+#define MACHINE_FLAG_MVPG	16
+#define MACHINE_FLAG_DIAG44	32
+#define MACHINE_FLAG_KVM	64
+#define MACHINE_FLAG_IDTE	128
+#define MACHINE_FLAG_DIAG9C	256
+#define MACHINE_FLAG_MVCOS	512
+
+
+#define MACHINE_IS_VM		(machine_flags & MACHINE_FLAG_VM)
+#define MACHINE_IS_KVM		(machine_flags & MACHINE_FLAG_KVM)
+#define MACHINE_IS_P390		(machine_flags & MACHINE_FLAG_P390)
+#define MACHINE_HAS_MVPG	(machine_flags & MACHINE_FLAG_MVPG)
+#define MACHINE_HAS_IDTE	(machine_flags & MACHINE_FLAG_IDTE)
+#define MACHINE_HAS_DIAG9C	(machine_flags & MACHINE_FLAG_DIAG9C)
 
 #ifndef __s390x__
-#define MACHINE_HAS_IEEE	(machine_flags & 2)
-#define MACHINE_HAS_CSP		(machine_flags & 8)
+#define MACHINE_HAS_IEEE	(machine_flags & MACHINE_FLAG_IEEE)
+#define MACHINE_HAS_CSP		(machine_flags & MACHINE_FLAG_CSP)
 #define MACHINE_HAS_DIAG44	(1)
 #define MACHINE_HAS_MVCOS	(0)
 #else /* __s390x__ */
 #define MACHINE_HAS_IEEE	(1)
 #define MACHINE_HAS_CSP		(1)
-#define MACHINE_HAS_DIAG44	(machine_flags & 32)
-#define MACHINE_HAS_MVCOS	(machine_flags & 512)
+#define MACHINE_HAS_DIAG44	(machine_flags & MACHINE_FLAG_DIAG44)
+#define MACHINE_HAS_MVCOS	(machine_flags & MACHINE_FLAG_MVCOS)
 #endif /* __s390x__ */
 
 #define MACHINE_HAS_SCLP	(!MACHINE_IS_P390)

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found]       ` <47E2CAAC.6020903@de.ibm.com>
@ 2008-03-21 18:29         ` Dave Hansen
  2008-03-21 19:03           ` Carsten Otte
                             ` (3 more replies)
  0 siblings, 4 replies; 52+ messages in thread
From: Dave Hansen @ 2008-03-21 18:29 UTC (permalink / raw)
  To: carsteno
  Cc: Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2, heicars2,
	jeroney, borntrae, virtualization, Linux Memory Management List,
	rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

On Thu, 2008-03-20 at 21:35 +0100, Carsten Otte wrote:
> Dave Hansen wrote:
> > Well, and more fundamentally: do we really want dup_mm() able to be
> > called from other code?
> > 
> > Maybe we need a bit more detailed justification why fork() itself isn't
> > good enough.  It looks to me like they basically need an arch-specific
> > argument to fork, telling the new process's page tables to take the
> > fancy new bit.
> > 
> > I'm really curious how this new stuff is going to get used.  Are you
> > basically replacing fork() when creating kvm guests?
> No. The trick is, that we do need bigger page tables when running 
> guests: our page tables are usually 2k, but when running a guest 
> they're 4k to track both guest and host dirty&reference information. 
> This looks like this:
> *----------*
> *2k PTE's  *
> *----------*
> *2k PGSTE  *
> *----------*
> We don't want to waste precious memory for all page tables. We'd like 
> to have one kernel image that runs regular server workload _and_ 
> guests.

That makes a lot of sense.

Is that layout (the shadow and regular stacked together) specified in
hardware somehow, or was it just chosen?

What you've done with dup_mm() is probably the brute-force way that I
would have done it had I just been trying to make a proof of concept or
something.  I'm worried that there are a bunch of corner cases that
haven't been considered.

What if someone else is poking around with ptrace or something similar
and they bump the mm_users:

+       if (tsk->mm->context.pgstes)
+               return 0;
+       if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+           tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+               return -EINVAL;
-------->HERE
+       tsk->mm->context.pgstes = 1;    /* dirty little tricks .. */
+       mm = dup_mm(tsk);

It'll race, possibly fault in some other pages, and those faults will be
lost during the dup_mm().  I think you need to be able to lock out all
of the users of access_process_vm() before you go and do this.  You also
need to make sure that anyone who has looked at task->mm doesn't go and
get a reference to it and get confused later when it isn't the task->mm
any more.

> Therefore, we need to reallocate the page table after fork() 
> once we know that task is going to be a hypervisor. That's what this 
> code does: reallocate a bigger page table to accomondate the extra 
> information. The task needs to be single-threaded when calling for 
> extended page tables.
> 
> Btw: at fork() time, we cannot tell whether or not the user's going to 
> be a hypervisor. Therefore we cannot do this in fork.

Can you convert the page tables at a later time without doing a
wholesale replacement of the mm?  It should be a bit easier to keep
people off the pagetables than keep their grubby mitts off the mm
itself.

-- Dave

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
  2008-03-21 18:29         ` Dave Hansen
@ 2008-03-21 19:03           ` Carsten Otte
  2008-03-22 17:57           ` Heiko Carstens
                             ` (2 subsequent siblings)
  3 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 19:03 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Christian Ehrhardt, hollisb, arnd, Linux Memory Management List,
	carsteno, mschwid2, heicars2, jeroney, borntrae, virtualization,
	kvm-devel, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

Dave Hansen wrote:
> On Thu, 2008-03-20 at 21:35 +0100, Carsten Otte wrote:
>> Dave Hansen wrote:
>>> Well, and more fundamentally: do we really want dup_mm() able to be
>>> called from other code?
>>>
>>> Maybe we need a bit more detailed justification why fork() itself isn't
>>> good enough.  It looks to me like they basically need an arch-specific
>>> argument to fork, telling the new process's page tables to take the
>>> fancy new bit.
>>>
>>> I'm really curious how this new stuff is going to get used.  Are you
>>> basically replacing fork() when creating kvm guests?
>> No. The trick is, that we do need bigger page tables when running 
>> guests: our page tables are usually 2k, but when running a guest 
>> they're 4k to track both guest and host dirty&reference information. 
>> This looks like this:
>> *----------*
>> *2k PTE's  *
>> *----------*
>> *2k PGSTE  *
>> *----------*
>> We don't want to waste precious memory for all page tables. We'd like 
>> to have one kernel image that runs regular server workload _and_ 
>> guests.
> 
> That makes a lot of sense.
> 
> Is that layout (the shadow and regular stacked together) specified in
> hardware somehow, or was it just chosen?
It's defined by hardware. The chip just adds +2k to the ptep to get to 
the corresponding pgste. Both pte and pgste are 64bit per page. I know 
Heiko and Martin have thought a lot about possible races. I'll have to 
leave your question on the race against pfault open for them.

Btw: thanks a lot for reviewing our changes :-)

cheers,
Carsten

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
       [not found]     ` <200803210915.48029.borntraeger@de.ibm.com>
@ 2008-03-21 23:30       ` Rusty Russell
       [not found]       ` <200803221030.49457.rusty@rustcorp.com.au>
  1 sibling, 0 replies; 52+ messages in thread
From: Rusty Russell @ 2008-03-21 23:30 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: Carsten Otte, aliguori, EHRHARDT, arnd, hollisb, kvm-devel,
	heiko.carstens, jeroney, virtualization, schwidefsky, rvdheij, os,
	jblunck, Zhang, Xiantao

On Friday 21 March 2008 19:15:47 Christian Borntraeger wrote:
> Am Freitag, 21. März 2008 schrieb Rusty Russell:
> > Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory
> > fail? My theory was that since this is boot time, panic() is the right
> > thing.
>
> Good spot, but I agree with Carsten. Drivers should not panic. I have
> module load/unload capability on my long term todo list, but I can change
> the panic now.

Yep, that makes sense.  For lguest, we panic: it's always at boot time so if 
it fails we should die early to make it easier to diagnose (and that makes 
sure it happens before we lose our early console).

Cheers,
Rusty.
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
       [not found]       ` <200803221030.49457.rusty@rustcorp.com.au>
@ 2008-03-22  7:36         ` Carsten Otte
  0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-22  7:36 UTC (permalink / raw)
  To: Rusty Russell
  Cc: aliguori, Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2,
	heicars2, jeroney, virtualization, borntrae, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao

Rusty Russell wrote:
> On Friday 21 March 2008 19:15:47 Christian Borntraeger wrote:
>> Am Freitag, 21. März 2008 schrieb Rusty Russell:
>>> Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory
>>> fail? My theory was that since this is boot time, panic() is the right
>>> thing.
>> Good spot, but I agree with Carsten. Drivers should not panic. I have
>> module load/unload capability on my long term todo list, but I can change
>> the panic now.
> 
> Yep, that makes sense.  For lguest, we panic: it's always at boot time so if 
> it fails we should die early to make it easier to diagnose (and that makes 
> sure it happens before we lose our early console).
Diangnostic is easy here at any time during the boot process: we've 
got our store status ioctl that userspace calls after guest execution 
has ended. It causes all cpus to store their register content and such 
into the cpu's lowcore area. Then it writes out our memory to a dump 
image, which lkcdutils and/or crash can read.

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
       [not found]                 ` <1206110009.8363.3.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-22 17:25                   ` Heiko Carstens
  0 siblings, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-22 17:25 UTC (permalink / raw)
  To: Carsten Otte
  Cc: carsteno, Christian Ehrhardt, arnd, hollisb, carsteno, mschwid2,
	heicars2, jeroney, virtualization, Christoph Hellwig, kvm-devel,
	borntrae, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

On Fri, Mar 21, 2008 at 03:33:29PM +0100, Carsten Otte wrote:
> Am Freitag, den 21.03.2008, 15:06 +0100 schrieb Heiko Carstens:
> > Just introduce something like MACHINE_FLAG_KVM. The rest can be converted
> > later. Unless you're bored and feel like fiddling around with assembly code :)
> I've done that patch this morning already, see below. I agree with HCH
> that we should do that, but after the kvm merge. I don't want kvm-s390
> conflict with Martin's patches. This is just a beautification, and can
> safely wait a release cycle.

That's nice for a start. But you didn't convert the assembly files to use
the new defines. So there is still no connection between setting a bit
in asm code and the new defines.
That's the reason why I said something about fiddling around with asm code.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
  2008-03-21 18:29         ` Dave Hansen
  2008-03-21 19:03           ` Carsten Otte
@ 2008-03-22 17:57           ` Heiko Carstens
       [not found]           ` <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>
  2008-03-25 15:37           ` Carsten Otte
  3 siblings, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-22 17:57 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Christian Ehrhardt, hollisb, arnd, Linux Memory Management List,
	carsteno, heicars2, mschwid2, jeroney, borntrae, virtualization,
	kvm-devel, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

> What you've done with dup_mm() is probably the brute-force way that I
> would have done it had I just been trying to make a proof of concept or
> something.  I'm worried that there are a bunch of corner cases that
> haven't been considered.
> 
> What if someone else is poking around with ptrace or something similar
> and they bump the mm_users:
> 
> +       if (tsk->mm->context.pgstes)
> +               return 0;
> +       if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
> +           tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
> +               return -EINVAL;
> -------->HERE
> +       tsk->mm->context.pgstes = 1;    /* dirty little tricks .. */
> +       mm = dup_mm(tsk);
> 
> It'll race, possibly fault in some other pages, and those faults will be
> lost during the dup_mm().  I think you need to be able to lock out all
> of the users of access_process_vm() before you go and do this.  You also
> need to make sure that anyone who has looked at task->mm doesn't go and
> get a reference to it and get confused later when it isn't the task->mm
> any more.
> 
> > Therefore, we need to reallocate the page table after fork() 
> > once we know that task is going to be a hypervisor. That's what this 
> > code does: reallocate a bigger page table to accomondate the extra 
> > information. The task needs to be single-threaded when calling for 
> > extended page tables.
> > 
> > Btw: at fork() time, we cannot tell whether or not the user's going to 
> > be a hypervisor. Therefore we cannot do this in fork.
> 
> Can you convert the page tables at a later time without doing a
> wholesale replacement of the mm?  It should be a bit easier to keep
> people off the pagetables than keep their grubby mitts off the mm
> itself.

Yes, as far as I can see you're right. And whatever we do in arch code,
after all it's just a work around to avoid a new clone flag.
If something like clone() with CLONE_KVM would be useful for more
architectures than just s390 then maybe we should try to get a flag.

Oh... there are just two unused clone flag bits left. Looks like the
namespace changes ate up a lot of them lately.

Well, we could still play dirty tricks like setting a bit in current
via whatever mechanism which indicates child-wants-extended-page-tables
and then just fork and be happy.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found]           ` <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>
@ 2008-03-23 10:15             ` Avi Kivity
       [not found]             ` <47E62DBA.4050102@qumranet.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-23 10:15 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Ehrhardt, hollisb, arnd, carsteno, heicars2,
	Dave Hansen, jeroney, borntrae, virtualization,
	Linux Memory Management List, mschwid2, rvdheij, Olaf Schnapper,
	jblunck, Zhang, Xiantao, kvm-devel

Heiko Carstens wrote:
>> What you've done with dup_mm() is probably the brute-force way that I
>> would have done it had I just been trying to make a proof of concept or
>> something.  I'm worried that there are a bunch of corner cases that
>> haven't been considered.
>>
>> What if someone else is poking around with ptrace or something similar
>> and they bump the mm_users:
>>
>> +       if (tsk->mm->context.pgstes)
>> +               return 0;
>> +       if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
>> +           tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
>> +               return -EINVAL;
>> -------->HERE
>> +       tsk->mm->context.pgstes = 1;    /* dirty little tricks .. */
>> +       mm = dup_mm(tsk);
>>
>> It'll race, possibly fault in some other pages, and those faults will be
>> lost during the dup_mm().  I think you need to be able to lock out all
>> of the users of access_process_vm() before you go and do this.  You also
>> need to make sure that anyone who has looked at task->mm doesn't go and
>> get a reference to it and get confused later when it isn't the task->mm
>> any more.
>>
>>     
>>> Therefore, we need to reallocate the page table after fork() 
>>> once we know that task is going to be a hypervisor. That's what this 
>>> code does: reallocate a bigger page table to accomondate the extra 
>>> information. The task needs to be single-threaded when calling for 
>>> extended page tables.
>>>
>>> Btw: at fork() time, we cannot tell whether or not the user's going to 
>>> be a hypervisor. Therefore we cannot do this in fork.
>>>       
>> Can you convert the page tables at a later time without doing a
>> wholesale replacement of the mm?  It should be a bit easier to keep
>> people off the pagetables than keep their grubby mitts off the mm
>> itself.
>>     
>
> Yes, as far as I can see you're right. And whatever we do in arch code,
> after all it's just a work around to avoid a new clone flag.
> If something like clone() with CLONE_KVM would be useful for more
> architectures than just s390 then maybe we should try to get a flag.
>
> Oh... there are just two unused clone flag bits left. Looks like the
> namespace changes ate up a lot of them lately.
>
> Well, we could still play dirty tricks like setting a bit in current
> via whatever mechanism which indicates child-wants-extended-page-tables
> and then just fork and be happy.
>   

How about taking mmap_sem for write and converting all page tables 
in-place?  I'd rather avoid the need to fork() when creating a VM.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found]             ` <47E62DBA.4050102@qumranet.com>
@ 2008-03-23 18:23               ` Martin Schwidefsky
  2008-03-24  6:57                 ` Avi Kivity
       [not found]                 ` <47E750ED.7060509@qumranet.com>
  0 siblings, 2 replies; 52+ messages in thread
From: Martin Schwidefsky @ 2008-03-23 18:23 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Christian Ehrhardt, hollisb, arnd, carsteno, Heiko Carstens,
	Dave Hansen, jeroney, borntrae, virtualization,
	Linux Memory Management List, mschwid2, heicars2, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao, kvm-devel

On Sun, 2008-03-23 at 12:15 +0200, Avi Kivity wrote:
> >> Can you convert the page tables at a later time without doing a
> >> wholesale replacement of the mm?  It should be a bit easier to keep
> >> people off the pagetables than keep their grubby mitts off the mm
> >> itself.
> >>     
> >
> > Yes, as far as I can see you're right. And whatever we do in arch code,
> > after all it's just a work around to avoid a new clone flag.
> > If something like clone() with CLONE_KVM would be useful for more
> > architectures than just s390 then maybe we should try to get a flag.
> >
> > Oh... there are just two unused clone flag bits left. Looks like the
> > namespace changes ate up a lot of them lately.
> >
> > Well, we could still play dirty tricks like setting a bit in current
> > via whatever mechanism which indicates child-wants-extended-page-tables
> > and then just fork and be happy.
> >   
> 
> How about taking mmap_sem for write and converting all page tables 
> in-place?  I'd rather avoid the need to fork() when creating a VM.

That was my initial approach as well. If all the page table allocations
can be fullfilled the code is not too complicated. To handle allocation
failures gets tricky. At this point I realized that dup_mmap already
does what we want to do. It walks all the page tables, allocates new
page tables and copies the ptes. In principle I would reinvent the wheel
if we can not use dup_mmap.

-- 
blue skies,
  Martin.

"Reality continues to ruin my life." - Calvin.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
  2008-03-23 18:23               ` Martin Schwidefsky
@ 2008-03-24  6:57                 ` Avi Kivity
       [not found]                 ` <47E750ED.7060509@qumranet.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-24  6:57 UTC (permalink / raw)
  To: schwidefsky
  Cc: Christian Ehrhardt, hollisb, arnd, carsteno, Heiko Carstens,
	Dave Hansen, jeroney, borntrae, virtualization,
	Linux Memory Management List, mschwid2, heicars2, rvdheij,
	Olaf Schnapper, jblunck, Zhang, Xiantao, kvm-devel

Martin Schwidefsky wrote:
> On Sun, 2008-03-23 at 12:15 +0200, Avi Kivity wrote:
>   
>>>> Can you convert the page tables at a later time without doing a
>>>> wholesale replacement of the mm?  It should be a bit easier to keep
>>>> people off the pagetables than keep their grubby mitts off the mm
>>>> itself.
>>>>     
>>>>         
>>> Yes, as far as I can see you're right. And whatever we do in arch code,
>>> after all it's just a work around to avoid a new clone flag.
>>> If something like clone() with CLONE_KVM would be useful for more
>>> architectures than just s390 then maybe we should try to get a flag.
>>>
>>> Oh... there are just two unused clone flag bits left. Looks like the
>>> namespace changes ate up a lot of them lately.
>>>
>>> Well, we could still play dirty tricks like setting a bit in current
>>> via whatever mechanism which indicates child-wants-extended-page-tables
>>> and then just fork and be happy.
>>>   
>>>       
>> How about taking mmap_sem for write and converting all page tables 
>> in-place?  I'd rather avoid the need to fork() when creating a VM.
>>     
>
> That was my initial approach as well. If all the page table allocations
> can be fullfilled the code is not too complicated. To handle allocation
> failures gets tricky. At this point I realized that dup_mmap already
> does what we want to do. It walks all the page tables, allocates new
> page tables and copies the ptes. In principle I would reinvent the wheel
> if we can not use dup_mmap

Well, dup_mm() can't work (and now that I think about it, for more 
reasons -- what if the process has threads?).

I don't think conversion is too bad.  You'd need a four-level loop to 
allocate and convert, and another loop to deallocate in case of error.  
If, as I don't doubt, s390 hardware can modify the ptes, you'd need 
cmpxchg to read and clear a pte in one operation.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found]                 ` <47E750ED.7060509@qumranet.com>
@ 2008-03-25  6:08                   ` Carsten Otte
       [not found]                   ` <47E896EA.5060309@de.ibm.com>
  1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-25  6:08 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Christian Ehrhardt, arnd, hollisb, carsteno, Heiko Carstens,
	Dave Hansen, jeroney, borntrae, virtualization,
	Linux Memory Management List, mschwid2, heicars2, schwidefsky,
	rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao, kvm-devel

Avi Kivity wrote:
> Well, dup_mm() can't work (and now that I think about it, for more 
> reasons -- what if the process has threads?).
We lock out multithreaded users already, -EINVAL.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
       [not found]                   ` <47E896EA.5060309@de.ibm.com>
@ 2008-03-25  6:12                     ` Avi Kivity
  0 siblings, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-25  6:12 UTC (permalink / raw)
  To: carsteno
  Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, Heiko Carstens,
	Dave Hansen, jeroney, borntrae, virtualization,
	Linux Memory Management List, mschwid2, heicars2, schwidefsky,
	rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

Carsten Otte wrote:
> Avi Kivity wrote:
>> Well, dup_mm() can't work (and now that I think about it, for more 
>> reasons -- what if the process has threads?).
> We lock out multithreaded users already, -EINVAL.
>

Would be much better if this can be avoided.  It's surprising.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
  2008-03-21 18:29         ` Dave Hansen
                             ` (2 preceding siblings ...)
       [not found]           ` <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>
@ 2008-03-25 15:37           ` Carsten Otte
  3 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-25 15:37 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Christian Ehrhardt, hollisb, arnd, Linux Memory Management List,
	carsteno, mschwid2, heicars2, jeroney, borntrae, virtualization,
	kvm-devel, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao

Am Freitag, den 21.03.2008, 11:29 -0700 schrieb Dave Hansen:
> What you've done with dup_mm() is probably the brute-force way that I
> would have done it had I just been trying to make a proof of concept or
> something.  I'm worried that there are a bunch of corner cases that
> haven't been considered.
> 
> What if someone else is poking around with ptrace or something similar
> and they bump the mm_users:
> 
> +       if (tsk->mm->context.pgstes)
> +               return 0;
> +       if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
> +           tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
> +               return -EINVAL;
> -------->HERE
> +       tsk->mm->context.pgstes = 1;    /* dirty little tricks .. */
> +       mm = dup_mm(tsk);
> 
> It'll race, possibly fault in some other pages, and those faults will be
> lost during the dup_mm().  I think you need to be able to lock out all
> of the users of access_process_vm() before you go and do this.  You also
> need to make sure that anyone who has looked at task->mm doesn't go and
> get a reference to it and get confused later when it isn't the task->mm
> any more.

Good catch, Dave. We intend to get rid of that race via task_lock().
That should lock out ptrace and all others who modify mm_users via get_task_mm.


See patch below:
---

 arch/s390/Kconfig              |    4 ++
 arch/s390/kernel/setup.c       |    4 ++
 arch/s390/mm/pgtable.c         |   65 +++++++++++++++++++++++++++++++++++++++--
 include/asm-s390/mmu.h         |    1 
 include/asm-s390/mmu_context.h |    8 ++++-
 include/asm-s390/pgtable.h     |    1 
 include/linux/sched.h          |    2 +
 kernel/fork.c                  |    2 -
 8 files changed, 82 insertions(+), 5 deletions(-)

Index: linux-host/arch/s390/Kconfig
===================================================================
--- linux-host.orig/arch/s390/Kconfig
+++ linux-host/arch/s390/Kconfig
@@ -55,6 +55,10 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
+config PGSTE
+	bool
+	default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
Index: linux-host/arch/s390/kernel/setup.c
===================================================================
--- linux-host.orig/arch/s390/kernel/setup.c
+++ linux-host/arch/s390/kernel/setup.c
@@ -315,7 +315,11 @@ static int __init early_parse_ipldelay(c
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,
Index: linux-host/arch/s390/mm/pgtable.c
===================================================================
--- linux-host.orig/arch/s390/mm/pgtable.c
+++ linux-host/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE	4
 #define FRAG_MASK	15UL
 #define SECOND_HALVES	10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 256, 0, PAGE_SIZE/4);
+	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER	2
 #define TABLES_PER_PAGE	2
 #define FRAG_MASK	3UL
 #define SECOND_HALVES	2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct m
 	unsigned long *table;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	spin_lock(&mm->page_table_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct m
 		pgtable_page_ctor(page);
 		page->flags &= ~FRAG_MASK;
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		if (mm->context.pgstes)
+			clear_table_pgstes(table);
+		else
+			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock(&mm->page_table_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *m
 	struct page *page;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+	int rc;
+
+	task_lock(tsk);
+
+	rc = 0;
+	if (tsk->mm->context.pgstes)
+		goto unlock;
+
+	rc = -EINVAL;
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+		goto unlock;
+
+	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	mm = dup_mm(tsk);
+	tsk->mm->context.pgstes = 0;
+
+	rc = -ENOMEM;
+	if (!mm)
+		goto unlock;
+	mmput(tsk->mm);
+	tsk->mm = tsk->active_mm = mm;
+	preempt_disable();
+	update_mm(mm, tsk);
+	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+	preempt_enable();
+	rc = 0;
+unlock:
+	task_unlock(tsk);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
Index: linux-host/include/asm-s390/mmu.h
===================================================================
--- linux-host.orig/include/asm-s390/mmu.h
+++ linux-host/include/asm-s390/mmu.h
@@ -7,6 +7,7 @@ typedef struct {
 	unsigned long asce_bits;
 	unsigned long asce_limit;
 	int noexec;
+	int pgstes;
 } mm_context_t;
 
 #endif
Index: linux-host/include/asm-s390/mmu_context.h
===================================================================
--- linux-host.orig/include/asm-s390/mmu_context.h
+++ linux-host/include/asm-s390/mmu_context.h
@@ -20,7 +20,13 @@ static inline int init_new_context(struc
 #ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
-	mm->context.noexec = s390_noexec;
+	if (current->mm->context.pgstes) {
+		mm->context.noexec = 0;
+		mm->context.pgstes = 1;
+	} else {
+		mm->context.noexec = s390_noexec;
+		mm->context.pgstes = 0;
+	}
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
Index: linux-host/include/asm-s390/pgtable.h
===================================================================
--- linux-host.orig/include/asm-s390/pgtable.h
+++ linux-host/include/asm-s390/pgtable.h
@@ -966,6 +966,7 @@ static inline pte_t mk_swap_pte(unsigned
 
 extern int add_shared_memory(unsigned long start, unsigned long size);
 extern int remove_shared_memory(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
 
 /*
  * No page table caches to initialise
Index: linux-host/kernel/fork.c
===================================================================
--- linux-host.orig/kernel/fork.c
+++ linux-host/kernel/fork.c
@@ -498,7 +498,7 @@ void mm_release(struct task_struct *tsk,
  * Allocate a new mm structure and copy contents from the
  * mm structure of the passed in task structure.
  */
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
 {
 	struct mm_struct *mm, *oldmm = current->mm;
 	int err;
Index: linux-host/include/linux/sched.h
===================================================================
--- linux-host.orig/include/linux/sched.h
+++ linux-host/include/linux/sched.h
@@ -1758,6 +1758,8 @@ extern void mmput(struct mm_struct *);
 extern struct mm_struct *get_task_mm(struct task_struct *task);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Allocate a new mm structure and copy contents from tsk->mm */
+extern struct mm_struct *dup_mm(struct task_struct *tsk);
 
 extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
 extern void flush_thread(void);

^ permalink raw reply	[flat|nested] 52+ messages in thread

end of thread, other threads:[~2008-03-25 15:37 UTC | newest]

Thread overview: 52+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 16:24 ` [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 02/15] preparation: host memory management changes for s390 kvm Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 03/15] preparation: address of the 64bit extint parm in lowcore Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 04/15] preparation: split sysinfo defintions for kvm use Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 05/15] kvm-s390: s390 arch backend for the kvm kernel module Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 06/15] kvm-s390: sie intercept handling Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 07/15] kvm-s390: interrupt subsystem, cpu timer, waitpsw Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 08/15] kvm-s390: intercepts for privileged instructions Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 09/15] kvm-s390: interprocessor communication via sigp Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 10/15] kvm-s390: intercepts for diagnose instructions Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 11/15] kvm-s390: add kvm to kconfig on s390 Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 12/15] kvm-s390: API documentation Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 13/15] kvm-s390: update maintainers Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 14/15] guest: detect when running on kvm Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls Carsten Otte
     [not found] ` <1206030298.6690.56.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 16:43   ` [RFC/PATCH 05/15] KVM_MAX_VCPUS Hollis Blanchard
     [not found]   ` <1206031383.6356.13.camel@basalt>
2008-03-20 16:48     ` Carsten Otte
     [not found]     ` <47E2954B.7090903@de.ibm.com>
2008-03-21 10:41       ` [kvm-devel] " Avi Kivity
2008-03-21 11:13         ` Carsten Otte
     [not found] ` <1206030320.6690.63.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 17:22   ` [RFC/PATCH 12/15] kvm-s390: API documentation Randy Dunlap
     [not found]   ` <20080320102200.3718e573.randy.dunlap@oracle.com>
2008-03-21 10:33     ` [kvm-devel] " Carsten Otte
     [not found] ` <1206030278.6690.52.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 17:28   ` [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable Jeremy Fitzhardinge
     [not found]   ` <47E29EC6.5050403@goop.org>
2008-03-20 19:13     ` Dave Hansen
     [not found]     ` <1206040405.8232.24.camel@nimitz.home.sr71.net>
2008-03-20 20:35       ` [kvm-devel] " Carsten Otte
     [not found]       ` <47E2CAAC.6020903@de.ibm.com>
2008-03-21 18:29         ` Dave Hansen
2008-03-21 19:03           ` Carsten Otte
2008-03-22 17:57           ` Heiko Carstens
     [not found]           ` <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>
2008-03-23 10:15             ` Avi Kivity
     [not found]             ` <47E62DBA.4050102@qumranet.com>
2008-03-23 18:23               ` Martin Schwidefsky
2008-03-24  6:57                 ` Avi Kivity
     [not found]                 ` <47E750ED.7060509@qumranet.com>
2008-03-25  6:08                   ` Carsten Otte
     [not found]                   ` <47E896EA.5060309@de.ibm.com>
2008-03-25  6:12                     ` Avi Kivity
2008-03-25 15:37           ` Carsten Otte
     [not found] ` <1206030326.6690.65.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 17:16   ` [RFC/PATCH 14/15] guest: detect when running on kvm Randy Dunlap
     [not found]   ` <20080320101602.f2737c94.randy.dunlap@oracle.com>
2008-03-20 17:27     ` Carsten Otte
2008-03-20 17:53   ` [kvm-devel] " Christoph Hellwig
     [not found]   ` <20080320175357.GA30959@infradead.org>
     [not found]     ` <47E2CAFF.3070203@de.ibm.com>
2008-03-20 19:41       ` Christoph Hellwig
     [not found]       ` <20080320194137.GA9975@infradead.org>
2008-03-20 20:59         ` Carsten Otte
     [not found]         ` <47E2D034.4090600@de.ibm.com>
2008-03-20 21:22           ` Heiko Carstens
     [not found]           ` <20080320212255.GA4225@osiris.boeblingen.de.ibm.com>
2008-03-21 11:12             ` Carsten Otte
     [not found]             ` <47E39804.4030605@de.ibm.com>
2008-03-21 14:06               ` Heiko Carstens
     [not found]               ` <20080321140603.GC4128@osiris.boeblingen.de.ibm.com>
2008-03-21 14:33                 ` Carsten Otte
     [not found]                 ` <1206110009.8363.3.camel@cotte.boeblingen.de.ibm.com>
2008-03-22 17:25                   ` Heiko Carstens
2008-03-20 20:37     ` Carsten Otte
     [not found] ` <1206030328.6690.66.camel@cotte.boeblingen.de.ibm.com>
2008-03-21  0:24   ` [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls Rusty Russell
     [not found]   ` <200803211124.49829.rusty@rustcorp.com.au>
2008-03-21  7:12     ` [kvm-devel] " Carsten Otte
2008-03-21  8:15     ` Christian Borntraeger
     [not found]     ` <200803210915.48029.borntraeger@de.ibm.com>
2008-03-21 23:30       ` Rusty Russell
     [not found]       ` <200803221030.49457.rusty@rustcorp.com.au>
2008-03-22  7:36         ` Carsten Otte
2008-03-21 10:44   ` Avi Kivity
     [not found] ` <1206030302.6690.57.camel@cotte.boeblingen.de.ibm.com>
2008-03-21 10:53   ` [kvm-devel] [RFC/PATCH 06/15] kvm-s390: sie intercept handling Avi Kivity
2008-03-21 11:26     ` Carsten Otte

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).