* [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 16:24 ` Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 02/15] preparation: host memory management changes for s390 kvm Carsten Otte
` (19 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity,
Linux Memory Management List
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
The SIE instruction on s390 uses the 2nd half of the page table page to
virtualize the storage keys of a guest. This patch offers the s390_enable_sie
function, which reorganizes the page tables of a single-threaded process to
reserve space in the page table:
s390_enable_sie makes sure that the process is single threaded and then uses
dup_mm to create a new mm with reorganized page tables. The old mm is freed
and the process now has a page status extended field after every page table.
Code that wants to exploit pgstes should SELECT CONFIG_PGSTE.
This patch has a small common code hit, namely making dup_mm non-static.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/Kconfig | 4 ++
arch/s390/kernel/setup.c | 4 ++
arch/s390/mm/pgtable.c | 55 ++++++++++++++++++++++++++++++++++++++---
include/asm-s390/mmu.h | 1
include/asm-s390/mmu_context.h | 8 +++++
include/asm-s390/pgtable.h | 1
kernel/fork.c | 2 -
7 files changed, 70 insertions(+), 5 deletions(-)
Index: kvm/arch/s390/Kconfig
===================================================================
--- kvm.orig/arch/s390/Kconfig
+++ kvm/arch/s390/Kconfig
@@ -55,6 +55,10 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
+config PGSTE
+ bool
+ default y if KVM
+
mainmenu "Linux Kernel Configuration"
config S390
Index: kvm/arch/s390/kernel/setup.c
===================================================================
--- kvm.orig/arch/s390/kernel/setup.c
+++ kvm/arch/s390/kernel/setup.c
@@ -315,7 +315,11 @@ static int __init early_parse_ipldelay(c
early_param("ipldelay", early_parse_ipldelay);
#ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
unsigned int switch_amode = 0;
+#endif
EXPORT_SYMBOL_GPL(switch_amode);
static void set_amode_and_uaccess(unsigned long user_amode,
Index: kvm/arch/s390/mm/pgtable.c
===================================================================
--- kvm.orig/arch/s390/mm/pgtable.c
+++ kvm/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
#define TABLES_PER_PAGE 4
#define FRAG_MASK 15UL
#define SECOND_HALVES 10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 256, 0, PAGE_SIZE/4);
+ clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 768, 0, PAGE_SIZE/4);
+}
+
#else
#define ALLOC_ORDER 2
#define TABLES_PER_PAGE 2
#define FRAG_MASK 3UL
#define SECOND_HALVES 2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+ memset(table + 256, 0, PAGE_SIZE/2);
+}
+
#endif
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct m
unsigned long *table;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
spin_lock(&mm->page_table_lock);
page = NULL;
if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct m
pgtable_page_ctor(page);
page->flags &= ~FRAG_MASK;
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ if (mm->context.pgstes)
+ clear_table_pgstes(table);
+ else
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
spin_lock(&mm->page_table_lock);
list_add(&page->lru, &mm->context.pgtable_list);
}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *m
struct page *page;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
@@ -228,3 +247,33 @@ void disable_noexec(struct mm_struct *mm
mm->context.noexec = 0;
update_mm(mm, tsk);
}
+
+struct mm_struct *dup_mm(struct task_struct *tsk);
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm;
+
+ if (tsk->mm->context.pgstes)
+ return 0;
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+ tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+ return -EINVAL;
+ tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
+ mm = dup_mm(tsk);
+ tsk->mm->context.pgstes = 0;
+ if (!mm)
+ return -ENOMEM;
+ mmput(tsk->mm);
+ tsk->mm = tsk->active_mm = mm;
+ preempt_disable();
+ update_mm(mm, tsk);
+ cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+ preempt_enable();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
Index: kvm/include/asm-s390/mmu.h
===================================================================
--- kvm.orig/include/asm-s390/mmu.h
+++ kvm/include/asm-s390/mmu.h
@@ -7,6 +7,7 @@ typedef struct {
unsigned long asce_bits;
unsigned long asce_limit;
int noexec;
+ int pgstes;
} mm_context_t;
#endif
Index: kvm/include/asm-s390/mmu_context.h
===================================================================
--- kvm.orig/include/asm-s390/mmu_context.h
+++ kvm/include/asm-s390/mmu_context.h
@@ -20,7 +20,13 @@ static inline int init_new_context(struc
#ifdef CONFIG_64BIT
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
- mm->context.noexec = s390_noexec;
+ if (current->mm->context.pgstes) {
+ mm->context.noexec = 0;
+ mm->context.pgstes = 1;
+ } else {
+ mm->context.noexec = s390_noexec;
+ mm->context.pgstes = 0;
+ }
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
Index: kvm/include/asm-s390/pgtable.h
===================================================================
--- kvm.orig/include/asm-s390/pgtable.h
+++ kvm/include/asm-s390/pgtable.h
@@ -966,6 +966,7 @@ static inline pte_t mk_swap_pte(unsigned
extern int add_shared_memory(unsigned long start, unsigned long size);
extern int remove_shared_memory(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
/*
* No page table caches to initialise
Index: kvm/kernel/fork.c
===================================================================
--- kvm.orig/kernel/fork.c
+++ kvm/kernel/fork.c
@@ -498,7 +498,7 @@ void mm_release(struct task_struct *tsk,
* Allocate a new mm structure and copy contents from the
* mm structure of the passed in task structure.
*/
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
{
struct mm_struct *mm, *oldmm = current->mm;
int err;
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 02/15] preparation: host memory management changes for s390 kvm
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 16:24 ` [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 03/15] preparation: address of the 64bit extint parm in lowcore Carsten Otte
` (18 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity,
Linux Memory Management List
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Heiko Carstens <heiko.carstens@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch changes the s390 memory management definitions to use the pgste field
for dirty and reference bit tracking of host and guest code. Usually on s390,
dirty and referenced are tracked in storage keys, which belong to the physical
page. This changes with virtualization: The guest and host dirty/reference bits
are defined to be the logical OR of the values for the mapping and the physical
page. This patch implements the necessary changes in pgtable.h for s390.
There is a common code change in mm/rmap.c, the call to page_test_and_clear_young
must be moved. This is a no-op for all architectures but s390. page_referenced
checks the referenced bits for the physical page and for all mappings:
o The physical page is checked with page_test_and_clear_young.
o The mappings are checked with ptep_test_and_clear_young and friends.
Without pgstes (the current implementation on Linux s390) the physical page
check is implemented but the mapping callbacks are no-ops because dirty
and referenced are not tracked in the s390 page tables. The pgstes introduce
guest and host dirty and reference bits for s390 in the host mapping. These
mappings must be checked before page_test_and_clear_young resets the reference
bit.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
include/asm-s390/pgtable.h | 109 +++++++++++++++++++++++++++++++++++++++++++--
mm/rmap.c | 7 +-
2 files changed, 110 insertions(+), 6 deletions(-)
Index: kvm/include/asm-s390/pgtable.h
===================================================================
--- kvm.orig/include/asm-s390/pgtable.h
+++ kvm/include/asm-s390/pgtable.h
@@ -30,6 +30,7 @@
*/
#ifndef __ASSEMBLY__
#include <linux/mm_types.h>
+#include <asm/atomic.h>
#include <asm/bug.h>
#include <asm/processor.h>
@@ -258,6 +259,13 @@ extern char empty_zero_page[PAGE_SIZE];
* swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
*/
+/* Page status extended for virtualization */
+#define _PAGE_RCP_PCL 0x0080000000000000UL
+#define _PAGE_RCP_HR 0x0040000000000000UL
+#define _PAGE_RCP_HC 0x0020000000000000UL
+#define _PAGE_RCP_GR 0x0004000000000000UL
+#define _PAGE_RCP_GC 0x0002000000000000UL
+
#ifndef __s390x__
/* Bits in the segment table address-space-control-element */
@@ -513,6 +521,67 @@ static inline int pte_file(pte_t pte)
#define __HAVE_ARCH_PTE_SAME
#define pte_same(a,b) (pte_val(a) == pte_val(b))
+static inline void rcp_lock(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+ atomic64_t *rcp = (atomic64_t *) (ptep + PTRS_PER_PTE);
+ preempt_disable();
+ atomic64_set_mask(_PAGE_RCP_PCL, rcp);
+#endif
+}
+
+static inline void rcp_unlock(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+ atomic64_t *rcp = (atomic64_t *) (ptep + PTRS_PER_PTE);
+ atomic64_clear_mask(_PAGE_RCP_PCL, rcp);
+ preempt_enable();
+#endif
+}
+
+static inline void rcp_set_bits(pte_t *ptep, unsigned long val)
+{
+#ifdef CONFIG_PGSTE
+ *(unsigned long *) (ptep + PTRS_PER_PTE) |= val;
+#endif
+}
+
+static inline int rcp_test_and_clear_bits(pte_t *ptep, unsigned long val)
+{
+#ifdef CONFIG_PGSTE
+ unsigned long ret;
+
+ ret = *(unsigned long *) (ptep + PTRS_PER_PTE);
+ *(unsigned long *) (ptep + PTRS_PER_PTE) &= ~val;
+ return (ret & val) == val;
+#else
+ return 0;
+#endif
+}
+
+
+/* forward declaration for SetPageUptodate in page-flags.h*/
+static inline void page_clear_dirty(struct page *page);
+#include <linux/page-flags.h>
+
+static inline void ptep_rcp_copy(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+ struct page *page = virt_to_page(pte_val(*ptep));
+ unsigned int skey;
+
+ skey = page_get_storage_key(page_to_phys(page));
+ if (skey & _PAGE_CHANGED)
+ rcp_set_bits(ptep, _PAGE_RCP_GC);
+ if (skey & _PAGE_REFERENCED)
+ rcp_set_bits(ptep, _PAGE_RCP_GR);
+ if (rcp_test_and_clear_bits(ptep, _PAGE_RCP_HC))
+ SetPageDirty(page);
+ if (rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR))
+ SetPageReferenced(page);
+#endif
+}
+
/*
* query functions pte_write/pte_dirty/pte_young only work if
* pte_present() is true. Undefined behaviour if not..
@@ -599,6 +668,8 @@ static inline void pmd_clear(pmd_t *pmd)
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
+ if (mm->context.pgstes)
+ ptep_rcp_copy(ptep);
pte_val(*ptep) = _PAGE_TYPE_EMPTY;
if (mm->context.noexec)
pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
@@ -667,6 +738,22 @@ static inline pte_t pte_mkyoung(pte_t pt
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+#ifdef CONFIG_PGSTE
+ unsigned long physpage;
+ int young;
+
+ if (!vma->vm_mm->context.pgstes)
+ return 0;
+ physpage = pte_val(*ptep) & PAGE_MASK;
+
+ young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0);
+ rcp_lock(ptep);
+ if (young)
+ rcp_set_bits(ptep, _PAGE_RCP_GR);
+ young |= rcp_test_and_clear_bits(ptep, _PAGE_RCP_HR);
+ rcp_unlock(ptep);
+ return young;
+#endif
return 0;
}
@@ -674,7 +761,13 @@ static inline int ptep_test_and_clear_yo
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- /* No need to flush TLB; bits are in storage key */
+ /* No need to flush TLB
+ * On s390 reference bits are in storage key and never in TLB
+ * With virtualization we handle the reference bit, without we
+ * we can simply return */
+#ifdef CONFIG_PGSTE
+ return ptep_test_and_clear_young(vma, address, ptep);
+#endif
return 0;
}
@@ -693,15 +786,25 @@ static inline void __ptep_ipte(unsigned
: "=m" (*ptep) : "m" (*ptep),
"a" (pto), "a" (address));
}
- pte_val(*ptep) = _PAGE_TYPE_EMPTY;
}
static inline void ptep_invalidate(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
+ if (mm->context.pgstes) {
+ rcp_lock(ptep);
+ __ptep_ipte(address, ptep);
+ ptep_rcp_copy(ptep);
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ rcp_unlock(ptep);
+ return;
+ }
__ptep_ipte(address, ptep);
- if (mm->context.noexec)
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ if (mm->context.noexec) {
__ptep_ipte(address, ptep + PTRS_PER_PTE);
+ pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
+ }
}
/*
Index: kvm/mm/rmap.c
===================================================================
--- kvm.orig/mm/rmap.c
+++ kvm/mm/rmap.c
@@ -411,9 +411,6 @@ int page_referenced(struct page *page, i
{
int referenced = 0;
- if (page_test_and_clear_young(page))
- referenced++;
-
if (TestClearPageReferenced(page))
referenced++;
@@ -431,6 +428,10 @@ int page_referenced(struct page *page, i
unlock_page(page);
}
}
+
+ if (page_test_and_clear_young(page))
+ referenced++;
+
return referenced;
}
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 03/15] preparation: address of the 64bit extint parm in lowcore
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 16:24 ` [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 02/15] preparation: host memory management changes for s390 kvm Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 04/15] preparation: split sysinfo defintions for kvm use Carsten Otte
` (17 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Christian Borntraeger <borntraeger@de.ibm.com>
The address 0x11b8 is used by z/VM for pfault and diag 250 I/O to
provide a 64 bit extint parameter. virtio uses the same address, so
it's time to update the lowcore structure.
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
include/asm-s390/lowcore.h | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
Index: kvm/include/asm-s390/lowcore.h
===================================================================
--- kvm.orig/include/asm-s390/lowcore.h
+++ kvm/include/asm-s390/lowcore.h
@@ -380,27 +380,32 @@ struct _lowcore
/* whether the kernel died with panic() or not */
__u32 panic_magic; /* 0xe00 */
- __u8 pad13[0x1200-0xe04]; /* 0xe04 */
+ __u8 pad13[0x11b8-0xe04]; /* 0xe04 */
+
+ /* 64 bit extparam used for pfault, diag 250 etc */
+ __u64 ext_params2; /* 0x11B8 */
+
+ __u8 pad14[0x1200-0x11C0]; /* 0x11C0 */
/* System info area */
__u64 floating_pt_save_area[16]; /* 0x1200 */
__u64 gpregs_save_area[16]; /* 0x1280 */
__u32 st_status_fixed_logout[4]; /* 0x1300 */
- __u8 pad14[0x1318-0x1310]; /* 0x1310 */
+ __u8 pad15[0x1318-0x1310]; /* 0x1310 */
__u32 prefixreg_save_area; /* 0x1318 */
__u32 fpt_creg_save_area; /* 0x131c */
- __u8 pad15[0x1324-0x1320]; /* 0x1320 */
+ __u8 pad16[0x1324-0x1320]; /* 0x1320 */
__u32 tod_progreg_save_area; /* 0x1324 */
__u32 cpu_timer_save_area[2]; /* 0x1328 */
__u32 clock_comp_save_area[2]; /* 0x1330 */
- __u8 pad16[0x1340-0x1338]; /* 0x1338 */
+ __u8 pad17[0x1340-0x1338]; /* 0x1338 */
__u32 access_regs_save_area[16]; /* 0x1340 */
__u64 cregs_save_area[16]; /* 0x1380 */
/* align to the top of the prefix area */
- __u8 pad17[0x2000-0x1400]; /* 0x1400 */
+ __u8 pad18[0x2000-0x1400]; /* 0x1400 */
#endif /* !__s390x__ */
} __attribute__((packed)); /* End structure*/
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 04/15] preparation: split sysinfo defintions for kvm use
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (2 preceding siblings ...)
2008-03-20 16:24 ` [RFC/PATCH 03/15] preparation: address of the 64bit extint parm in lowcore Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
2008-03-20 16:24 ` [RFC/PATCH 05/15] kvm-s390: s390 arch backend for the kvm kernel module Carsten Otte
` (16 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Christian Borntraeger <borntraeger@de.ibm.com>
drivers/s390/sysinfo.c uses the store system information instruction to query
the system for information about the machine, the LPAR and additional
hypervisors. KVM has to implement the host part for this instruction.
To avoid code duplication, this patch splits the common definitions from
sysinfo.c into a separate header file include/asm-s390/sysinfo.h for KVM use.
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
drivers/s390/sysinfo.c | 100 ----------------------------------------
include/asm-s390/sysinfo.h | 112 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 113 insertions(+), 99 deletions(-)
Index: kvm/drivers/s390/sysinfo.c
===================================================================
--- kvm.orig/drivers/s390/sysinfo.c
+++ kvm/drivers/s390/sysinfo.c
@@ -11,111 +11,13 @@
#include <linux/init.h>
#include <linux/delay.h>
#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
/* Sigh, math-emu. Don't ask. */
#include <asm/sfp-util.h>
#include <math-emu/soft-fp.h>
#include <math-emu/single.h>
-struct sysinfo_1_1_1 {
- char reserved_0[32];
- char manufacturer[16];
- char type[4];
- char reserved_1[12];
- char model_capacity[16];
- char sequence[16];
- char plant[4];
- char model[16];
-};
-
-struct sysinfo_1_2_1 {
- char reserved_0[80];
- char sequence[16];
- char plant[4];
- char reserved_1[2];
- unsigned short cpu_address;
-};
-
-struct sysinfo_1_2_2 {
- char format;
- char reserved_0[1];
- unsigned short acc_offset;
- char reserved_1[24];
- unsigned int secondary_capability;
- unsigned int capability;
- unsigned short cpus_total;
- unsigned short cpus_configured;
- unsigned short cpus_standby;
- unsigned short cpus_reserved;
- unsigned short adjustment[0];
-};
-
-struct sysinfo_1_2_2_extension {
- unsigned int alt_capability;
- unsigned short alt_adjustment[0];
-};
-
-struct sysinfo_2_2_1 {
- char reserved_0[80];
- char sequence[16];
- char plant[4];
- unsigned short cpu_id;
- unsigned short cpu_address;
-};
-
-struct sysinfo_2_2_2 {
- char reserved_0[32];
- unsigned short lpar_number;
- char reserved_1;
- unsigned char characteristics;
- unsigned short cpus_total;
- unsigned short cpus_configured;
- unsigned short cpus_standby;
- unsigned short cpus_reserved;
- char name[8];
- unsigned int caf;
- char reserved_2[16];
- unsigned short cpus_dedicated;
- unsigned short cpus_shared;
-};
-
-#define LPAR_CHAR_DEDICATED (1 << 7)
-#define LPAR_CHAR_SHARED (1 << 6)
-#define LPAR_CHAR_LIMITED (1 << 5)
-
-struct sysinfo_3_2_2 {
- char reserved_0[31];
- unsigned char count;
- struct {
- char reserved_0[4];
- unsigned short cpus_total;
- unsigned short cpus_configured;
- unsigned short cpus_standby;
- unsigned short cpus_reserved;
- char name[8];
- unsigned int caf;
- char cpi[16];
- char reserved_1[24];
-
- } vm[8];
-};
-
-static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
- register int r0 asm("0") = (fc << 28) | sel1;
- register int r1 asm("1") = sel2;
-
- asm volatile(
- " stsi 0(%2)\n"
- "0: jz 2f\n"
- "1: lhi %0,%3\n"
- "2:\n"
- EX_TABLE(0b,1b)
- : "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
- : "cc", "memory" );
- return r0;
-}
-
static inline int stsi_0(void)
{
int rc = stsi (NULL, 0, 0, 0);
Index: kvm/include/asm-s390/sysinfo.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/sysinfo.h
@@ -0,0 +1,112 @@
+/*
+ * definition for store system information stsi
+ *
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Ulrich Weigand <weigand@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+struct sysinfo_1_1_1 {
+ char reserved_0[32];
+ char manufacturer[16];
+ char type[4];
+ char reserved_1[12];
+ char model_capacity[16];
+ char sequence[16];
+ char plant[4];
+ char model[16];
+};
+
+struct sysinfo_1_2_1 {
+ char reserved_0[80];
+ char sequence[16];
+ char plant[4];
+ char reserved_1[2];
+ unsigned short cpu_address;
+};
+
+struct sysinfo_1_2_2 {
+ char format;
+ char reserved_0[1];
+ unsigned short acc_offset;
+ char reserved_1[24];
+ unsigned int secondary_capability;
+ unsigned int capability;
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ unsigned short adjustment[0];
+};
+
+struct sysinfo_1_2_2_extension {
+ unsigned int alt_capability;
+ unsigned short alt_adjustment[0];
+};
+
+struct sysinfo_2_2_1 {
+ char reserved_0[80];
+ char sequence[16];
+ char plant[4];
+ unsigned short cpu_id;
+ unsigned short cpu_address;
+};
+
+struct sysinfo_2_2_2 {
+ char reserved_0[32];
+ unsigned short lpar_number;
+ char reserved_1;
+ unsigned char characteristics;
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ char name[8];
+ unsigned int caf;
+ char reserved_2[16];
+ unsigned short cpus_dedicated;
+ unsigned short cpus_shared;
+};
+
+#define LPAR_CHAR_DEDICATED (1 << 7)
+#define LPAR_CHAR_SHARED (1 << 6)
+#define LPAR_CHAR_LIMITED (1 << 5)
+
+struct sysinfo_3_2_2 {
+ char reserved_0[31];
+ unsigned char count;
+ struct {
+ char reserved_0[4];
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ char name[8];
+ unsigned int caf;
+ char cpi[16];
+ char reserved_1[24];
+
+ } vm[8];
+};
+
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+ register int r0 asm("0") = (fc << 28) | sel1;
+ register int r1 asm("1") = sel2;
+
+ asm volatile(
+ " stsi 0(%2)\n"
+ "0: jz 2f\n"
+ "1: lhi %0,%3\n"
+ "2:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
+ : "cc", "memory");
+ return r0;
+}
+
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 05/15] kvm-s390: s390 arch backend for the kvm kernel module
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (3 preceding siblings ...)
2008-03-20 16:24 ` [RFC/PATCH 04/15] preparation: split sysinfo defintions for kvm use Carsten Otte
@ 2008-03-20 16:24 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 06/15] kvm-s390: sie intercept handling Carsten Otte
` (15 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:24 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
From: Heiko Carstens <heiko.carstens@de.ibm.com>
This patch contains the port of Qumranet's kvm kernel module to IBM zSeries
(aka s390x, mainframe) architecture. It uses the mainframe's virtualization
instruction SIE to run virtual machines with up to 64 virtual CPUs each.
This port is only usable on 64bit host kernels, and can only run 64bit guest
kernels. However, running 31bit applications in guest userspace is possible.
The following source files are introduced by this patch
arch/s390/kvm/kvm-s390.c similar to arch/x86/kvm/x86.c, this implements all
arch callbacks for kvm. __vcpu_run calls back into
sie64a to enter the guest machine context
arch/s390/kvm/sie64a.S assembler function sie64a, which enters guest
context via SIE, and switches world before and after that
include/asm-s390/kvm_host.h contains all vital data structures needed to run
virtual machines on the mainframe
include/asm-s390/kvm.h defines kvm_regs and friends for user access to
guest register content
arch/s390/kvm/gaccess.h functions similar to uaccess to access guest memory
arch/s390/kvm/kvm-s390.h header file for kvm-s390 internals, extended by
later patches
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/Makefile | 2
arch/s390/kernel/vtime.c | 1
arch/s390/kvm/Makefile | 14 +
arch/s390/kvm/gaccess.h | 280 +++++++++++++++++++++
arch/s390/kvm/kvm-s390.c | 574 ++++++++++++++++++++++++++++++++++++++++++++
arch/s390/kvm/kvm-s390.h | 29 ++
arch/s390/kvm/sie64a.S | 47 +++
include/asm-s390/Kbuild | 1
include/asm-s390/kvm.h | 44 +++
include/asm-s390/kvm_host.h | 119 +++++++++
include/asm-s390/kvm_para.h | 30 ++
include/linux/kvm.h | 15 +
include/linux/kvm_host.h | 4
13 files changed, 1159 insertions(+), 1 deletion(-)
Index: kvm/arch/s390/Makefile
===================================================================
--- kvm.orig/arch/s390/Makefile
+++ kvm/arch/s390/Makefile
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start
head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o
core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
- arch/s390/appldata/ arch/s390/hypfs/
+ arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
libs-y += arch/s390/lib/
drivers-y += drivers/s390/
drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
Index: kvm/arch/s390/kernel/vtime.c
===================================================================
--- kvm.orig/arch/s390/kernel/vtime.c
+++ kvm/arch/s390/kernel/vtime.c
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_st
S390_lowcore.steal_clock -= cputime << 12;
account_system_time(tsk, 0, cputime);
}
+EXPORT_SYMBOL_GPL(account_system_vtime);
static inline void set_vtimer(__u64 expires)
{
Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o
+obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/gaccess.h
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/gaccess.h
@@ -0,0 +1,280 @@
+/*
+ * gaccess.h - access guest memory
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/uaccess.h>
+
+/*
+ * Translate a guest address into the matching address in the host process.
+ *
+ * Prefixing is applied first: an address inside the first two pages is moved
+ * up by the prefix value, an address inside the two-page window starting at
+ * the prefix is moved down by it. The result is then offset by the guest
+ * origin.
+ *
+ * Returns the userspace pointer, or ERR_PTR(-EFAULT) if the (prefixed)
+ * address lies outside guest memory.
+ */
+static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
+					       u64 guestaddr)
+{
+	u64 prefix = vcpu->arch.sie_block->prefix;
+	u64 origin = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestaddr < 2 * PAGE_SIZE)
+		guestaddr += prefix;
+	else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
+		guestaddr -= prefix;
+
+	/* valid offsets are 0 .. memsize - 1; memsize is one past the end */
+	if (guestaddr >= memsize)
+		return (void __user __force *) ERR_PTR(-EFAULT);
+
+	guestaddr += origin;
+
+	return (void __user *) guestaddr;
+}
+
+static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u64 *result)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (guestaddr & 7)
+ BUG();
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return get_user(*result, (u64 __user *) uptr);
+}
+
+static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u32 *result)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (guestaddr & 3)
+ BUG();
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return get_user(*result, (u32 __user *) uptr);
+}
+
+/*
+ * Read a 16 bit value from guest address guestaddr into *result.
+ *
+ * guestaddr must be 2-byte aligned (BUG() otherwise). Returns the error
+ * encoded by __guestaddr_to_user() if the address cannot be translated,
+ * otherwise the result of get_user().
+ */
+static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u16 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (guestaddr & 1)
+		BUG();
+
+	/* __force: drop the __user address space for the ERR_PTR helpers */
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u16 __user *) uptr);
+}
+
+static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u8 *result)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return get_user(*result, (u8 __user *) uptr);
+}
+
+static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u64 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (guestaddr & 7)
+ BUG();
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u64 __user *) uptr);
+}
+
+static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u32 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (guestaddr & 3)
+ BUG();
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u32 __user *) uptr);
+}
+
+static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u16 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (guestaddr & 1)
+ BUG();
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u16 __user *) uptr);
+}
+
+static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u8 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u8 __user *) uptr);
+}
+
+
+/*
+ * Byte-wise fallback for copy_to_guest(): every byte is stored through
+ * put_guest_u8(), so each one is translated individually. Stops at the
+ * first negative return code and passes it on; returns 0 otherwise.
+ */
+static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
+				       const void *from, unsigned long n)
+{
+	const u8 *src = from;
+	unsigned long off;
+
+	for (off = 0; off < n; off++) {
+		int rc = put_guest_u8(vcpu, guestdest + off, src[off]);
+
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+/*
+ * Copy n bytes from the kernel buffer "from" to guest address guestdest,
+ * applying the same prefix relocation as __guestaddr_to_user().
+ *
+ * A range that crosses the end of the first two pages, the start of the
+ * prefix window or the end of the prefix window is handed to
+ * __copy_to_guest_slow(), which copies byte by byte.
+ *
+ * Returns -EFAULT if the relocated range ends beyond guest memory or wraps
+ * around; otherwise the result of copy_to_user() (fast path) or of
+ * __copy_to_guest_slow() (slow path).
+ */
+static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
+ const void *from, unsigned long n)
+{
+ u64 prefix = vcpu->arch.sie_block->prefix;
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ /* range starts inside the first two pages but ends beyond them */
+ if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
+ goto slowpath;
+
+ /* range starts below the prefix window and reaches into it */
+ if ((guestdest < prefix) && (guestdest + n > prefix))
+ goto slowpath;
+
+ /* range starts below the end of the prefix window and runs past it */
+ if ((guestdest < prefix + 2 * PAGE_SIZE)
+ && (guestdest + n > prefix + 2 * PAGE_SIZE))
+ goto slowpath;
+
+ /* the whole range relocates the same way, so translate its start once */
+ if (guestdest < 2 * PAGE_SIZE)
+ guestdest += prefix;
+ else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
+ guestdest -= prefix;
+
+ if (guestdest + n > memsize)
+ return -EFAULT;
+
+ /* guestdest + n wrapped around */
+ if (guestdest + n < guestdest)
+ return -EFAULT;
+
+ guestdest += origin;
+
+ return copy_to_user((void __user *) guestdest, from, n);
+slowpath:
+ return __copy_to_guest_slow(vcpu, guestdest, from, n);
+}
+
+static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
+ u64 guestsrc, unsigned long n)
+{
+ int rc;
+ unsigned long i;
+ u8 *data = to;
+
+ for (i = 0; i < n; i++) {
+ rc = get_guest_u8(vcpu, guestsrc++, data++);
+ if (rc < 0)
+ return rc;
+ }
+ return 0;
+}
+
+static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
+ u64 guestsrc, unsigned long n)
+{
+ u64 prefix = vcpu->arch.sie_block->prefix;
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
+ goto slowpath;
+
+ if ((guestsrc < prefix) && (guestsrc + n > prefix))
+ goto slowpath;
+
+ if ((guestsrc < prefix + 2 * PAGE_SIZE)
+ && (guestsrc + n > prefix + 2 * PAGE_SIZE))
+ goto slowpath;
+
+ if (guestsrc < 2 * PAGE_SIZE)
+ guestsrc += prefix;
+ else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
+ guestsrc -= prefix;
+
+ if (guestsrc + n > memsize)
+ return -EFAULT;
+
+ if (guestsrc + n < guestsrc)
+ return -EFAULT;
+
+ guestsrc += origin;
+
+ return copy_from_user(to, (void __user *) guestsrc, n);
+slowpath:
+ return __copy_from_guest_slow(vcpu, to, guestsrc, n);
+}
+
+static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
+ const void *from, unsigned long n)
+{
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if (guestdest + n > memsize)
+ return -EFAULT;
+
+ if (guestdest + n < guestdest)
+ return -EFAULT;
+
+ guestdest += origin;
+
+ return copy_to_user((void __user *) guestdest, from, n);
+}
+
+static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
+ u64 guestsrc, unsigned long n)
+{
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if (guestsrc + n > memsize)
+ return -EFAULT;
+
+ if (guestsrc + n < guestsrc)
+ return -EFAULT;
+
+ guestsrc += origin;
+
+ return copy_from_user(to, (void __user *) guestsrc, n);
+}
+#endif
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,574 @@
+/*
+ * kvm-s390.c -- hosting zSeries kernel virtual machines
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/lowcore.h>
+#include <asm/pgtable.h>
+
+#include "gaccess.h"
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { "userspace_handled", VCPU_STAT(exit_userspace) },
+ { NULL }
+};
+
+
+/* Section: not file related */
+void kvm_arch_hardware_enable(void *garbage)
+{
+ /* every s390 is virtualization enabled ;-) */
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+}
+
+int kvm_arch_init(void *opaque)
+{
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ if (ioctl == KVM_S390_ENABLE_SIE)
+ return s390_enable_sie();
+ return -EINVAL;
+}
+
+
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+ return 0;
+}
+
+/* Section: vm related */
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ return 0;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm *kvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r;
+
+ switch (ioctl) {
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+/*
+ * Create the architecture specific part of a VM.
+ *
+ * Calls s390_enable_sie() for the current process, allocates the struct kvm
+ * and one zeroed page for the SCA, and registers a debug area named
+ * "kvm-<pid of current>". Finally try_module_get() is called; its result is
+ * not checked.
+ *
+ * Returns the new struct kvm, or an ERR_PTR() carrying the error from
+ * s390_enable_sie() or -ENOMEM if an allocation or debug_register() fails.
+ */
+struct kvm *kvm_arch_create_vm(void)
+{
+ struct kvm *kvm;
+ int rc;
+ char debug_name[16];
+
+
+ rc = s390_enable_sie();
+ if (rc)
+ goto out_nokvm;
+
+ /* every failure from here on is reported as -ENOMEM */
+ rc = -ENOMEM;
+ kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+ if (!kvm)
+ goto out_nokvm;
+
+ kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+ if (!kvm->arch.sca)
+ goto out_nosca;
+
+ sprintf(debug_name, "kvm-%u", current->pid);
+
+ kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+ if (!kvm->arch.dbf)
+ goto out_nodbf;
+
+ debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+ VM_EVENT(kvm, 3, "%s", "vm created");
+
+ try_module_get(THIS_MODULE);
+
+ return kvm;
+ /* unwind in reverse order of allocation */
+out_nodbf:
+ free_page((unsigned long)(kvm->arch.sca));
+out_nosca:
+ kfree(kvm);
+out_nokvm:
+ return ERR_PTR(rc);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ debug_unregister(kvm->arch.dbf);
+ free_page((unsigned long)(kvm->arch.sca));
+ kfree(kvm);
+ module_put(THIS_MODULE);
+}
+
+/* Section: vcpu related */
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ /* kvm common code refers to this, but doesn't call it */
+ BUG();
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ save_fp_regs(&vcpu->arch.host_fpregs);
+ save_access_regs(vcpu->arch.host_acrs);
+ vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
+ restore_fp_regs(&vcpu->arch.guest_fpregs);
+ restore_access_regs(vcpu->arch.guest_acrs);
+
+ if (signal_pending(current))
+ atomic_set_mask(CPUSTAT_STOP_INT,
+ &vcpu->arch.sie_block->cpuflags);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ save_fp_regs(&vcpu->arch.guest_fpregs);
+ save_access_regs(vcpu->arch.guest_acrs);
+ restore_fp_regs(&vcpu->arch.host_fpregs);
+ restore_access_regs(vcpu->arch.host_acrs);
+}
+
+static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
+{
+ /* this equals initial cpu reset in pop, but we don't switch to ESA */
+ vcpu->arch.sie_block->gpsw.mask = 0UL;
+ vcpu->arch.sie_block->gpsw.addr = 0UL;
+ vcpu->arch.sie_block->prefix = 0UL;
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ vcpu->arch.sie_block->cputm = 0UL;
+ vcpu->arch.sie_block->ckc = 0UL;
+ vcpu->arch.sie_block->todpr = 0;
+ memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
+ vcpu->arch.sie_block->gcr[0] = 0xE0UL;
+ vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
+ vcpu->arch.guest_fpregs.fpc = 0;
+ asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+ vcpu->arch.sie_block->gbea = 1;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
+ vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
+ vcpu->arch.sie_block->gmsor = 0x000000000000;
+ vcpu->arch.sie_block->ecb = 2;
+ vcpu->arch.sie_block->eca = 0xC1002001U;
+
+ return 0;
+}
+
+/*
+ * Allocate a vcpu together with its SIE control block and enter the block
+ * into slot "id" of the VM's SCA.
+ *
+ * Returns the new vcpu, or ERR_PTR(-ENOMEM) if an allocation fails, or
+ * ERR_PTR() of the kvm_vcpu_init() error. Every failure path releases what
+ * was acquired before it, including the SIE block and its SCA entry.
+ */
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+				      unsigned int id)
+{
+	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+	int rc = -ENOMEM;
+
+	if (!vcpu)
+		goto out_nomem;
+
+	vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
+
+	if (!vcpu->arch.sie_block)
+		goto out_free_cpu;
+
+	vcpu->arch.sie_block->icpua = id;
+	BUG_ON(!kvm->arch.sca);
+	BUG_ON(kvm->arch.sca->cpu[id].sda);
+	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	/* the SCA address is stored split into its high and low 32 bits */
+	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
+	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+
+	rc = kvm_vcpu_init(vcpu, kvm, id);
+	if (rc)
+		goto out_free_sie_block;
+	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+		 vcpu->arch.sie_block);
+
+	try_module_get(THIS_MODULE);
+
+	return vcpu;
+out_free_sie_block:
+	/* clear the SCA entry first so it never points at a freed page */
+	kvm->arch.sca->cpu[id].sda = 0;
+	free_page((unsigned long)(vcpu->arch.sie_block));
+out_free_cpu:
+	kfree(vcpu);
+out_nomem:
+	return ERR_PTR(rc);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
+ free_page((unsigned long)(vcpu->arch.sie_block));
+ kfree(vcpu);
+ module_put(THIS_MODULE);
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ /* kvm common code refers to this, but never calls it */
+ BUG();
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+ vcpu_load(vcpu);
+ kvm_s390_vcpu_initial_reset(vcpu);
+ vcpu_put(vcpu);
+ return 0;
+}
+
+/*
+ * KVM_SET_REGS: copy the general purpose registers from *regs into the
+ * vcpu's guest_gprs. Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+/*
+ * KVM_GET_REGS: copy the vcpu's guest_gprs into the general purpose
+ * registers of *regs. Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+ memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
+ memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+ memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
+ memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_load(vcpu);
+ memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+ vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_load(vcpu);
+ memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+ fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+ vcpu_put(vcpu);
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu,
+ psw_t psw)
+{
+ int rc = 0;
+
+ vcpu_load(vcpu);
+ if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
+ rc = -EBUSY;
+ else
+ vcpu->arch.sie_block->gpsw = psw;
+ vcpu_put(vcpu);
+ return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
+{
+ return -EINVAL; /* not implemented yet */
+}
+
+/*
+ * Enter the guest once through sie64a().
+ *
+ * Guest registers 14 and 15 are copied into the SIE control block
+ * (gg14/gg15) before entry and copied back afterwards; the remaining guest
+ * registers are passed to sie64a() through guest_gprs. The return value of
+ * sie64a() is not evaluated here.
+ */
+static void __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ /* 16 bytes = guest_gprs[14] and guest_gprs[15] -> gg14 and gg15 */
+ memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
+
+ if (need_resched())
+ schedule();
+
+ /* clear the intercept code left over from the previous exit */
+ vcpu->arch.sie_block->icptcode = 0;
+ /* kvm_guest_enter()/kvm_guest_exit() are called with interrupts off */
+ local_irq_disable();
+ kvm_guest_enter();
+ local_irq_enable();
+ VCPU_EVENT(vcpu, 6, "entering sie flags %x",
+ atomic_read(&vcpu->arch.sie_block->cpuflags));
+ sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+ VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+ vcpu->arch.sie_block->icptcode);
+ local_irq_disable();
+ kvm_guest_exit();
+ local_irq_enable();
+
+ /* fetch guest registers 14 and 15 back from the control block */
+ memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ sigset_t sigsaved;
+
+ vcpu_load(vcpu);
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+ atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+
+ __vcpu_run(vcpu);
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+ vcpu_put(vcpu);
+
+ vcpu->stat.exit_userspace++;
+ return 0;
+}
+
+/*
+ * Store n bytes at guestdest: through copy_to_guest() when prefix is
+ * non-zero, through copy_to_guest_absolute() when it is zero. The result of
+ * the selected helper is returned unchanged.
+ */
+static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
+		       unsigned long n, int prefix)
+{
+	if (!prefix)
+		return copy_to_guest_absolute(vcpu, guestdest, from, n);
+
+	return copy_to_guest(vcpu, guestdest, from, n);
+}
+
+/*
+ * store status at address
+ * we have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ const unsigned char archmode = 1;
+ int prefix;
+
+ if (addr == KVM_S390_STORE_STATUS_NOADDR) {
+ if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+ return -EFAULT;
+ addr = SAVE_AREA_BASE;
+ prefix = 0;
+ } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
+ if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+ return -EFAULT;
+ addr = SAVE_AREA_BASE;
+ prefix = 1;
+ } else
+ prefix = 0;
+
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
+ vcpu->arch.guest_fpregs.fprs, 128, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
+ vcpu->arch.guest_gprs, 128, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
+ &vcpu->arch.sie_block->gpsw, 16, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
+ &vcpu->arch.sie_block->prefix, 4, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu,
+ addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
+ &vcpu->arch.guest_fpregs.fpc, 4, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
+ &vcpu->arch.sie_block->todpr, 4, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
+ &vcpu->arch.sie_block->cputm, 8, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
+ &vcpu->arch.sie_block->ckc, 8, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
+ &vcpu->arch.guest_acrs, 64, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu,
+ addr + offsetof(struct save_area_s390x, ctrl_regs),
+ &vcpu->arch.sie_block->gcr, 128, prefix))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ int rc;
+
+ vcpu_load(vcpu);
+ rc = __kvm_s390_vcpu_store_status(vcpu, addr);
+ vcpu_put(vcpu);
+
+ return rc;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ switch (ioctl) {
+ case KVM_S390_STORE_STATUS:
+ return kvm_s390_vcpu_store_status(vcpu, arg);
+ case KVM_S390_SET_INITIAL_PSW: {
+ psw_t psw;
+
+ if (copy_from_user(&psw, argp, sizeof(psw)))
+ return -EFAULT;
+ return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+ }
+ case KVM_S390_INITIAL_RESET:
+ return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ default:
+ ;
+ }
+ return -EINVAL;
+}
+
+/* Section: memory related */
+int kvm_arch_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ /* A few sanity checks. We can have exactly one memory slot which has
+ to start at guest virtual zero and which has to be located at a
+ page boundary in userland and which has to end at a page boundary.
+ The memory in userland is ok to be fragmented into various different
+ vmas. It is okay to mmap() and munmap() stuff in this slot after
+ doing this call at any time */
+
+ if (mem->slot != 0)
+ return -EINVAL;
+
+ if (mem->guest_phys_addr != 0)
+ return -EINVAL;
+
+ if (mem->userspace_addr % PAGE_SIZE)
+ return -EINVAL;
+
+ if (mem->memory_size % PAGE_SIZE)
+ return -EINVAL;
+
+ kvm->arch.guest_origin = mem->userspace_addr;
+ kvm->arch.guest_memsize = mem->memory_size;
+
+ /* FIXME: we do want to interrupt running CPUs and update their memory
+ configuration now to avoid race conditions. But hey, changing the
+ memory layout while virtual CPUs are running is usually bad
+ programming practice. */
+
+ return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ return gfn;
+}
+
+static int __init kvm_s390_init(void)
+{
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+/* Module exit hook: hands over to kvm_exit(). */
+static void __exit kvm_s390_exit(void)
+{
+	kvm_exit();
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,29 @@
+/*
+ * kvm-s390.h - definition for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
+ d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
+ "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
+ d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
+ d_args); \
+} while (0)
+#endif
Index: kvm/arch/s390/kvm/sie64a.S
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/sie64a.S
@@ -0,0 +1,47 @@
+/*
+ * sie64a.S - low level sie call
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <asm/asm-offsets.h>
+
+SP_R5 = 5 * 8 # offset into stackframe
+SP_R6 = 6 * 8
+
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ .globl sie64a
+sie64a:
+ lgr %r5,%r3
+ stmg %r5,%r14,SP_R5(%r15) # save register on entry
+ lgr %r14,%r2 # pointer to sie control block
+ lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+sie_inst:
+ sie 0(%r14)
+ lg %r14,SP_R5(%r15)
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lghi %r2,0
+ lmg %r6,%r14,SP_R6(%r15)
+ br %r14
+
+sie_err:
+ lg %r14,SP_R5(%r15)
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lghi %r2,-EFAULT
+ lmg %r6,%r14,SP_R6(%r15)
+ br %r14
+
+ .section __ex_table,"a"
+ .quad sie_inst,sie_err
+ .previous
Index: kvm/include/asm-s390/Kbuild
===================================================================
--- kvm.orig/include/asm-s390/Kbuild
+++ kvm/include/asm-s390/Kbuild
@@ -7,6 +7,7 @@ header-y += tape390.h
header-y += ucontext.h
header-y += vtoc.h
header-y += zcrypt.h
+header-y += kvm.h
unifdef-y += cmb.h
unifdef-y += debug.h
Index: kvm/include/asm-s390/kvm.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/kvm.h
@@ -0,0 +1,44 @@
+#ifndef __LINUX_KVM_S390_H
+#define __LINUX_KVM_S390_H
+/*
+ * asm-s390/kvm.h - KVM s390 specific structures and definitions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#include <asm/types.h>
+
+/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
+struct kvm_pic_state {
+ /* no PIC for s390 */
+};
+
+struct kvm_ioapic_state {
+ /* no IOAPIC for s390 */
+};
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+ /* general purpose regs for s390 */
+ __u64 gprs[16];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+ __u32 acrs[16];
+ __u64 crs[16];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+ __u32 fpc;
+ __u64 fprs[16];
+};
+
+#endif
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/kvm_host.h
@@ -0,0 +1,119 @@
+/*
+ * asm-s390/kvm_host.h - definition for kernel virtual machines on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+
+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+#include <linux/kvm_host.h>
+#include <asm/debug.h>
+
+struct sca_entry {
+ atomic_t scn;
+ __u64 reserved;
+ __u64 sda;
+ __u64 reserved2[2];
+} __attribute__((packed));
+
+
+struct sca_block {
+ __u64 ipte_control;
+ __u64 reserved[5];
+ __u64 mcn;
+ __u64 reserved2;
+ struct sca_entry cpu[64];
+} __attribute__((packed));
+
+#define KVM_PAGES_PER_HPAGE 256
+
+#define CPUSTAT_HOST 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+
+struct sie_block {
+ atomic_t cpuflags; /* 0x0000 */
+ __u32 prefix; /* 0x0004 */
+ __u8 reserved8[32]; /* 0x0008 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u8 reserved40[4]; /* 0x0040 */
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+ __u32 ictl; /* 0x0048 */
+ __u32 eca; /* 0x004c */
+ __u8 icptcode; /* 0x0050 */
+ __u8 reserved51; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54[2]; /* 0x0054 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+ __u8 reserved60; /* 0x0060 */
+ __u8 ecb; /* 0x0061 */
+ __u8 reserved62[2]; /* 0x0062 */
+ __u32 scaol; /* 0x0064 */
+ __u8 reserved68[4]; /* 0x0068 */
+ __u32 todpr; /* 0x006c */
+ __u8 reserved70[16]; /* 0x0070 */
+ __u64 gmsor; /* 0x0080 */
+ __u64 gmslm; /* 0x0088 */
+ psw_t gpsw; /* 0x0090 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[80]; /* 0x00b0 */
+ __u64 gcr[16]; /* 0x0100 */
+ __u64 gbea; /* 0x0180 */
+ __u8 reserved188[120]; /* 0x0188 */
+} __attribute__((packed));
+
+struct kvm_vcpu_stat {
+ u32 exit_userspace;
+};
+
+struct kvm_vcpu_arch {
+ struct sie_block *sie_block;
+ unsigned long guest_gprs[16];
+ s390_fp_regs host_fpregs;
+ unsigned int host_acrs[NUM_ACRS];
+ s390_fp_regs guest_fpregs;
+ unsigned int guest_acrs[NUM_ACRS];
+};
+
+struct kvm_vm_stat {
+ u32 remote_tlb_flush;
+};
+
+struct kvm_arch{
+ unsigned long guest_origin;
+ unsigned long guest_memsize;
+ struct sca_block *sca;
+ debug_info_t *dbf;
+};
+
+extern int sie64a(struct sie_block *, __u64 *);
+#endif
Index: kvm/include/asm-s390/kvm_para.h
===================================================================
--- /dev/null
+++ kvm/include/asm-s390/kvm_para.h
@@ -0,0 +1,30 @@
+/*
+ * asm-s390/kvm_para.h - definition for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __S390_KVM_PARA_H
+#define __S390_KVM_PARA_H
+
+/*
+ * No hypercalls for KVM on s390
+ */
+
+static inline int kvm_para_available(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+
+#endif /* __S390_KVM_PARA_H */
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -205,6 +205,11 @@ struct kvm_vapic_addr {
__u64 vapic_addr;
};
+struct kvm_s390_psw {
+ __u64 mask;
+ __u64 addr;
+};
+
#define KVMIO 0xAE
/*
@@ -213,6 +218,8 @@ struct kvm_vapic_addr {
#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
+
+#define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06)
/*
* Check if a kvm extension is available. Argument is extension number,
* return is 1 (yes) or 0 (no, sorry).
@@ -291,5 +298,13 @@ struct kvm_vapic_addr {
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
/* Available with KVM_CAP_VAPIC */
#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
+/* store status for s390 */
+#define KVM_S390_STORE_STATUS_NOADDR (-1ul)
+#define KVM_S390_STORE_STATUS_PREFIXED (-2ul)
+#define KVM_S390_STORE_STATUS _IOW(KVMIO, 0x95, unsigned long)
+/* initial ipl psw for s390 */
+#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw)
+/* initial reset for s390 */
+#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#endif
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -24,7 +24,11 @@
#include <asm/kvm_host.h>
+#ifdef CONFIG_S390
+#define KVM_MAX_VCPUS 64
+#else
#define KVM_MAX_VCPUS 16
+#endif
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 06/15] kvm-s390: sie intercept handling
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (4 preceding siblings ...)
2008-03-20 16:24 ` [RFC/PATCH 05/15] kvm-s390: s390 arch backend for the kvm kernel module Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 07/15] kvm-s390: interrupt subsystem, cpu timer, waitpsw Carsten Otte
` (14 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch introduces handling of sie intercepts in three flavors: Intercepts
are either handled completely in-kernel by kvm_handle_sie_intercept(),
or passed to userspace with corresponding data in struct kvm_run in case
kvm_handle_sie_intercept() returns -ENOTSUPP.
In case of partial execution in kernel with the need of userspace support,
kvm_handle_sie_intercept() may choose to set up struct kvm_run and return
-EREMOTE.
The trivial intercept reasons are handled in this patch:
handle_noop() just does nothing for intercepts that don't require our support
at all
handle_stop() is called when a cpu enters stopped state, and it drops out to
userland after updating our vcpu state
handle_validity() faults in the cpu lowcore if needed, or passes the request
to userland
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/kvm/Makefile | 2 -
arch/s390/kvm/intercept.c | 83 ++++++++++++++++++++++++++++++++++++++++++++
arch/s390/kvm/kvm-s390.c | 46 +++++++++++++++++++++++-
arch/s390/kvm/kvm-s390.h | 6 +++
include/asm-s390/kvm_host.h | 4 ++
include/linux/kvm.h | 9 ++++
6 files changed, 148 insertions(+), 2 deletions(-)
Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o
obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/intercept.c
@@ -0,0 +1,83 @@
+/*
+ * intercept.c - in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/kvm_host.h>
+
+#include "kvm-s390.h"
+
+static int handle_noop(struct kvm_vcpu *vcpu)
+{ /* Intercepts that need no in-kernel work: only bump the matching exit counter. */
+ switch (vcpu->arch.sie_block->icptcode) {
+ case 0x10:
+ vcpu->stat.exit_external_request++;
+ break;
+ case 0x14:
+ vcpu->stat.exit_external_interrupt++;
+ break;
+ default:
+ break; /* nothing */
+ }
+ return 0; /* 0 means "handled"; the run loop keeps going on a zero result */
+}
+
+static int handle_stop(struct kvm_vcpu *vcpu)
+{ /* Stop intercept: count it, clear CPUSTAT_RUNNING and leave the rest to userspace. */
+ vcpu->stat.exit_stop_request++;
+ VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
+ atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ return -ENOTSUPP; /* not completed in-kernel: caller fills kvm_run and exits */
+}
+
+static int handle_validity(struct kvm_vcpu *vcpu)
+{ /* Validity intercept: handle reason 0x37 in-kernel, pass every other reason on. */
+ int viwhy = vcpu->arch.sie_block->ipb >> 16; /* reason code is taken from ipb >> 16 */
+ vcpu->stat.exit_validity++;
+ if (viwhy == 0x37) { /* fault in PAGE_SIZE bytes at guest_origin + prefix, then report handled */
+ fault_in_pages_writeable((char __user *)
+ vcpu->kvm->arch.guest_origin +
+ vcpu->arch.sie_block->prefix, PAGE_SIZE);
+ return 0; /* NOTE(review): the fault_in_pages_writeable() result is ignored - confirm intended */
+ }
+ VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+ viwhy);
+ return -ENOTSUPP; /* unknown reason: not handled in-kernel */
+}
+
+static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+ [0x00 >> 2] = handle_noop,
+ [0x10 >> 2] = handle_noop,
+ [0x14 >> 2] = handle_noop,
+ [0x20 >> 2] = handle_validity,
+ [0x28 >> 2] = handle_stop,
+};
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{ /* Dispatch the current SIE intercept code to its in-kernel handler, if any. */
+ intercept_handler_t func;
+ u8 code = vcpu->arch.sie_block->icptcode;
+ /* intercept_funcs[] has 0x48 >> 2 slots, so code 0x48 itself is already out of range */
+ if (code & 3 || code >= 0x48)
+ return -ENOTSUPP;
+
+ func = intercept_funcs[code >> 2];
+
+ if (func)
+ return func(vcpu); /* handler result is passed through unchanged */
+
+ return -ENOTSUPP; /* no handler registered for this code */
+}
+
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -23,12 +23,17 @@
#include <asm/lowcore.h>
#include <asm/pgtable.h>
+#include "kvm-s390.h"
#include "gaccess.h"
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "userspace_handled", VCPU_STAT(exit_userspace) },
+ { "exit_validity", VCPU_STAT(exit_validity) },
+ { "exit_stop_request", VCPU_STAT(exit_stop_request) },
+ { "exit_external_request", VCPU_STAT(exit_external_request) },
+ { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
{ NULL }
};
@@ -384,6 +389,7 @@ static void __vcpu_run(struct kvm_vcpu *
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
+ int rc;
sigset_t sigsaved;
vcpu_load(vcpu);
@@ -393,7 +399,45 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
- __vcpu_run(vcpu);
+ switch (kvm_run->exit_reason) {
+ case KVM_EXIT_S390_SIEIC:
+ vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
+ vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
+ break;
+ case KVM_EXIT_UNKNOWN:
+ case KVM_EXIT_S390_RESET:
+ break;
+ default:
+ BUG();
+ }
+
+ might_sleep();
+
+ do {
+ __vcpu_run(vcpu);
+
+ rc = kvm_handle_sie_intercept(vcpu);
+ } while (!signal_pending(current) && !rc);
+
+ if ((rc == 0) && signal_pending(current))
+ rc = -EINTR;
+
+ if (rc == -ENOTSUPP) {
+ /* intercept cannot be handled in-kernel, prepare kvm-run */
+ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
+ kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+ kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
+ kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
+ kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
+ kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
+ rc = 0;
+ }
+
+ if (rc == -EREMOTE) {
+ /* intercept was handled, but userspace support is needed
+ * kvm_run has been prepared by the handler */
+ rc = 0;
+ }
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -13,6 +13,12 @@
#ifndef ARCH_S390_KVM_S390_H
#define ARCH_S390_KVM_S390_H
+#include <linux/kvm_host.h>
+
+typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+
+extern int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -93,6 +93,10 @@ struct sie_block {
struct kvm_vcpu_stat {
u32 exit_userspace;
+ u32 exit_external_request;
+ u32 exit_external_interrupt;
+ u32 exit_stop_request;
+ u32 exit_validity;
};
struct kvm_vcpu_arch {
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -74,6 +74,7 @@ struct kvm_irqchip {
#define KVM_EXIT_INTR 10
#define KVM_EXIT_SET_TPR 11
#define KVM_EXIT_TPR_ACCESS 12
+#define KVM_EXIT_S390_SIEIC 13
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
@@ -138,6 +139,14 @@ struct kvm_run {
__u32 is_write;
__u32 pad;
} tpr_access;
+ /* KVM_EXIT_S390_SIEIC */
+ struct {
+ __u8 icptcode;
+ __u64 mask; /* psw upper half */
+ __u64 addr; /* psw lower half */
+ __u16 ipa;
+ __u32 ipb;
+ } s390_sieic;
/* Fix the size of the union. */
char padding[256];
};
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 07/15] kvm-s390: interrupt subsystem, cpu timer, waitpsw
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (5 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 06/15] kvm-s390: sie intercept handling Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 08/15] kvm-s390: intercepts for privileged instructions Carsten Otte
` (13 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
This patch contains the s390 interrupt subsystem (similar to in kernel apic)
including timer interrupts (similar to in-kernel-pit) and enabled wait
(similar to in kernel hlt).
In order to achieve that, this patch also introduces intercept handling
for instruction intercepts, and it implements load control instructions.
This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both
the vm file descriptors and the vcpu file descriptors. In case this ioctl is
issued against a vm file descriptor, the interrupt is considered floating.
Floating interrupts may be delivered to any virtual cpu in the configuration.
The following interrupts are supported:
SIGP STOP - interprocessor signal that stops a remote cpu
SIGP SET PREFIX - interprocessor signal that sets the prefix register of a
(stopped) remote cpu
INT EMERGENCY - interprocessor interrupt, usually used to signal need_resched
and for smp_call_function() in the guest.
PROGRAM INT - exception during program execution such as page fault, illegal
instruction and friends
RESTART - interprocessor signal that starts a stopped cpu
INT VIRTIO - floating interrupt for virtio signalisation
INT SERVICE - floating interrupt for signalisations from the system
service processor
struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting
an interrupt, also carries parameter data for interrupts along with the interrupt
type. Interrupts on s390 usually have a state that represents the current
operation, or identifies which device has caused the interruption on s390.
kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a
disabled wait (that is, disabled for interrupts), we exit to userspace. In case
of an enabled wait we set up a timer that equals the cpu clock comparator value
and sleep on a wait queue.
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/kvm/Makefile | 2
arch/s390/kvm/intercept.c | 123 +++++++++
arch/s390/kvm/interrupt.c | 583 ++++++++++++++++++++++++++++++++++++++++++++
arch/s390/kvm/kvm-s390.c | 48 +++
arch/s390/kvm/kvm-s390.h | 15 +
include/asm-s390/kvm_host.h | 75 +++++
include/linux/kvm.h | 17 +
7 files changed, 860 insertions(+), 3 deletions(-)
Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o
obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -18,6 +18,91 @@
#include <asm/kvm_host.h>
#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int handle_lctg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+ u64 useraddr;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctg++;
+ if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
+ return -ENOTSUPP;
+
+ useraddr = disp2;
+ if (base2)
+ useraddr += vcpu->arch.guest_gprs[base2];
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+ disp2);
+
+ do {
+ rc = get_guest_u64(vcpu, useraddr,
+ &vcpu->arch.sie_block->gcr[reg]);
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ break;
+ }
+ useraddr += 8;
+ if (reg == reg3)
+ break;
+ reg = reg + 1;
+ if (reg > 15)
+ reg = 0;
+ } while (1);
+ return 0;
+}
+
+static int handle_lctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 useraddr;
+ u32 val = 0;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctl++;
+
+ useraddr = disp2;
+ if (base2)
+ useraddr += vcpu->arch.guest_gprs[base2];
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+ disp2);
+
+ do {
+ rc = get_guest_u32(vcpu, useraddr, &val);
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ break;
+ }
+ vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+ vcpu->arch.sie_block->gcr[reg] |= val;
+ useraddr += 4;
+ if (reg == reg3)
+ break;
+ reg = reg + 1;
+ if (reg > 15)
+ reg = 0;
+ } while (1);
+ return 0;
+}
+
+static intercept_handler_t instruction_handlers[256] = {
+ [0xb7] = handle_lctl,
+ [0xeb] = handle_lctg,
+};
static int handle_noop(struct kvm_vcpu *vcpu)
{
@@ -57,10 +142,48 @@ static int handle_validity(struct kvm_vc
return -ENOTSUPP;
}
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{ /* Instruction intercept: look up a handler in instruction_handlers[] by ipa >> 8. */
+ intercept_handler_t handler =
+ instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
+
+ vcpu->stat.exit_instruction++;
+
+ if (!handler) /* no in-kernel emulation registered for this table slot */
+ return -ENOTSUPP;
+
+ return handler(vcpu);
+}
+
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.exit_program_interruption++;
+ return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+}
+
+static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
+{ /* Combined intercept: run the instruction handler and the program handler. */
+ int rc, rc2;
+
+ vcpu->stat.exit_instr_and_program++;
+ rc = handle_instruction(vcpu);
+ rc2 = handle_prog(vcpu); /* always runs, whatever rc is */
+
+ if (rc == -ENOTSUPP) /* rewrite the code to 0x04, the slot that maps to handle_instruction */
+ vcpu->arch.sie_block->icptcode = 0x04;
+ if (rc) /* a non-zero instruction result takes precedence over rc2 */
+ return rc;
+ return rc2;
+}
+
static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
[0x00 >> 2] = handle_noop,
+ [0x04 >> 2] = handle_instruction,
+ [0x08 >> 2] = handle_prog,
+ [0x0C >> 2] = handle_instruction_and_prog,
[0x10 >> 2] = handle_noop,
[0x14 >> 2] = handle_noop,
+ [0x1C >> 2] = kvm_s390_handle_wait,
[0x20 >> 2] = handle_validity,
[0x28 >> 2] = handle_stop,
};
Index: kvm/arch/s390/kvm/interrupt.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/interrupt.c
@@ -0,0 +1,583 @@
+/*
+ * interrupt.c - handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#include <asm/lowcore.h>
+#include <asm/uaccess.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
+}
+
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{ /* Returns 1 only if none of the PER, IO and EXT mask bits is set in the guest PSW mask. */
+ if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
+ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
+ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
+ return 0; /* at least one of the three mask bits is set */
+ return 1;
+}
+
+static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+ struct interrupt_info *inti)
+{
+ switch (inti->type) {
+ case KVM_S390_INT_EMERGENCY:
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
+ return 1;
+ return 0;
+ case KVM_S390_INT_SERVICE:
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+ return 1;
+ return 0;
+ case KVM_S390_INT_VIRTIO:
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+ return 1; /*FIXME virtio control register bit */
+ return 0;
+ case KVM_S390_PROGRAM_INT:
+ case KVM_S390_SIGP_STOP:
+ case KVM_S390_SIGP_SET_PREFIX:
+ case KVM_S390_RESTART:
+ return 1;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+ BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+ atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+ BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+ atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(CPUSTAT_ECALL_PEND |
+ CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+ &vcpu->arch.sie_block->cpuflags);
+ vcpu->arch.sie_block->lctl = 0x0000;
+}
+
+static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
+{
+ atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+}
+
+static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
+ struct interrupt_info *inti)
+{
+ switch (inti->type) {
+ case KVM_S390_INT_EMERGENCY:
+ case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_VIRTIO:
+ if (psw_extint_disabled(vcpu))
+ __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR0;
+ break;
+ case KVM_S390_SIGP_STOP:
+ __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+ struct interrupt_info *inti)
+{
+ const unsigned short table[] = { 2, 4, 4, 6 };
+ int rc, exception = 0;
+
+ switch (inti->type) {
+ case KVM_S390_INT_EMERGENCY:
+ VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+ vcpu->stat.deliver_emergency_signal++;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_INT_SERVICE:
+ VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+ inti->ext.ext_params);
+ vcpu->stat.deliver_service_signal++;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_INT_VIRTIO:
+ VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
+ inti->ext.ext_params, inti->ext.ext_params2);
+ vcpu->stat.deliver_virtio_interrupt++;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1237);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
+ inti->ext.ext_params2);
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_SIGP_STOP:
+ VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+ vcpu->stat.deliver_stop_signal++;
+ __set_intercept_indicator(vcpu, inti);
+ break;
+
+ case KVM_S390_SIGP_SET_PREFIX:
+ VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
+ inti->prefix.address);
+ vcpu->stat.deliver_prefix_signal++;
+ vcpu->arch.sie_block->prefix = inti->prefix.address;
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ break;
+
+ case KVM_S390_RESTART:
+ VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+ vcpu->stat.deliver_restart_signal++;
+ rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
+ restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_PROGRAM_INT:
+ VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+ inti->pgm.code,
+ table[vcpu->arch.sie_block->ipa >> 14]);
+ vcpu->stat.deliver_program_int++;
+ rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u16(vcpu, __LC_PGM_ILC,
+ table[vcpu->arch.sie_block->ipa >> 14]);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_PGM_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ default:
+ BUG();
+ }
+
+ if (exception) {
+ VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
+ " interrupt");
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (inti->type == KVM_S390_PROGRAM_INT) {
+ printk(KERN_WARNING "kvm: recursive program check\n");
+ BUG();
+ }
+ }
+}
+
+static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+{
+ int rc, exception = 0;
+
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+ return 0;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
+ if (rc == -EFAULT)
+ exception = 1;
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ if (exception) {
+ VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
+ " ckc interrupt");
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ return 0;
+ }
+
+ return 1;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct local_interrupt *li = &vcpu->arch.local_int;
+ struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct interrupt_info *inti;
+ int rc = 0;
+
+ if (atomic_read(&li->active)) {
+ spin_lock_bh(&li->lock);
+ list_for_each_entry(inti, &li->list, list)
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ rc = 1;
+ break;
+ }
+ spin_unlock_bh(&li->lock);
+ }
+
+ if ((!rc) && atomic_read(&fi->active)) {
+ spin_lock_bh(&fi->lock);
+ list_for_each_entry(inti, &fi->list, list)
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ rc = 1;
+ break;
+ }
+ spin_unlock_bh(&fi->lock);
+ }
+
+ if ((!rc) && (vcpu->arch.sie_block->ckc <
+ get_clock() + vcpu->arch.sie_block->epoch)) {
+ if ((!psw_extint_disabled(vcpu)) &&
+ (vcpu->arch.sie_block->gcr[0] & 0x800ul))
+ rc = 1;
+ }
+
+ return rc;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{ /* Wait intercept: -ENOTSUPP for a disabled wait, otherwise sleep until there is work; returns 0. */
+ u64 now, sltime;
+ DECLARE_WAITQUEUE(wait, current);
+
+ vcpu->stat.exit_wait_state++;
+ if (kvm_cpu_has_interrupt(vcpu)) /* something is deliverable right now: do not sleep */
+ return 0;
+
+ if (psw_interrupts_disabled(vcpu)) {
+ VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+ __unset_cpu_idle(vcpu);
+ return -ENOTSUPP; /* disabled wait */
+ }
+
+ if (psw_extint_disabled(vcpu) ||
+ (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+ VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+ goto no_timer; /* same condition under which no ckc interrupt is delivered */
+ }
+
+ now = get_clock() + vcpu->arch.sie_block->epoch;
+ if (vcpu->arch.sie_block->ckc < now) { /* clock comparator already in the past */
+ __unset_cpu_idle(vcpu);
+ return 0;
+ }
+ /* 0xf4240000 == 1000000 << 12; presumably clock units per second, so the divisor is units per jiffy */
+ sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+
+ vcpu->arch.ckc_timer.expires = jiffies + sltime;
+
+ add_timer(&vcpu->arch.ckc_timer);
+ VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
+no_timer:
+ spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ __set_cpu_idle(vcpu);
+ vcpu->arch.local_int.timer_due = 0;
+ add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ while (list_empty(&vcpu->arch.local_int.list) &&
+ list_empty(&vcpu->arch.local_int.float_int->list) &&
+ (!vcpu->arch.local_int.timer_due) &&
+ !signal_pending(current)) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock_bh(&vcpu->arch.local_int.lock); /* both locks are dropped across schedule() */
+ spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+ vcpu_put(vcpu);
+ schedule();
+ vcpu_load(vcpu);
+ spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ }
+ __unset_cpu_idle(vcpu);
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&vcpu->arch.local_int.wq, &wait); /* must be the queue used in add_wait_queue() above */
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+ spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+ del_timer(&vcpu->arch.ckc_timer); /* also reached via no_timer, where no timer was armed */
+ return 0;
+}
+
+void kvm_s390_idle_wakeup(unsigned long data)
+{ /* Marks the vcpu's timer as due and wakes a sleeper on its local wait queue, if any. */
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; /* data carries the vcpu pointer */
+
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ vcpu->arch.local_int.timer_due = 1; /* checked by the wait loop in kvm_s390_handle_wait() */
+ if (waitqueue_active(&vcpu->arch.local_int.wq))
+ wake_up_interruptible(&vcpu->arch.local_int.wq);
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+}
+
+
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+ struct local_interrupt *li = &vcpu->arch.local_int;
+ struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct interrupt_info *n, *inti = NULL;
+ int deliver;
+
+ __reset_intercept_indicators(vcpu);
+ if (atomic_read(&li->active)) {
+ do {
+ deliver = 0;
+ spin_lock_bh(&li->lock);
+ list_for_each_entry_safe(inti, n, &li->list, list) {
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&li->list))
+ atomic_set(&li->active, 0);
+ spin_unlock_bh(&li->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+
+ if ((vcpu->arch.sie_block->ckc <
+ get_clock() + vcpu->arch.sie_block->epoch))
+ __try_deliver_ckc_interrupt(vcpu);
+
+ if (atomic_read(&fi->active)) {
+ do {
+ deliver = 0;
+ spin_lock_bh(&fi->lock);
+ list_for_each_entry_safe(inti, n, &fi->list, list) {
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&fi->list))
+ atomic_set(&fi->active, 0);
+ spin_unlock_bh(&fi->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+}
+
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{ /* Queue a program interrupt with the given code on this vcpu's local interrupt list. */
+ struct local_interrupt *li = &vcpu->arch.local_int;
+ struct interrupt_info *inti;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = KVM_S390_PROGRAM_INT;
+ inti->pgm.code = code;
+
+ VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+ spin_lock_bh(&li->lock);
+ list_add(&inti->list, &li->list); /* head insertion: ahead of entries already queued */
+ atomic_set(&li->active, 1);
+ BUG_ON(waitqueue_active(&li->wq)); /* asserts that nobody is sleeping on this vcpu's queue */
+ spin_unlock_bh(&li->lock);
+ return 0; /* 0 on success, -ENOMEM if the allocation failed */
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int)
+{ /* Queue a floating interrupt (virtio or service) on the VM and kick one vcpu. */
+ struct local_interrupt *li;
+ struct float_interrupt *fi;
+ struct interrupt_info *inti;
+ int sigcpu;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ switch (s390int->type) {
+ case KVM_S390_INT_VIRTIO:
+ VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
+ s390int->parm, s390int->parm64);
+ inti->type = s390int->type;
+ inti->ext.ext_params = s390int->parm;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
+ case KVM_S390_INT_SERVICE:
+ VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+ inti->type = s390int->type;
+ inti->ext.ext_params = s390int->parm;
+ break;
+ case KVM_S390_PROGRAM_INT: /* these three share the default path and are rejected */
+ case KVM_S390_SIGP_STOP:
+ case KVM_S390_INT_EMERGENCY:
+ default:
+ kfree(inti);
+ return -EINVAL;
+ }
+
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock_bh(&fi->lock);
+ list_add_tail(&inti->list, &fi->list);
+ atomic_set(&fi->active, 1);
+ sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); /* prefer a vcpu flagged idle */
+ if (sigcpu == KVM_MAX_VCPUS) { /* none idle: pick the next registered vcpu round-robin */
+ do {
+ sigcpu = fi->next_rr_cpu++;
+ if (sigcpu == KVM_MAX_VCPUS)
+ sigcpu = fi->next_rr_cpu = 0;
+ } while (fi->local_int[sigcpu] == NULL); /* NOTE(review): spins forever under fi->lock if no vcpu is registered - confirm */
+ }
+ li = fi->local_int[sigcpu];
+ spin_lock_bh(&li->lock);
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ spin_unlock_bh(&li->lock);
+ spin_unlock_bh(&fi->lock);
+ mutex_unlock(&kvm->lock);
+ return 0; /* 0 on success; -ENOMEM / -EINVAL are returned above */
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int)
+{
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ switch (s390int->type) {
+ case KVM_S390_PROGRAM_INT:
+ if (s390int->parm & 0xffff0000) {
+ kfree(inti);
+ return -EINVAL;
+ }
+ inti->type = s390int->type;
+ inti->pgm.code = s390int->parm;
+ VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
+ s390int->parm);
+ break;
+ case KVM_S390_SIGP_STOP:
+ case KVM_S390_RESTART:
+ case KVM_S390_SIGP_SET_PREFIX:
+ case KVM_S390_INT_EMERGENCY:
+ VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
+ inti->type = s390int->type;
+ break;
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ default:
+ kfree(inti);
+ return -EINVAL;
+ }
+
+ mutex_lock(&vcpu->kvm->lock);
+ li = &vcpu->arch.local_int;
+ spin_lock_bh(&li->lock);
+ if (inti->type == KVM_S390_PROGRAM_INT)
+ list_add(&inti->list, &li->list);
+ else
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ if (inti->type == KVM_S390_SIGP_STOP)
+ li->action_bits |= ACTION_STOP_ON_STOP;
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&vcpu->arch.local_int.wq);
+ spin_unlock_bh(&li->lock);
+ mutex_unlock(&vcpu->kvm->lock);
+ return 0;
+}
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -20,6 +20,7 @@
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/timer.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
@@ -34,6 +35,19 @@ struct kvm_stats_debugfs_item debugfs_en
{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
{ "exit_external_request", VCPU_STAT(exit_external_request) },
{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
+ { "exit_instruction", VCPU_STAT(exit_instruction) },
+ { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
+ { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "instruction_lctg", VCPU_STAT(instruction_lctg) },
+ { "instruction_lctl", VCPU_STAT(instruction_lctl) },
+ { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+ { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
+ { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+ { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
+ { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
+ { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
+ { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+ { "exit_wait_state", VCPU_STAT(exit_wait_state) },
{ NULL }
};
@@ -108,6 +122,15 @@ long kvm_arch_vm_ioctl(struct file *filp
int r;
switch (ioctl) {
+ case KVM_S390_INTERRUPT: {
+ struct kvm_s390_interrupt s390int;
+
+ r = -EFAULT;
+ if (copy_from_user(&s390int, argp, sizeof(s390int)))
+ break;
+ r = kvm_s390_inject_vm(kvm, &s390int);
+ break;
+ }
default:
r = -EINVAL;
}
@@ -141,6 +164,9 @@ struct kvm *kvm_arch_create_vm(void)
if (!kvm->arch.dbf)
goto out_nodbf;
+ spin_lock_init(&kvm->arch.float_int.lock);
+ INIT_LIST_HEAD(&kvm->arch.float_int.list);
+
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
@@ -221,7 +247,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu
vcpu->arch.sie_block->gmsor = 0x000000000000;
vcpu->arch.sie_block->ecb = 2;
vcpu->arch.sie_block->eca = 0xC1002001U;
-
+ setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
+ (unsigned long) vcpu);
return 0;
}
@@ -246,6 +273,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+ spin_lock_init(&vcpu->arch.local_int.lock);
+ INIT_LIST_HEAD(&vcpu->arch.local_int.list);
+ vcpu->arch.local_int.float_int = &kvm->arch.float_int;
+ spin_lock_bh(&kvm->arch.float_int.lock);
+ kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
+ init_waitqueue_head(&vcpu->arch.local_int.wq);
+ spin_unlock_bh(&kvm->arch.float_int.lock);
+
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_cpu;
@@ -399,6 +434,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
+
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
@@ -414,8 +451,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
might_sleep();
do {
+ kvm_s390_deliver_pending_interrupts(vcpu);
__vcpu_run(vcpu);
-
rc = kvm_handle_sie_intercept(vcpu);
} while (!signal_pending(current) && !rc);
@@ -545,6 +582,13 @@ long kvm_arch_vcpu_ioctl(struct file *fi
void __user *argp = (void __user *)arg;
switch (ioctl) {
+ case KVM_S390_INTERRUPT: {
+ struct kvm_s390_interrupt s390int;
+
+ if (copy_from_user(&s390int, argp, sizeof(s390int)))
+ return -EFAULT;
+ return kvm_s390_inject_vcpu(vcpu, &s390int);
+ }
case KVM_S390_STORE_STATUS:
return kvm_s390_vcpu_store_status(vcpu, arg);
case KVM_S390_SET_INITIAL_PSW: {
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -13,6 +13,7 @@
#ifndef ARCH_S390_KVM_S390_H
#define ARCH_S390_KVM_S390_H
+#include <linux/kvm.h>
#include <linux/kvm_host.h>
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
@@ -32,4 +33,18 @@ do { \
d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
d_args); \
} while (0)
+
+static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_idle_wakeup(unsigned long data);
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+int kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
#endif
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -62,6 +62,7 @@ struct sie_block {
__u64 ckc; /* 0x0030 */
__u64 epoch; /* 0x0038 */
__u8 reserved40[4]; /* 0x0040 */
+#define LCTL_CR0 0x8000
__u16 lctl; /* 0x0044 */
__s16 icpua; /* 0x0046 */
__u32 ictl; /* 0x0048 */
@@ -97,8 +98,79 @@ struct kvm_vcpu_stat {
u32 exit_external_interrupt;
u32 exit_stop_request;
u32 exit_validity;
+ u32 exit_instruction;
+ u32 instruction_lctl;
+ u32 instruction_lctg;
+ u32 exit_program_interruption;
+ u32 exit_instr_and_program;
+ u32 deliver_emergency_signal;
+ u32 deliver_service_signal;
+ u32 deliver_virtio_interrupt;
+ u32 deliver_stop_signal;
+ u32 deliver_prefix_signal;
+ u32 deliver_restart_signal;
+ u32 deliver_program_int;
+ u32 exit_wait_state;
};
+struct io_info {
+ __u16 subchannel_id; /* 0x0b8 */
+ __u16 subchannel_nr; /* 0x0ba */
+ __u32 io_int_parm; /* 0x0bc */
+ __u32 io_int_word; /* 0x0c0 */
+};
+
+struct ext_info {
+ __u32 ext_params;
+ __u64 ext_params2;
+};
+
+#define PGM_OPERATION 0x01
+#define PGM_PRIVILEGED_OPERATION 0x02
+#define PGM_EXECUTE 0x03
+#define PGM_PROTECTION 0x04
+#define PGM_ADDRESSING 0x05
+#define PGM_SPECIFICATION 0x06
+#define PGM_DATA 0x07
+
+struct pgm_info {
+ __u16 code;
+};
+
+struct prefix_info {
+ __u32 address;
+};
+
+struct interrupt_info {
+ struct list_head list;
+ u64 type;
+ union {
+ struct io_info io;
+ struct ext_info ext;
+ struct pgm_info pgm;
+ struct prefix_info prefix;
+ };
+};
+
+struct local_interrupt {
+ spinlock_t lock;
+ struct list_head list;
+ atomic_t active;
+ struct float_interrupt *float_int;
+ int timer_due; /* event indicator for waitqueue below */
+ wait_queue_head_t wq;
+};
+
+struct float_interrupt {
+ spinlock_t lock;
+ struct list_head list;
+ atomic_t active;
+ int next_rr_cpu;
+ unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)];
+ struct local_interrupt *local_int[64];
+};
+
+
struct kvm_vcpu_arch {
struct sie_block *sie_block;
unsigned long guest_gprs[16];
@@ -106,6 +178,8 @@ struct kvm_vcpu_arch {
unsigned int host_acrs[NUM_ACRS];
s390_fp_regs guest_fpregs;
unsigned int guest_acrs[NUM_ACRS];
+ struct local_interrupt local_int;
+ struct timer_list ckc_timer;
};
struct kvm_vm_stat {
@@ -117,6 +191,7 @@ struct kvm_arch{
unsigned long guest_memsize;
struct sca_block *sca;
debug_info_t *dbf;
+ struct float_interrupt float_int;
};
extern int sie64a(struct sie_block *, __u64 *);
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -219,6 +219,21 @@ struct kvm_s390_psw {
__u64 addr;
};
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP 0xfffe0000u
+#define KVM_S390_PROGRAM_INT 0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
+#define KVM_S390_RESTART 0xfffe0003u
+#define KVM_S390_INT_VIRTIO 0xffff1237u /*FIXME arch number */
+#define KVM_S390_INT_SERVICE 0xffff2401u
+#define KVM_S390_INT_EMERGENCY 0xffff1201u
+
+struct kvm_s390_interrupt {
+ __u32 type;
+ __u32 parm;
+ __u64 parm64;
+};
+
#define KVMIO 0xAE
/*
@@ -307,6 +322,8 @@ struct kvm_s390_psw {
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
/* Available with KVM_CAP_VAPIC */
#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
+/* valid for virtual machine (for floating interrupt) _and_ vcpu */
+#define KVM_S390_INTERRUPT _IOW(KVMIO, 0x94, struct kvm_s390_interrupt)
/* store status for s390 */
#define KVM_S390_STORE_STATUS_NOADDR (-1ul)
#define KVM_S390_STORE_STATUS_PREFIXED (-2ul)
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 08/15] kvm-s390: intercepts for privileged instructions
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (6 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 07/15] kvm-s390: interrupt subsystem, cpu timer, waitpsw Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 09/15] kvm-s390: interprocessor communication via sigp Carsten Otte
` (12 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch introduces in-kernel handling of some intercepts for privileged
instructions:
handle_set_prefix() sets the prefix register of the local cpu
handle_store_prefix() stores the content of the prefix register to memory
handle_store_cpu_address() stores the cpu number of the current cpu to memory
handle_skey() just decrements the instruction address and retries
handle_stsch() delivers condition code 3 "operation not supported"
handle_chsc() same here
handle_stfl() stores the facility list which contains the
capabilities of the cpu
handle_stidp() stores cpu type/model/revision and such
handle_stsi() stores information about the system topology
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/kvm/Makefile | 2
arch/s390/kvm/intercept.c | 1
arch/s390/kvm/kvm-s390.c | 11 +
arch/s390/kvm/kvm-s390.h | 3
arch/s390/kvm/priv.c | 322 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-s390/kvm_host.h | 13 +
6 files changed, 351 insertions(+), 1 deletion(-)
Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o
obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -100,6 +100,7 @@ static int handle_lctl(struct kvm_vcpu *
}
static intercept_handler_t instruction_handlers[256] = {
+ [0xb2] = kvm_s390_handle_priv,
[0xb7] = handle_lctl,
[0xeb] = handle_lctg,
};
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -48,6 +48,15 @@ struct kvm_stats_debugfs_item debugfs_en
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_stidp", VCPU_STAT(instruction_stidp) },
+ { "instruction_spx", VCPU_STAT(instruction_spx) },
+ { "instruction_stpx", VCPU_STAT(instruction_stpx) },
+ { "instruction_stap", VCPU_STAT(instruction_stap) },
+ { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+ { "instruction_stsch", VCPU_STAT(instruction_stsch) },
+ { "instruction_chsc", VCPU_STAT(instruction_chsc) },
+ { "instruction_stsi", VCPU_STAT(instruction_stsi) },
+ { "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ NULL }
};
@@ -249,6 +258,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu
vcpu->arch.sie_block->eca = 0xC1002001U;
setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
(unsigned long) vcpu);
+ get_cpu_id(&vcpu->arch.cpu_id);
+ vcpu->arch.cpu_id.version = 0xfe;
return 0;
}
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -47,4 +47,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int);
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+
+/* implemented in priv.c */
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
#endif
Index: kvm/arch/s390/kvm/priv.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/priv.c
@@ -0,0 +1,322 @@
+/*
+ * priv.c - handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/errno.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+ /* second operand: base register number and displacement taken from ipb */
+ int b2 = vcpu->arch.sie_block->ipb >> 28;
+ int d2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 ga = d2;
+ u32 new_prefix = 0;
+ u8 probe;
+
+ vcpu->stat.instruction_spx++;
+
+ /* base register 0 contributes nothing to the operand address */
+ if (b2)
+ ga += vcpu->arch.guest_gprs[b2];
+
+ /* the operand has to sit on a word boundary */
+ if (ga & 3) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ return 0;
+ }
+
+ /* fetch the requested prefix value from guest memory */
+ if (get_guest_u32(vcpu, ga, &new_prefix)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ return 0;
+ }
+
+ new_prefix &= 0x7fffe000u;
+
+ /* probe one byte at the new prefix and one byte a page further on */
+ if (copy_from_guest_absolute(vcpu, &probe, new_prefix, 1)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ return 0;
+ }
+ if (copy_from_guest_absolute(vcpu, &probe, new_prefix + PAGE_SIZE, 1)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ return 0;
+ }
+
+ vcpu->arch.sie_block->prefix = new_prefix;
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+
+ VCPU_EVENT(vcpu, 5, "setting prefix to %x", new_prefix);
+ /* failures were reported to the guest as program interrupts above */
+ return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 operand2;
+ u32 address;
+ /* store this vcpu's prefix register at the guest's second-operand address */
+ vcpu->stat.instruction_stpx++;
+ operand2 = disp2;
+ if (base2)
+ operand2 += vcpu->arch.guest_gprs[base2];
+
+ /* must be word boundary */
+ if (operand2 & 3) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ goto out;
+ }
+
+ address = vcpu->arch.sie_block->prefix;
+ address = address & 0x7fffe000u;
+
+ /* store the value into guest memory */
+ if (put_guest_u32(vcpu, operand2, address)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); /* logs the prefix value, not the target address */
+out:
+ return 0; /* always 0: errors reach the guest as program interrupts */
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 useraddr;
+ int rc;
+ /* store vcpu->vcpu_id as a 16-bit value at the guest's second-operand address */
+ vcpu->stat.instruction_stap++;
+ useraddr = disp2;
+ if (base2)
+ useraddr += vcpu->arch.guest_gprs[base2];
+ /* the operand must be halfword aligned */
+ if (useraddr & 1) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ goto out;
+ }
+
+ rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
+out:
+ return 0; /* always 0: errors reach the guest as program interrupts */
+}
+
+static int handle_skey(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_storage_key++;
+ vcpu->arch.sie_block->gpsw.addr -= 4; /* step the guest PSW back by 4 bytes so the instruction is retried */
+ VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+ return 0;
+}
+
+static int handle_stsch(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_stsch++;
+ VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
+ /* condition code 3: clear the two CC bits (shift 44) of the guest PSW mask, then set both */
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ return 0;
+}
+
+static int handle_chsc(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_chsc++;
+ VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
+ /* condition code 3: clear the two CC bits (shift 44) of the guest PSW mask, then set both */
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ return 0;
+}
+
+static unsigned int stfl(void)
+{
+ asm volatile( /* issue the stfl instruction, then read its result from S390_lowcore below */
+ " .insn s,0xb2b10000,0(0)\n" /* stfl */
+ "0:\n"
+ EX_TABLE(0b, 0b)); /* NOTE(review): presumably lets execution continue at 0: if stfl faults - confirm */
+ return S390_lowcore.stfl_fac_list;
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+ unsigned int facility_list = stfl();
+ int rc;
+ /* hand the guest the facility list from stfl(), with bit 24 masked out */
+ vcpu->stat.instruction_stfl++;
+ facility_list &= ~(1UL<<24); /* no stfle */
+ /* the result goes to the guest at the lowcore offset of stfl_fac_list */
+ rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+ &facility_list, sizeof(facility_list));
+ if (rc == -EFAULT)
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ else
+ VCPU_EVENT(vcpu, 5, "store facility list value %x",
+ facility_list);
+
+ return 0; /* always 0: a failed copy reaches the guest as a program interrupt */
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 operand2;
+ int rc;
+ /* store the 64-bit vcpu->arch.stidp_data at the guest's second-operand address */
+ vcpu->stat.instruction_stidp++;
+ operand2 = disp2;
+ if (base2)
+ operand2 += vcpu->arch.guest_gprs[base2];
+ /* the operand must be doubleword aligned */
+ if (operand2 & 7) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ goto out;
+ }
+
+ rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); /* stidp_data shares a union with cpu_id in kvm_vcpu_arch */
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+out:
+ return 0; /* always 0: errors reach the guest as program interrupts */
+}
+
+/*
+ * Fill @mem with the stsi 3.2.2 data reported to the guest: whatever the
+ * host's own stsi(3, 2, 2) returns, with an entry describing this KVM guest
+ * inserted in front of the existing vm[] entries.
+ */
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ int cpus = 0;
+ int n;
+
+ /* every registered local_int slot stands for one vcpu of this vm */
+ spin_lock_bh(&fi->lock);
+ for (n = 0; n < KVM_MAX_VCPUS; n++)
+ if (fi->local_int[n])
+ cpus++;
+ spin_unlock_bh(&fi->lock);
+
+ /* deal with other level 3 hypervisors */
+ if (stsi(mem, 3, 2, 2) == -ENOSYS)
+ mem->count = 0;
+ /*
+ * Shift the existing entries up by one to free vm[0]; with 8 entries
+ * already present the last one is overwritten (presumably vm[] holds
+ * 8 entries - confirm against struct sysinfo_3_2_2).
+ */
+ if (mem->count < 8)
+ mem->count++;
+ for (n = mem->count - 1; n > 0 ; n--)
+ memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+ mem->vm[0].cpus_total = cpus;
+ mem->vm[0].cpus_configured = cpus;
+ mem->vm[0].cpus_standby = 0;
+ mem->vm[0].cpus_reserved = 0;
+ mem->vm[0].caf = 1000;
+ memcpy(mem->vm[0].name, "KVMguest", 8);
+ ASCEBC(mem->vm[0].name, 8);
+ /* the literal is blank-padded to the full 16 bytes that get copied */
+ memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
+ ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+ int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
+ int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
+ int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 operand2;
+ unsigned long mem;
+ /* emulate stsi: function code and selectors come from guest gprs 0 and 1 */
+ vcpu->stat.instruction_stsi++;
+ VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+
+ operand2 = disp2;
+ if (base2)
+ operand2 += vcpu->arch.guest_gprs[base2];
+ /* any function code above 0 needs a 4k-aligned result address */
+ if (operand2 & 0xfff && fc > 0)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (fc) {
+ case 0:
+ vcpu->arch.guest_gprs[0] = 3 << 28; /* fc 0: put 3 into the function-code field of gpr 0, cc 0 */
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ return 0;
+ case 1: /* same handling for 1 and 2 */
+ case 2:
+ mem = get_zeroed_page(GFP_KERNEL);
+ if (!mem)
+ goto out_fail;
+ if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
+ goto out_mem;
+ break;
+ case 3:
+ if (sel1 != 2 || sel2 != 2) /* only 3.2.2 is provided at level 3 */
+ goto out_fail;
+ mem = get_zeroed_page(GFP_KERNEL);
+ if (!mem)
+ goto out_fail;
+ handle_stsi_3_2_2(vcpu, (void *) mem);
+ break;
+ default:
+ goto out_fail;
+ }
+
+ if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out_mem;
+ }
+ free_page(mem);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* success: cc 0 and gpr 0 cleared */
+ vcpu->arch.guest_gprs[0] = 0;
+ return 0;
+out_mem:
+ free_page(mem);
+out_fail:
+ /* condition code 3 */
+ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
+ return 0;
+}
+
+static intercept_handler_t priv_handlers[256] = { /* indexed by the low byte of sie_block->ipa; empty slots are not handled here */
+ [0x02] = handle_stidp,
+ [0x10] = handle_set_prefix,
+ [0x11] = handle_store_prefix,
+ [0x12] = handle_store_cpu_address,
+ [0x29] = handle_skey, /* three opcodes share the storage-key retry handler */
+ [0x2a] = handle_skey,
+ [0x2b] = handle_skey,
+ [0x34] = handle_stsch,
+ [0x5f] = handle_chsc,
+ [0x7d] = handle_stsi,
+ [0xb1] = handle_stfl,
+};
+
+/*
+ * Dispatch an intercepted instruction to the priv_handlers[] entry selected
+ * by the low byte of ipa. Returns the handler's result, or -ENOTSUPP when no
+ * handler is registered for that byte.
+ */
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ if (!handler)
+ return -ENOTSUPP;
+ return handler(vcpu);
+}
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -111,6 +111,15 @@ struct kvm_vcpu_stat {
u32 deliver_restart_signal;
u32 deliver_program_int;
u32 exit_wait_state;
+ u32 instruction_stidp;
+ u32 instruction_spx;
+ u32 instruction_stpx;
+ u32 instruction_stap;
+ u32 instruction_storage_key;
+ u32 instruction_stsch;
+ u32 instruction_chsc;
+ u32 instruction_stsi;
+ u32 instruction_stfl;
};
struct io_info {
@@ -180,6 +189,10 @@ struct kvm_vcpu_arch {
unsigned int guest_acrs[NUM_ACRS];
struct local_interrupt local_int;
struct timer_list ckc_timer;
+ union {
+ cpuid_t cpu_id;
+ u64 stidp_data;
+ };
};
struct kvm_vm_stat {
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 09/15] kvm-s390: interprocessor communication via sigp
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (7 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 08/15] kvm-s390: intercepts for privileged instructions Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 10/15] kvm-s390: intercepts for diagnose instructions Carsten Otte
` (11 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch introduces in-kernel handling of _some_ sigp interprocessor
signals (similar to ipi).
kvm_s390_handle_sigp() decodes the sigp instruction and calls individual
handlers depending on the operation requested:
- sigp sense tries to retrieve information such as existence or running state
of the remote cpu
- sigp emergency sends an external interrupt to the remote cpu
- sigp stop stops a remote cpu
- sigp stop store status stops a remote cpu, and stores its entire internal
state to the cpus lowcore
- sigp set arch sets the architecture mode of the remote cpu. setting to
ESAME (s390x 64bit) is accepted, setting to ESA/S390 (s390, 31 or 24 bit) is
denied, all others are passed to userland
- sigp set prefix sets the prefix register of a remote cpu
For implementation of this, the stop intercept indication starts to get reused
on purpose: a set of action bits defines what to do once a cpu gets stopped:
ACTION_STOP_ON_STOP really stops the cpu when a stop intercept is recognized
ACTION_STORE_ON_STOP stores the cpu status to lowcore when a stop intercept is
recognized
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/kvm/Makefile | 2
arch/s390/kvm/intercept.c | 22 +++
arch/s390/kvm/kvm-s390.c | 7 +
arch/s390/kvm/kvm-s390.h | 7 +
arch/s390/kvm/sigp.c | 289 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-s390/kvm_host.h | 12 +
6 files changed, 336 insertions(+), 3 deletions(-)
Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o
obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -100,6 +100,7 @@ static int handle_lctl(struct kvm_vcpu *
}
static intercept_handler_t instruction_handlers[256] = {
+ [0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_priv,
[0xb7] = handle_lctl,
[0xeb] = handle_lctg,
@@ -122,10 +123,27 @@ static int handle_noop(struct kvm_vcpu *
static int handle_stop(struct kvm_vcpu *vcpu)
{
+ int rc;
+
vcpu->stat.exit_stop_request++;
- VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
- return -ENOTSUPP;
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
+ vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
+ rc = __kvm_s390_vcpu_store_status(vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ if (rc >= 0)
+ rc = -ENOTSUPP;
+ }
+
+ if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
+ vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
+ VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
+ rc = -ENOTSUPP;
+ } else
+ rc = 0;
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+ return rc;
}
static int handle_validity(struct kvm_vcpu *vcpu)
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -57,6 +57,12 @@ struct kvm_stats_debugfs_item debugfs_en
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+ { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+ { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+ { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+ { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
+ { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
+ { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
{ NULL }
};
@@ -290,6 +296,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
spin_lock_bh(&kvm->arch.float_int.lock);
kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
init_waitqueue_head(&vcpu->arch.local_int.wq);
+ vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
spin_unlock_bh(&kvm->arch.float_int.lock);
rc = kvm_vcpu_init(vcpu, kvm, id);
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -50,4 +50,11 @@ int kvm_s390_inject_program_int(struct k
/* implemented in priv.c */
int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
+ unsigned long addr);
#endif
Index: kvm/arch/s390/kvm/sigp.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/sigp.c
@@ -0,0 +1,289 @@
+/*
+ * sigp.c - handling interprocessor communication
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+/* sigp order codes */
+#define SIGP_SENSE 0x01
+#define SIGP_EXTERNAL_CALL 0x02
+#define SIGP_EMERGENCY 0x03
+#define SIGP_START 0x04
+#define SIGP_STOP 0x05
+#define SIGP_RESTART 0x06
+#define SIGP_STOP_STORE_STATUS 0x09
+#define SIGP_INITIAL_CPU_RESET 0x0b
+#define SIGP_CPU_RESET 0x0c
+#define SIGP_SET_PREFIX 0x0d
+#define SIGP_STORE_STATUS_ADDR 0x0e
+#define SIGP_SET_ARCH 0x12
+
+/* cpu status bits */
+#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL
+#define SIGP_STAT_INCORRECT_STATE 0x00000200UL
+#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
+#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL
+#define SIGP_STAT_STOPPED 0x00000040UL
+#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL
+#define SIGP_STAT_CHECK_STOP 0x00000010UL
+#define SIGP_STAT_INOPERATIVE 0x00000004UL
+#define SIGP_STAT_INVALID_ORDER 0x00000002UL
+#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL
+
+
+/*
+ * sigp sense: report whether the vcpu with address @cpu_addr exists and
+ * whether it is running. Returns the condition code for the guest: 3 when
+ * there is no such vcpu, otherwise 1 with the status placed in the low
+ * 32 bits of @reg (SIGP_STAT_STOPPED set when the target is not running).
+ */
+static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct local_interrupt *target;
+ int cc;
+
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return 3; /* not operational */
+
+ spin_lock_bh(&fi->lock);
+ target = fi->local_int[cpu_addr];
+ if (target == NULL) {
+ cc = 3; /* not operational */
+ } else {
+ /* the status replaces the low 32 bits, the high half is kept */
+ *reg &= 0xffffffff00000000UL;
+ if (!(atomic_read(target->cpuflags) & CPUSTAT_RUNNING))
+ *reg |= SIGP_STAT_STOPPED;
+ cc = 1; /* status stored */
+ }
+ spin_unlock_bh(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, cc);
+ return cc;
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+ int rc;
+ /* queue a KVM_S390_INT_EMERGENCY interrupt on the vcpu with address cpu_addr */
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return 3; /* not operational */
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL); /* allocated before any spinlock is taken */
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = KVM_S390_INT_EMERGENCY;
+
+ spin_lock_bh(&fi->lock);
+ li = fi->local_int[cpu_addr];
+ if (li == NULL) {
+ rc = 3; /* not operational */
+ kfree(inti);
+ goto unlock;
+ }
+ spin_lock_bh(&li->lock); /* nested inside fi->lock */
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ spin_unlock_bh(&li->lock);
+ rc = 0; /* order accepted */
+unlock:
+ spin_unlock_bh(&fi->lock);
+ VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); /* also logged when the target did not exist */
+ return rc;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+ int rc;
+ /* queue a KVM_S390_SIGP_STOP request on the vcpu with address cpu_addr */
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return 3; /* not operational */
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL); /* allocated before any spinlock is taken */
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = KVM_S390_SIGP_STOP;
+
+ spin_lock_bh(&fi->lock);
+ li = fi->local_int[cpu_addr];
+ if (li == NULL) {
+ rc = 3; /* not operational */
+ kfree(inti);
+ goto unlock;
+ }
+ spin_lock_bh(&li->lock); /* nested inside fi->lock */
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+ if (store) /* non-zero store: also ask for the cpu status to be stored on stop */
+ li->action_bits |= ACTION_STORE_ON_STOP;
+ li->action_bits |= ACTION_STOP_ON_STOP;
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ spin_unlock_bh(&li->lock);
+ rc = 0; /* order accepted */
+unlock:
+ spin_unlock_bh(&fi->lock);
+ VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); /* also logged when the target did not exist */
+ return rc;
+}
+
+/*
+ * sigp set architecture: only the low byte of @parameter is looked at.
+ * Returns the condition code for the guest - 3 for mode 0 (ESA/390, which
+ * is refused), 0 for modes 1 and 2 - or -ENOTSUPP for any other value so
+ * that the caller can pass the request on.
+ */
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+{
+ int rc;
+
+ switch (parameter & 0xff) {
+ case 0:
+ /* terminate the message so it is not merged with the next one */
+ printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
+ " not supported\n");
+ rc = 3; /* not operational */
+ break;
+ case 1:
+ case 2:
+ rc = 0; /* order accepted */
+ break;
+ default:
+ rc = -ENOTSUPP;
+ break;
+ }
+ return rc;
+}
+
+/*
+ * sigp set prefix: queue a KVM_S390_SIGP_SET_PREFIX request carrying
+ * @address for the vcpu with address @cpu_addr.
+ *
+ * Returns the condition code for the guest:
+ * 0 - order accepted
+ * 1 - status stored in the low 32 bits of @reg: invalid parameter when the
+ * two pages at the new prefix cannot be read, incorrect state when the
+ * target does not exist or is still running
+ * 2 - busy, no memory for the request
+ */
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
+ u64 *reg)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+ int rc;
+ u8 tmp;
+
+ /* make sure that the new value is valid memory */
+ address = address & 0x7fffe000u;
+ if ((copy_from_guest(vcpu, &tmp,
+ (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
+ (copy_from_guest(vcpu, &tmp, (u64) (address +
+ vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
+ /* replace the low word instead of or-ing into stale bits */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STAT_INVALID_PARAMETER;
+ return 1; /* status stored */
+ }
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return 2; /* busy */
+
+ spin_lock_bh(&fi->lock);
+ /* only index local_int[] once cpu_addr is known to be in range */
+ li = (cpu_addr < KVM_MAX_VCPUS) ? fi->local_int[cpu_addr] : NULL;
+
+ if (li == NULL) {
+ rc = 1; /* status stored */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STAT_INCORRECT_STATE;
+ kfree(inti);
+ goto out_fi;
+ }
+
+ spin_lock_bh(&li->lock);
+ /* cpu must be in stopped state */
+ if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ rc = 1; /* status stored */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STAT_INCORRECT_STATE;
+ kfree(inti);
+ goto out_li;
+ }
+
+ inti->type = KVM_S390_SIGP_SET_PREFIX;
+ inti->prefix.address = address;
+
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ rc = 0; /* order accepted */
+
+ VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+out_li:
+ spin_unlock_bh(&li->lock);
+out_fi:
+ spin_unlock_bh(&fi->lock);
+ return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+ int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u32 parameter;
+ u16 cpu_addr = vcpu->arch.guest_gprs[r3];
+ u8 order_code;
+ int rc;
+ /* decode an intercepted sigp and run the matching handler; its result becomes the guest cc */
+ order_code = disp2; /* u8: only the low 8 bits of the second-operand address are kept */
+ if (base2)
+ order_code += vcpu->arch.guest_gprs[base2];
+ /* the parameter is taken from r1 when r1 is odd, otherwise from r1 + 1 */
+ if (r1 % 2)
+ parameter = vcpu->arch.guest_gprs[r1];
+ else
+ parameter = vcpu->arch.guest_gprs[r1 + 1];
+
+ switch (order_code) {
+ case SIGP_SENSE:
+ vcpu->stat.instruction_sigp_sense++;
+ rc = __sigp_sense(vcpu, cpu_addr,
+ &vcpu->arch.guest_gprs[r1]);
+ break;
+ case SIGP_EMERGENCY:
+ vcpu->stat.instruction_sigp_emergency++;
+ rc = __sigp_emergency(vcpu, cpu_addr);
+ break;
+ case SIGP_STOP:
+ vcpu->stat.instruction_sigp_stop++;
+ rc = __sigp_stop(vcpu, cpu_addr, 0);
+ break;
+ case SIGP_STOP_STORE_STATUS:
+ vcpu->stat.instruction_sigp_stop++;
+ rc = __sigp_stop(vcpu, cpu_addr, 1);
+ break;
+ case SIGP_SET_ARCH:
+ vcpu->stat.instruction_sigp_arch++;
+ rc = __sigp_set_arch(vcpu, parameter);
+ break;
+ case SIGP_SET_PREFIX:
+ vcpu->stat.instruction_sigp_prefix++;
+ rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
+ &vcpu->arch.guest_gprs[r1]);
+ break;
+ case SIGP_RESTART:
+ vcpu->stat.instruction_sigp_restart++;
+ /* user space must know about restart - fall through */
+ default:
+ return -ENOTSUPP;
+ }
+
+ if (rc < 0) /* negative handler results go back to the caller unchanged */
+ return rc;
+
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* replace the two CC bits of the guest PSW with rc */
+ vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+ return 0;
+}
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -120,6 +120,12 @@ struct kvm_vcpu_stat {
u32 instruction_chsc;
u32 instruction_stsi;
u32 instruction_stfl;
+ u32 instruction_sigp_sense;
+ u32 instruction_sigp_emergency;
+ u32 instruction_sigp_stop;
+ u32 instruction_sigp_arch;
+ u32 instruction_sigp_prefix;
+ u32 instruction_sigp_restart;
};
struct io_info {
@@ -161,6 +167,10 @@ struct interrupt_info {
};
};
+/* for local_interrupt.action_bits */
+#define ACTION_STORE_ON_STOP 1
+#define ACTION_STOP_ON_STOP 2
+
struct local_interrupt {
spinlock_t lock;
struct list_head list;
@@ -168,6 +178,8 @@ struct local_interrupt {
struct float_interrupt *float_int;
int timer_due; /* event indicator for waitqueue below */
wait_queue_head_t wq;
+ atomic_t *cpuflags;
+ unsigned int action_bits;
};
struct float_interrupt {
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 10/15] kvm-s390: intercepts for diagnose instructions
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (8 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 09/15] kvm-s390: interprocessor communication via sigp Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 11/15] kvm-s390: add kvm to kconfig on s390 Carsten Otte
` (10 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch introduces interpretation of some diagnose instruction intercepts.
Diagnose is our classic architected way of doing a hypercall. This patch
features the following diagnose codes:
- vm storage size, that tells the guest about its memory layout
- time slice end, which is used by the guest to indicate that it waits
for a lock and thus cannot use up its time slice in a useful way
- ipl functions, which a guest can use to reset and reboot itself
In order to implement ipl functions, we also introduce an exit reason that
causes userspace to perform various resets on the virtual machine. All resets
are described in the principles of operation book, except KVM_S390_RESET_IPL
which causes a reboot of the machine.
Acked-by: Martin Schwidefsky <martin.schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/kvm/Makefile | 2 -
arch/s390/kvm/diag.c | 67 ++++++++++++++++++++++++++++++++++++++++++++
arch/s390/kvm/intercept.c | 1
arch/s390/kvm/kvm-s390.c | 1
arch/s390/kvm/kvm-s390.h | 2 +
include/asm-s390/kvm_host.h | 5 ++-
include/linux/kvm.h | 8 +++++
7 files changed, 84 insertions(+), 2 deletions(-)
Index: kvm/arch/s390/kvm/Makefile
===================================================================
--- kvm.orig/arch/s390/kvm/Makefile
+++ kvm/arch/s390/kvm/Makefile
@@ -10,5 +10,5 @@ common-objs = $(addprefix ../../../virt/
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
obj-$(CONFIG_KVM) += kvm.o
Index: kvm/arch/s390/kvm/diag.c
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/diag.c
@@ -0,0 +1,67 @@
+/*
+ * diag.c - handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+ vcpu->stat.diagnose_44++;
+ vcpu_put(vcpu);
+ schedule();
+ vcpu_load(vcpu);
+ return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+ unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+ unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
+
+ VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+ switch (subcode) {
+ case 3:
+ vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+ break;
+ case 4:
+ vcpu->run->s390_reset_flags = 0;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
+ vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
+ VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
+ vcpu->run->s390_reset_flags);
+ return -EREMOTE;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+ int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+
+ switch (code) {
+ case 0x44:
+ return __diag_time_slice_end(vcpu);
+ case 0x308:
+ return __diag_ipl_functions(vcpu);
+ default:
+ return -ENOTSUPP;
+ }
+}
Index: kvm/arch/s390/kvm/intercept.c
===================================================================
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -100,6 +100,7 @@ static int handle_lctl(struct kvm_vcpu *
}
static intercept_handler_t instruction_handlers[256] = {
+ [0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_priv,
[0xb7] = handle_lctl,
Index: kvm/arch/s390/kvm/kvm-s390.c
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_en
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+ { "diagnose_44", VCPU_STAT(diagnose_44) },
{ NULL }
};
Index: kvm/arch/s390/kvm/kvm-s390.h
===================================================================
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -57,4 +57,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu
/* implemented in kvm-s390.c */
int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
unsigned long addr);
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
#endif
Index: kvm/include/asm-s390/kvm_host.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_host.h
+++ kvm/include/asm-s390/kvm_host.h
@@ -86,7 +86,9 @@ struct sie_block {
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
- __u8 reservedb0[80]; /* 0x00b0 */
+ __u8 reservedb0[30]; /* 0x00b0 */
+ __u16 iprcc; /* 0x00ce */
+ __u8 reservedd0[48]; /* 0x00d0 */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
__u8 reserved188[120]; /* 0x0188 */
@@ -126,6 +128,7 @@ struct kvm_vcpu_stat {
u32 instruction_sigp_arch;
u32 instruction_sigp_prefix;
u32 instruction_sigp_restart;
+ u32 diagnose_44;
};
struct io_info {
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -75,6 +75,7 @@ struct kvm_irqchip {
#define KVM_EXIT_SET_TPR 11
#define KVM_EXIT_TPR_ACCESS 12
#define KVM_EXIT_S390_SIEIC 13
+#define KVM_EXIT_S390_RESET 14
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
@@ -147,6 +148,13 @@ struct kvm_run {
__u16 ipa;
__u32 ipb;
} s390_sieic;
+ /* KVM_EXIT_S390_RESET */
+#define KVM_S390_RESET_POR 1
+#define KVM_S390_RESET_CLEAR 2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT 8
+#define KVM_S390_RESET_IPL 16
+ __u64 s390_reset_flags;
/* Fix the size of the union. */
char padding[256];
};
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 11/15] kvm-s390: add kvm to kconfig on s390
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (9 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 10/15] kvm-s390: intercepts for diagnose instructions Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 12/15] kvm-s390: API documentation Carsten Otte
` (9 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch adds the virtualization submenu and the kvm option to the kernel
config. It also defines HAVE_KVM for 64bit kernels.
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/Kconfig | 3 +++
arch/s390/kvm/Kconfig | 43 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
Index: kvm/arch/s390/Kconfig
===================================================================
--- kvm.orig/arch/s390/Kconfig
+++ kvm/arch/s390/Kconfig
@@ -66,6 +66,7 @@ config S390
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_KVM if 64BIT
source "init/Kconfig"
@@ -553,3 +554,5 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "arch/s390/kvm/Kconfig"
Index: kvm/arch/s390/kvm/Kconfig
===================================================================
--- /dev/null
+++ kvm/arch/s390/kvm/Kconfig
@@ -0,0 +1,43 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+ bool
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ default y
+ ---help---
+ Say Y here to get to see options for using your Linux host to run other
+ operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ depends on HAVE_KVM && EXPERIMENTAL
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select S390_SWITCH_AMODE
+ select PREEMPT
+ ---help---
+ Support hosting paravirtualized guest machines using the SIE
+ virtualization capability on the mainframe. This should work
+ on any 64bit machine.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ To compile this as a module, choose M here: the module
+ will be called kvm.
+
+ If unsure, say N.
+
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 12/15] kvm-s390: API documentation
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (10 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 11/15] kvm-s390: add kvm to kconfig on s390 Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 13/15] kvm-s390: update maintainers Carsten Otte
` (8 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Carsten Otte <cotte@de.ibm.com>
This patch adds Documentation/s390/kvm.txt, which describes specifics of kvm's
user interface that are unique to s390 architecture.
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
Documentation/s390/kvm.txt | 125 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 125 insertions(+)
Index: kvm/Documentation/s390/kvm.txt
===================================================================
--- /dev/null
+++ kvm/Documentation/s390/kvm.txt
@@ -0,0 +1,125 @@
+*** BIG FAT WARNING ***
+The kvm module is currently in EXPERIMENTAL state for s390. This means, that
+the interface to the module is not yet considered to remain stable. Thus, be
+prepared that we keep breaking your userspace application and guest
+compatibility over and over again until we feel happy with the result. Make sure
+your guest kernel, your host kernel, and your userspace launcher are in a
+consistent state.
+
+This Documentation describes the unique ioctl calls to /dev/kvm, the resulting
+kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.
+
+1. ioctl calls to /dev/kvm
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_GET_API_VERSION
+KVM_CREATE_VM (*) see note
+KVM_CHECK_EXTENSION
+KVM_GET_VCPU_MMAP_SIZE
+
+Notes:
+* KVM_CREATE_VM may fail on s390, if the calling process has multiple
+threads and has not called KVM_S390_ENABLE_SIE before.
+
+In addition, on s390 the following architecture specific ioctls are supported:
+ioctl: KVM_S390_ENABLE_SIE
+args: none
+see also: include/linux/kvm.h
+This call causes the kernel to switch on PGSTE in the user page table. This
+operation is needed in order to run a virtual machine, and it requires the
+calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
+will implicitly try to switch on PGSTE if the user process has not called
+KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
+before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will
+observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
+operation, is not reversible, and will persist over the entire lifetime of
+the calling process. It does not have any user-visible effect other than a small
+performance penalty.
+
+2. ioctl calls to the kvm-vm file descriptor
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_CREATE_VCPU
+KVM_SET_USER_MEMORY_REGION (*) see note
+KVM_GET_DIRTY_LOG (**) see note
+
+Notes:
+* kvm does only allow exactly one memory slot on s390, which has to start
+ at guest absolute address zero and at a user address that is aligned on any
+ page boundary. This hardware "limitation" allows us to have a few unique
+ optimizations. The memory slot doesn't have to be filled
+ with memory actually, it may contain sparse holes. That said, with different
+ user memory layout this does still allow a large flexibility when
+ doing the guest memory setup.
+** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty
+log. This ioctl call is only needed for guest migration, and we intend to
+implement this one in the future.
+
+In addition, on s390 the following architecture specific ioctls for the kvm-vm
+file descriptor are supported:
+ioctl: KVM_S390_INTERRUPT
+args: struct kvm_s390_interrupt *
+see also: include/linux/kvm.h
+This ioctl is used to submit a floating interrupt for a virtual machine.
+Floating interrupts may be delivered to any virtual cpu in the configuration.
+Only some interrupt types defined in include/linux/kvm.h make sense when
+submitted as floating interrupt. The following interrupts are not considered
+to be useful as floating interrupt, and a call to inject them will result in
+-EINVAL error code: program interrupts, and interprocessor signals. Valid
+floating interrupts are:
+KVM_S390_INT_VIRTIO
+KVM_S390_INT_SERVICE
+
+3. ioctl calls to the kvm-vcpu file descriptor
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_RUN
+KVM_GET_REGS
+KVM_SET_REGS
+KVM_GET_SREGS
+KVM_SET_SREGS
+KVM_GET_FPU
+KVM_SET_FPU
+
+In addition, on s390 the following architecture specific ioctls for the
+kvm-vcpu file descriptor are supported:
+ioctl: KVM_S390_INTERRUPT
+args: struct kvm_s390_interrupt *
+see also: include/linux/kvm.h
+This ioctl is used to submit an interrupt for a specific virtual cpu.
+Only some interrupt types defined in include/linux/kvm.h make sense when
+submitted for a specific cpu. The following interrupts are not considered
+to be useful, and a call to inject them will result in -EINVAL error code:
+service processor calls, and virtio interrupts. Valid interrupt types are:
+KVM_S390_PROGRAM_INT
+KVM_S390_SIGP_STOP
+KVM_S390_RESTART
+KVM_S390_SIGP_SET_PREFIX
+KVM_S390_INT_EMERGENCY
+
+ioctl: KVM_S390_STORE_STATUS
+args: unsigned long
+see also: include/linux/kvm.h
+This ioctl stores the state of the cpu at the guest real address given as
+argument, unless one of the following values defined in include/linux/kvm.h
+is given as argument:
+KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
+absolute lowcore as defined by the principles of operation
+KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
+its prefix page just like the dump tool that comes with zipl. This is useful
+to create a system dump for use with lkcdutils or crash.
+
+ioctl: KVM_S390_SET_INITIAL_PSW
+args: struct kvm_s390_psw *
+see also: include/linux/kvm.h
+This ioctl can be used to set the processor status word (psw) of a stopped cpu
+prior to running it with KVM_RUN. Note that this call is not required to modify
+the psw during sie intercepts that fall back to userspace because struct kvm_run
+does contain the psw, and this value is evaluated during reentry of KVM_RUN
+after the intercept exit was recognized.
+
+ioctl: KVM_S390_INITIAL_RESET
+args: none
+see also: include/linux/kvm.h
+This ioctl can be used to perform an initial cpu reset as defined by the
+principles of operation. The target cpu has to be in stopped state.
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 13/15] kvm-s390: update maintainers
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (11 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 12/15] kvm-s390: API documentation Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 14/15] guest: detect when running on kvm Carsten Otte
` (7 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch adds an entry for kvm on s390 to the MAINTAINERS file :-). We intend
to push all patches regarding this via Avi's kvm.git.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
MAINTAINERS | 10 ++++++++++
1 file changed, 10 insertions(+)
Index: kvm/MAINTAINERS
===================================================================
--- kvm.orig/MAINTAINERS
+++ kvm/MAINTAINERS
@@ -2296,6 +2296,16 @@ L: kvm-ia64-devel@lists.sourceforge.net
W: kvm.sourceforge.net
S: Supported
+KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
+P: Carsten Otte
+M: cotte@de.ibm.com
+P: Christian Borntraeger
+M: borntraeger@de.ibm.com
+M: linux390@de.ibm.com
+L: linux-s390@vger.kernel.org
+W: http://www.ibm.com/developerworks/linux/linux390/
+S: Supported
+
KEXEC
P: Eric Biederman
M: ebiederm@xmission.com
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (12 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 13/15] kvm-s390: update maintainers Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
2008-03-20 16:25 ` [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls Carsten Otte
` (6 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Christian Borntraeger <borntraeger@de.ibm.com>
From: Carsten Otte <cotte@de.ibm.com>
This patch adds functionality to detect if the kernel runs under the KVM
hypervisor. A macro MACHINE_IS_KVM is exported for device drivers. This
allows drivers to skip device detection if the system runs non-virtualized.
We also define a preferred console to avoid having the ttyS0, which is a line
mode only console.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
arch/s390/Kconfig | 7 +++++++
arch/s390/kernel/early.c | 4 ++++
arch/s390/kernel/setup.c | 10 +++++++---
include/asm-s390/setup.h | 1 +
4 files changed, 19 insertions(+), 3 deletions(-)
Index: kvm/arch/s390/Kconfig
===================================================================
--- kvm.orig/arch/s390/Kconfig
+++ kvm/arch/s390/Kconfig
@@ -533,6 +533,13 @@ config ZFCPDUMP
Select this option if you want to build an zfcpdump enabled kernel.
Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+config S390_GUEST
+bool "s390 guest support (EXPERIMENTAL)"
+ depends on 64BIT && EXPERIMENTAL
+ select VIRTIO
+ select VIRTIO_RING
+ help
+ Select this option to run the kernel as a guest under KVM on s390.
endmenu
source "net/Kconfig"
Index: kvm/arch/s390/kernel/early.c
===================================================================
--- kvm.orig/arch/s390/kernel/early.c
+++ kvm/arch/s390/kernel/early.c
@@ -143,6 +143,10 @@ static noinline __init void detect_machi
/* Running on a P/390 ? */
if (cpuinfo->cpu_id.machine == 0x7490)
machine_flags |= 4;
+
+ /* Running under KVM ? */
+ if (cpuinfo->cpu_id.version == 0xfe)
+ machine_flags |= 64;
}
#ifdef CONFIG_64BIT
Index: kvm/arch/s390/kernel/setup.c
===================================================================
--- kvm.orig/arch/s390/kernel/setup.c
+++ kvm/arch/s390/kernel/setup.c
@@ -793,9 +793,13 @@ setup_arch(char **cmdline_p)
"This machine has an IEEE fpu\n" :
"This machine has no IEEE fpu\n");
#else /* CONFIG_64BIT */
- printk((MACHINE_IS_VM) ?
- "We are running under VM (64 bit mode)\n" :
- "We are running native (64 bit mode)\n");
+ if (MACHINE_IS_VM)
+ printk("We are running under VM (64 bit mode)\n");
+ else if (MACHINE_IS_KVM) {
+ printk("We are running under KVM (64 bit mode)\n");
+ add_preferred_console("ttyS", 1, NULL);
+ } else
+ printk("We are running native (64 bit mode)\n");
#endif /* CONFIG_64BIT */
/* Save unparsed command line copy for /proc/cmdline */
Index: kvm/include/asm-s390/setup.h
===================================================================
--- kvm.orig/include/asm-s390/setup.h
+++ kvm/include/asm-s390/setup.h
@@ -62,6 +62,7 @@ extern unsigned long machine_flags;
#define MACHINE_IS_VM (machine_flags & 1)
#define MACHINE_IS_P390 (machine_flags & 4)
#define MACHINE_HAS_MVPG (machine_flags & 16)
+#define MACHINE_IS_KVM (machine_flags & 64)
#define MACHINE_HAS_IDTE (machine_flags & 128)
#define MACHINE_HAS_DIAG9C (machine_flags & 256)
^ permalink raw reply [flat|nested] 52+ messages in thread* [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
[not found] <1206028710.6690.21.camel@cotte.boeblingen.de.ibm.com>
` (13 preceding siblings ...)
2008-03-20 16:25 ` [RFC/PATCH 14/15] guest: detect when running on kvm Carsten Otte
@ 2008-03-20 16:25 ` Carsten Otte
[not found] ` <1206030298.6690.56.camel@cotte.boeblingen.de.ibm.com>
` (5 subsequent siblings)
20 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:25 UTC (permalink / raw)
To: virtualization, kvm-devel, Avi Kivity
Cc: aliguori, EHRHARDT, arnd, hollisb, heiko.carstens, jeroney,
borntraeger, schwidefsky, rvdheij, os, jblunck, Zhang, Xiantao
From: Christian Borntraeger <borntraeger@de.ibm.com>
This patch implements kvm guest kernel support for paravirtualized devices
and contains two parts:
o a basic virtio stub using virtio_ring and external interrupts and hypercalls
o full hypercall implementation in kvm_para.h
Currently we don't have PCI on s390. Making virtio_pci usable for s390 seems
more complicated than providing our own stub. This virtio stub is similar to
the lguest one, the memory for the descriptors and the device detection is made
via additional mapped memory on top of the guest storage. We use an external
interrupt with extint code 1237 for host->guest notification.
The hypercall definition uses the diag instruction for issuing a hypercall. The
parameters are written in R2-R7, the hypercall number is written in R1. This is
similar to the system call ABI (svc) which can use R1 for the number and R2-R6
for the parameters.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
---
drivers/s390/Makefile | 2
drivers/s390/kvm/Makefile | 9 +
drivers/s390/kvm/kvm_virtio.c | 326 ++++++++++++++++++++++++++++++++++++++++++
drivers/s390/kvm/kvm_virtio.h | 47 ++++++
include/asm-s390/kvm_para.h | 124 +++++++++++++++
5 files changed, 505 insertions(+), 3 deletions(-)
Index: kvm/drivers/s390/Makefile
===================================================================
--- kvm.orig/drivers/s390/Makefile
+++ kvm/drivers/s390/Makefile
@@ -5,7 +5,7 @@
CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
obj-y += s390mach.o sysinfo.o s390_rdev.o
-obj-y += cio/ block/ char/ crypto/ net/ scsi/
+obj-y += cio/ block/ char/ crypto/ net/ scsi/ kvm/
drivers-y += drivers/s390/built-in.o
Index: kvm/drivers/s390/kvm/Makefile
===================================================================
--- /dev/null
+++ kvm/drivers/s390/kvm/Makefile
@@ -0,0 +1,9 @@
+# Makefile for kvm guest drivers on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+obj-$(CONFIG_VIRTIO) += kvm_virtio.o
Index: kvm/drivers/s390/kvm/kvm_virtio.c
===================================================================
--- /dev/null
+++ kvm/drivers/s390/kvm/kvm_virtio.c
@@ -0,0 +1,326 @@
+/*
+ * kvm_virtio.c - virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/err.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/interrupt.h>
+#include <linux/virtio_ring.h>
+#include <asm/io.h>
+#include <asm/kvm_para.h>
+#include <asm/setup.h>
+#include <asm/s390_ext.h>
+
+#include "kvm_virtio.h"
+
+/*
+ * The pointer to our (page) of device descriptions.
+ */
+static void *kvm_devices;
+
+/*
+ * Unique numbering for kvm devices.
+ */
+static unsigned int dev_index;
+
+struct kvm_device {
+ struct virtio_device vdev;
+ struct kvm_device_desc *desc;
+};
+
+#define to_kvmdev(vd) container_of(vd, struct kvm_device, vdev)
+
+/*
+ * memory layout:
+ * - kvm_device_descriptor
+ * struct kvm_device_desc
+ * - configuration
+ * struct kvm_vqconfig
+ * - feature bits
+ * - config space
+ */
+static struct kvm_vqconfig *kvm_vq_config(const struct kvm_device_desc *desc)
+{
+ return (struct kvm_vqconfig *)(desc + 1);
+}
+
+static u8 *kvm_vq_features(const struct kvm_device_desc *desc)
+{
+ return (u8 *)(kvm_vq_config(desc) + desc->num_vq);
+}
+
+static u8 *kvm_vq_configspace(const struct kvm_device_desc *desc)
+{
+ return kvm_vq_features(desc) + desc->feature_len * 2;
+}
+
+/*
+ * The total size of the config page used by this device (incl. desc)
+ */
+static unsigned desc_size(const struct kvm_device_desc *desc)
+{
+ return sizeof(*desc)
+ + desc->num_vq * sizeof(struct kvm_vqconfig)
+ + desc->feature_len * 2
+ + desc->config_len;
+}
+
+/*
+ * Test host feature bit @fbit and, if the host offers it, acknowledge it.
+ * Returns false for bits outside the feature_len-byte host bitmap.
+ */
+static bool kvm_feature(struct virtio_device *vdev, unsigned fbit)
+{
+	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
+	u8 *features;
+
+	if (fbit / 8 >= desc->feature_len)	/* == would index the ack half */
+		return false;
+
+	features = kvm_vq_features(desc);
+	if (!(features[fbit / 8] & (1 << (fbit % 8))))
+		return false;
+
+	/*
+	 * Acknowledge: set the same bit in the second (guest) bitmap half.
+	 */
+	features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
+	return true;
+}
+
+/*
+ * Reading and writing elements in config space
+ */
+static void kvm_get(struct virtio_device *vdev, unsigned int offset,
+ void *buf, unsigned len)
+{
+ struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
+
+ BUG_ON(offset + len > desc->config_len);
+ memcpy(buf, kvm_vq_configspace(desc) + offset, len);
+}
+
+static void kvm_set(struct virtio_device *vdev, unsigned int offset,
+ const void *buf, unsigned len)
+{
+ struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
+
+ BUG_ON(offset + len > desc->config_len);
+ memcpy(kvm_vq_configspace(desc) + offset, buf, len);
+}
+
+/*
+ * The operations to get and set the status word just access
+ * the status field of the device descriptor.
+ */
+static u8 kvm_get_status(struct virtio_device *vdev)
+{
+ return to_kvmdev(vdev)->desc->status;
+}
+
+static void kvm_set_status(struct virtio_device *vdev, u8 status)
+{
+ BUG_ON(!status);
+ to_kvmdev(vdev)->desc->status = status;
+}
+
+/*
+ * To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
+ * address of the device. The Host will zero the status and all the
+ * features.
+ */
+static void kvm_reset(struct virtio_device *vdev)
+{
+ unsigned long offset = (void *)to_kvmdev(vdev)->desc - kvm_devices;
+
+ kvm_hypercall1(1237, (max_pfn<<PAGE_SHIFT) + offset);
+}
+
+/*
+ * When the virtio_ring code wants to notify the Host, it calls us here and we
+ * make a hypercall. We hand the address of the virtqueue so the Host
+ * knows which virtqueue we're talking about.
+ */
+static void kvm_notify(struct virtqueue *vq)
+{
+ struct kvm_vqconfig *config = vq->priv;
+
+ kvm_hypercall1(1237, config->address);
+}
+
+/*
+ * This routine looks up virtqueue number @index in the configuration of this
+ * device and sets it up. Returns the queue, or ERR_PTR(-ENOENT / -ENOMEM).
+ */
+static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
+ unsigned index,
+ void (*callback)(struct virtqueue *vq))
+{
+ struct kvm_device *kdev = to_kvmdev(vdev);
+ struct kvm_vqconfig *config;
+ struct virtqueue *vq;
+ int err;
+
+ if (index >= kdev->desc->num_vq)
+ return ERR_PTR(-ENOENT);
+
+ config = kvm_vq_config(kdev->desc)+index;
+
+ if (add_shared_memory(config->address,
+ vring_size(config->num, PAGE_SIZE))) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ vq = vring_new_virtqueue(config->num, vdev, (void *) config->address,
+ kvm_notify, callback);
+ if (!vq) {
+ err = -ENOMEM;
+ goto unmap;
+ }
+
+ /*
+ * Register a callback token: the virtqueue pointer itself.
+ * The host will send this via the external interrupt parameter.
+ */
+ config->token = (u64) vq;
+
+ vq->priv = config;
+ return vq;
+unmap:
+ remove_shared_memory(config->address, vring_size(config->num,
+ PAGE_SIZE));
+out:
+ return ERR_PTR(err);
+}
+
+static void kvm_del_vq(struct virtqueue *vq)
+{
+ struct kvm_vqconfig *config = vq->priv;
+
+ vring_del_virtqueue(vq);
+ remove_shared_memory(config->address,
+ vring_size(config->num, PAGE_SIZE));
+}
+
+/*
+ * The config ops structure as defined by virtio config
+ */
+static struct virtio_config_ops kvm_vq_configspace_ops = {
+ .feature = kvm_feature,
+ .get = kvm_get,
+ .set = kvm_set,
+ .get_status = kvm_get_status,
+ .set_status = kvm_set_status,
+ .reset = kvm_reset,
+ .find_vq = kvm_find_vq,
+ .del_vq = kvm_del_vq,
+};
+
+/*
+ * The root device for the kvm virtio devices.
+ * This makes them appear as /sys/devices/kvm_s390/0,1,2 not /sys/devices/0,1,2.
+ */
+static struct device kvm_root = {
+ .parent = NULL,
+ .bus_id = "kvm_s390",
+};
+
+/*
+ * adds a new device and register it with virtio
+ * appropriate drivers are loaded by the device model
+ */
+static void add_kvm_device(struct kvm_device_desc *d)
+{
+ struct kvm_device *kdev;
+
+ kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
+ if (!kdev) {
+ printk(KERN_EMERG "Cannot allocate kvm dev %u\n",
+ dev_index++);
+ return;
+ }
+
+ kdev->vdev.dev.parent = &kvm_root;
+ kdev->vdev.index = dev_index++;
+ kdev->vdev.id.device = d->type;
+ kdev->vdev.config = &kvm_vq_configspace_ops;
+ kdev->desc = d;
+
+ if (register_virtio_device(&kdev->vdev) != 0) {
+ printk(KERN_ERR "Failed to register kvm device %u\n",
+ kdev->vdev.index);
+ kfree(kdev);
+ }
+}
+
+/*
+ * scan_devices() simply iterates through the device page.
+ * The type 0 is reserved to mean "end of devices".
+ */
+static void scan_devices(void)
+{
+ unsigned int i;
+ struct kvm_device_desc *d;
+
+ for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
+ d = kvm_devices + i;
+
+ if (d->type == 0)
+ break;
+
+ add_kvm_device(d);
+ }
+}
+
+/*
+ * we emulate the request_irq behaviour on top of s390 extints
+ */
+static void kvm_extint_handler(u16 code)
+{
+ void *data = (void *) *(long *) __LC_PFAULT_INTPARM;
+
+ vring_interrupt(0, data);
+}
+
+/*
+ * Init function for virtio
+ * devices are in a single page above top of "normal" mem
+ */
+static int __init kvm_devices_init(void)
+{
+ if (!MACHINE_IS_KVM)
+ return -ENODEV;
+
+ if (device_register(&kvm_root) != 0)
+ panic("Could not register kvm root");
+
+ if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
+ device_unregister(&kvm_root);
+ return -ENOMEM;
+ }
+
+ kvm_devices = (void *) (max_pfn << PAGE_SHIFT);
+
+ register_external_interrupt(0x1237, kvm_extint_handler);
+ ctl_set_bit(0, 9);
+
+ scan_devices();
+ return 0;
+}
+
+/*
+ * We do this after core stuff, but before the drivers.
+ */
+postcore_initcall(kvm_devices_init);
Index: kvm/drivers/s390/kvm/kvm_virtio.h
===================================================================
--- /dev/null
+++ kvm/drivers/s390/kvm/kvm_virtio.h
@@ -0,0 +1,47 @@
+/*
+ * kvm_virtio.h - definition for virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_VIRTIO_H
+#define __KVM_S390_VIRTIO_H
+
+struct kvm_device_desc { /* one device descriptor as found in the device page */
+ /* The device type: console, network, disk etc. Type 0 terminates. */
+ __u8 type;
+ /* The number of virtqueues (first in config array) */
+ __u8 num_vq;
+ /*
+ * The number of bytes of feature bits. Multiply by 2: one for host
+ * features and one for guest acknowledgements.
+ */
+ __u8 feature_len;
+ /* The number of bytes of the config array after virtqueues. */
+ __u8 config_len;
+ /* A status byte, written by the Guest. */
+ __u8 status;
+ __u8 config[0]; /* variable-length area; the virtqueue entries come first (see num_vq) */
+};
+
+/*
+ * Expected layout of the configuration entry that describes one virtqueue
+ * inside a device's config space.
+ */
+struct kvm_vqconfig {
+ /* The token returned with an interrupt. Set by the guest */
+ __u64 token;
+ /* The address of the virtio ring */
+ __u64 address;
+ /* The number of entries in the virtio_ring */
+ __u16 num;
+
+};
+#endif
+
Index: kvm/include/asm-s390/kvm_para.h
===================================================================
--- kvm.orig/include/asm-s390/kvm_para.h
+++ kvm/include/asm-s390/kvm_para.h
@@ -14,14 +14,134 @@
#define __S390_KVM_PARA_H
/*
- * No hypercalls for KVM on s390
+ * Hypercalls for KVM on s390. The calling convention is similar to the
+ * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
+ * as hypercall number and R7 as parameter 6. The return value is
+ * written to R2. We use the diagnose instruction as hypercall. To avoid
+ * conflicts with existing diagnoses for LPAR and z/VM, we do not use
+ * the instruction encoded number, but specify the number in R1 and
+ * use 0x500 as KVM hypercall
+ *
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
*/
+static inline long kvm_hypercall0(unsigned long nr)
+{ /* Issue hypercall 'nr' without parameters; returns the value left in R2. */
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register long __rc asm("2"); /* R2: return value */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose with code 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr): "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{ /* Issue hypercall 'nr' with one parameter; returns the value left in R2. */
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 */
+ register long __rc asm("2"); /* R2 again: the return value overwrites parameter 1 */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose with code 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc"); /* "0" ties __p1 to output operand 0 */
+ return __rc;
+}
+
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+ unsigned long p2)
+{ /* Issue hypercall 'nr' with two parameters; returns the value left in R2. */
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register long __rc asm("2"); /* R2 again: the return value overwrites parameter 1 */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose with code 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2) /* "0" ties __p1 to output operand 0 */
+ : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{ /* Issue hypercall 'nr' with three parameters; returns the value left in R2. */
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register long __rc asm("2"); /* R2 again: the return value overwrites parameter 1 */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose with code 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to output operand 0 */
+ "d" (__p3) : "memory", "cc");
+ return __rc;
+}
+
+
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{ /* Issue hypercall 'nr' with four parameters; returns the value left in R2. */
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register unsigned long __p4 asm("5") = p4; /* R5: parameter 4 */
+ register long __rc asm("2"); /* R2 again: the return value overwrites parameter 1 */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose with code 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to output operand 0 */
+ "d" (__p3), "d" (__p4) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5)
+{ /* Issue hypercall 'nr' with five parameters; returns the value left in R2. */
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register unsigned long __p4 asm("5") = p4; /* R5: parameter 4 */
+ register unsigned long __p5 asm("6") = p5; /* R6: parameter 5 */
+ register long __rc asm("2"); /* R2 again: the return value overwrites parameter 1 */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose with code 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to output operand 0 */
+ "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5,
+ unsigned long p6)
+{ /* Issue hypercall 'nr' with six parameters; returns the value left in R2. */
+ register unsigned long __nr asm("1") = nr; /* R1: hypercall number */
+ register unsigned long __p1 asm("2") = p1; /* R2: parameter 1 */
+ register unsigned long __p2 asm("3") = p2; /* R3: parameter 2 */
+ register unsigned long __p3 asm("4") = p3; /* R4: parameter 3 */
+ register unsigned long __p4 asm("5") = p4; /* R5: parameter 4 */
+ register unsigned long __p5 asm("6") = p5; /* R6: parameter 5 */
+ register unsigned long __p6 asm("7") = p6; /* R7: parameter 6, outside the R2-R6 ABI range */
+ register long __rc asm("2"); /* R2 again: the return value overwrites parameter 1 */
+
+ asm volatile ("diag 2,4,0x500\n" /* diagnose with code 0x500 is the KVM hypercall */
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2), /* "0" ties __p1 to output operand 0 */
+ "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6)
+ : "memory", "cc");
+ return __rc;
+}
+
+/* kvm on s390 is always paravirtualization enabled */
static inline int kvm_para_available(void)
{
- return 0;
+ return 1;
}
+/* No feature bits are currently assigned for kvm on s390 */
static inline unsigned int kvm_arch_para_features(void)
{
return 0;
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <1206030298.6690.56.camel@cotte.boeblingen.de.ibm.com>]
* Re: [RFC/PATCH 05/15] KVM_MAX_VCPUS
[not found] ` <1206030298.6690.56.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 16:43 ` Hollis Blanchard
[not found] ` <1206031383.6356.13.camel@basalt>
1 sibling, 0 replies; 52+ messages in thread
From: Hollis Blanchard @ 2008-03-20 16:43 UTC (permalink / raw)
To: Carsten Otte
Cc: aliguori, EHRHARDT, arnd, kvm-devel, heiko.carstens, jeroney,
virtualization, borntraeger, schwidefsky, rvdheij, os, jblunck,
Zhang, Xiantao
On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
> Index: kvm/include/linux/kvm_host.h
> ===================================================================
> --- kvm.orig/include/linux/kvm_host.h
> +++ kvm/include/linux/kvm_host.h
> @@ -24,7 +24,11 @@
>
> #include <asm/kvm_host.h>
>
> +#ifdef CONFIG_S390
> +#define KVM_MAX_VCPUS 64
> +#else
> #define KVM_MAX_VCPUS 16
> +#endif
> #define KVM_MEMORY_SLOTS 32
> /* memory slots that does not exposed to userspace */
> #define KVM_PRIVATE_MEM_SLOTS 4
>
Why don't we just define this in <asm/kvm_host.h> ?
--
Hollis Blanchard
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <1206031383.6356.13.camel@basalt>]
* Re: [RFC/PATCH 05/15] KVM_MAX_VCPUS
[not found] ` <1206031383.6356.13.camel@basalt>
@ 2008-03-20 16:48 ` Carsten Otte
[not found] ` <47E2954B.7090903@de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 16:48 UTC (permalink / raw)
To: Hollis Blanchard
Cc: aliguori, Christian Ehrhardt, arnd, kvm-devel, mschwid2, heicars2,
jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
jblunck, Zhang, Xiantao
Hollis Blanchard wrote:
> On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
>> Index: kvm/include/linux/kvm_host.h
>> ===================================================================
>> --- kvm.orig/include/linux/kvm_host.h
>> +++ kvm/include/linux/kvm_host.h
>> @@ -24,7 +24,11 @@
>>
>> #include <asm/kvm_host.h>
>>
>> +#ifdef CONFIG_S390
>> +#define KVM_MAX_VCPUS 64
>> +#else
>> #define KVM_MAX_VCPUS 16
>> +#endif
>> #define KVM_MEMORY_SLOTS 32
>> /* memory slots that does not exposed to userspace */
>> #define KVM_PRIVATE_MEM_SLOTS 4
>>
> Why don't we just define this in <asm/kvm_host.h> ?
No problem with that, I just wanted to keep impact on common code very
low and things like this separated from the actual port. I have a few
things like this that can safely be taken care of later.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E2954B.7090903@de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 05/15] KVM_MAX_VCPUS
[not found] ` <47E2954B.7090903@de.ibm.com>
@ 2008-03-21 10:41 ` Avi Kivity
2008-03-21 11:13 ` Carsten Otte
0 siblings, 1 reply; 52+ messages in thread
From: Avi Kivity @ 2008-03-21 10:41 UTC (permalink / raw)
To: carsteno
Cc: Christian Ehrhardt, Hollis Blanchard, arnd, kvm-devel, mschwid2,
heicars2, jeroney, borntrae, virtualization, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao
Carsten Otte wrote:
> Hollis Blanchard wrote:
>
>> On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
>>
>>> Index: kvm/include/linux/kvm_host.h
>>> ===================================================================
>>> --- kvm.orig/include/linux/kvm_host.h
>>> +++ kvm/include/linux/kvm_host.h
>>> @@ -24,7 +24,11 @@
>>>
>>> #include <asm/kvm_host.h>
>>>
>>> +#ifdef CONFIG_S390
>>> +#define KVM_MAX_VCPUS 64
>>> +#else
>>> #define KVM_MAX_VCPUS 16
>>> +#endif
>>> #define KVM_MEMORY_SLOTS 32
>>> /* memory slots that does not exposed to userspace */
>>> #define KVM_PRIVATE_MEM_SLOTS 4
>>>
>>>
>> Why don't we just define this in <asm/kvm_host.h> ?
>>
> No problem with that, I just wanted to keep impact on common code very
> low and things like this seperated from the actual port. I have a few
> things like this that can safely be taken care about later.
>
>
Since there were a few other comments, I went ahead and moved those
#defines to asm-x86.
--
Any sufficiently difficult bug is indistinguishable from a feature.
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [kvm-devel] [RFC/PATCH 05/15] KVM_MAX_VCPUS
2008-03-21 10:41 ` [kvm-devel] " Avi Kivity
@ 2008-03-21 11:13 ` Carsten Otte
0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 11:13 UTC (permalink / raw)
To: Avi Kivity
Cc: Christian Ehrhardt, Hollis Blanchard, arnd, carsteno, heicars2,
mschwid2, jeroney, borntrae, virtualization, kvm-devel, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao
Avi Kivity wrote:
> Carsten Otte wrote:
>> Hollis Blanchard wrote:
>>
>>> On Thu, 2008-03-20 at 17:24 +0100, Carsten Otte wrote:
>>>
>>>> Index: kvm/include/linux/kvm_host.h
>>>> ===================================================================
>>>> --- kvm.orig/include/linux/kvm_host.h
>>>> +++ kvm/include/linux/kvm_host.h
>>>> @@ -24,7 +24,11 @@
>>>>
>>>> #include <asm/kvm_host.h>
>>>>
>>>> +#ifdef CONFIG_S390
>>>> +#define KVM_MAX_VCPUS 64
>>>> +#else
>>>> #define KVM_MAX_VCPUS 16
>>>> +#endif
>>>> #define KVM_MEMORY_SLOTS 32
>>>> /* memory slots that does not exposed to userspace */
>>>> #define KVM_PRIVATE_MEM_SLOTS 4
>>>>
>>>>
>>> Why don't we just define this in <asm/kvm_host.h> ?
>>>
>> No problem with that, I just wanted to keep impact on common code very
>> low and things like this seperated from the actual port. I have a few
>> things like this that can safely be taken care about later.
>>
>>
>
> Since there were a few other comments, I went ahead and moved those
> #defines to asm-x86.
Great! I will rebase the patch series.
^ permalink raw reply [flat|nested] 52+ messages in thread
[parent not found: <1206030320.6690.63.camel@cotte.boeblingen.de.ibm.com>]
* Re: [RFC/PATCH 12/15] kvm-s390: API documentation
[not found] ` <1206030320.6690.63.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 17:22 ` Randy Dunlap
[not found] ` <20080320102200.3718e573.randy.dunlap@oracle.com>
1 sibling, 0 replies; 52+ messages in thread
From: Randy Dunlap @ 2008-03-20 17:22 UTC (permalink / raw)
To: Carsten Otte
Cc: aliguori, EHRHARDT, hollisb, arnd, kvm-devel, heiko.carstens,
jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
jblunck, Zhang, Xiantao
On Thu, 20 Mar 2008 17:25:20 +0100 Carsten Otte wrote:
> This patch adds Documentation/s390/kvm.txt, which describes specifics of kvm's
> user interface that are unique to s390 architecture.
>
> Signed-off-by: Carsten Otte <cotte@de.ibm.com>
> ---
> Documentation/s390/kvm.txt | 125 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 125 insertions(+)
>
> Index: kvm/Documentation/s390/kvm.txt
> ===================================================================
> --- /dev/null
> +++ kvm/Documentation/s390/kvm.txt
> @@ -0,0 +1,125 @@
> +*** BIG FAT WARNING ***
> +The kvm module is currently in EXPERIMENTAL state for s390. This means, that
This means that [no comma]
> +the interface to the module is not yet considered to remain stable. Thus, be
> +prepared that we keep breaking your userspace application and guest
> +compatibility over and over again until we feel happy with the result. Make sure
> +your guest kernel, your host kernel, and your userspace launcher are in a
> +consistent state.
> +
> +This Documentation describes the unique ioctl calls to /dev/kvm, the resulting
> +kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.
> +
> +1. ioctl calls to /dev/kvm
> +KVM does support the following ioctls on s390 that are common with other
> +architectures and do behave the same:
> +KVM_GET_API_VERSION
> +KVM_CREATE_VM (*) see note
> +KVM_CHECK_EXTENSION
> +KVM_GET_VCPU_MMAP_SIZE
> +
> +Notes:
> +* KVM_CREATE_VM may fail on s390, if the calling process has multiple
> +threads and has not called KVM_S390_ENABLE_SIE before.
> +
> +In addition, on s390 the following architecture specific ioctls are supported:
> +ioctl: KVM_S390_ENABLE_SIE
> +args: none
> +see also: include/linux/kvm.h
> +This call causes the kernel to switch on PGSTE in the user page table. This
> +operation is needed in order to run a virtual machine, and it requires the
> +calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
> +will implicitly try to switch on PGSTE if the user process has not called
> +KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
> +before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will
> +observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
> +operation, is not reversible, and will persist over the entire lifetime of
> +the calling process. It does not have any user-visibe effect other than a small
user-visible
> +performance penalty.
> +
> +2. ioctl calls to the kvm-vm file descriptor
> +KVM does support the following ioctls on s390 that are common with other
> +architectures and do behave the same:
> +KVM_CREATE_VCPU
> +KVM_SET_USER_MEMORY_REGION (*) see note
> +KVM_GET_DIRTY_LOG (**) see note
> +
> +Notes:
> +* kvm does only allow exactly one memory slot on s390, which has to start
> + at guest absolute address zero and at a user address that is aligned on any
> + page boundary. This hardware "limitation" allows us to have a few unique
> + optimizations. The memory slot does'nt have to be filled
doesn't
> + with memory actually, it may contain sparse holes. That said, with different
> + user memory layout this does still allow a large flexibility when
> + doing the guest memory setup.
> +** KVM_GET_DIRTY_LOG does'nt work proper yet. The user will receive an empty
doesn't work properly
> +log. This ioctl call is only needed for guest migration, and we intend to
> +implement this one in the future.
> +
> +In addition, on s390 the following architecture specific ioctls for the kvm-vm
> +file descriptor are supported:
> +ioctl: KVM_S390_INTERRUPT
> +args: struct kvm_s390_interrupt *
> +see also: include/linux/kvm.h
> +This ioctl is used to submit a floating interrupt for a virtual machine.
> +Floating interrupts may be delivered to any virtual cpu in the configuration.
> +Only some interrupt types defined in include/linux/kvm.h make sense when
> +submitted as floating interrupt. The following interrupts are not considered
interrupts.
> +to be useful as floating interrupt, and a call to inject them will result in
interrupts,
> +-EINVAL error code: program interrupts, and interprocessor signals. Valid
no comma
> +floating interrupts are:
> +KVM_S390_INT_VIRTIO
> +KVM_S390_INT_SERVICE
> +
> +3. ioctl calls to the kvm-vcpu file descriptor
> +KVM does support the following ioctls on s390 that are common with other
> +architectures and do behave the same:
> +KVM_RUN
> +KVM_GET_REGS
> +KVM_SET_REGS
> +KVM_GET_SREGS
> +KVM_SET_SREGS
> +KVM_GET_FPU
> +KVM_SET_FPU
> +
> +In addition, on s390 the following architecture specific ioctls for the
> +kvm-vcpu file descriptor are supported:
> +ioctl: KVM_S390_INTERRUPT
> +args: struct kvm_s390_interrupt *
> +see also: include/linux/kvm.h
> +This ioctl is used to submit an interrupt for a specific virtual cpu.
> +Only some interrupt types defined in include/linux/kvm.h make sense when
> +submitted for a specific cpu. The following interrupts are not considered
> +to be useful, and a call to inject them will result in -EINVAL error code:
> +service processor calls, and virtio interrupts. Valid interrupt types are:
no comma
> +KVM_S390_PROGRAM_INT
> +KVM_S390_SIGP_STOP
> +KVM_S390_RESTART
> +KVM_S390_SIGP_SET_PREFIX
> +KVM_S390_INT_EMERGENCY
> +
> +ioctl: KVM_S390_STORE_STATUS
> +args: unsigned long
> +see also: include/linux/kvm.h
> +This ioctl stores the state of the cpu at the guest real address given as
> +argument, unless one of the following values defined in include/linux/kvm.h
> +is given as argument:
> +KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
> +absolute lowcore as defined by the principles of operation
> +KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
> +its prefix page just like the dump tool that comes with zipl. This is useful
> +to create a system dump for use with lkcdutils or crash.
> +
> +ioctl: KVM_S390_SET_INITIAL_PSW
> +args: struct kvm_s390_psw *
> +see also: include/linux/kvm.h
> +This ioctl can be used to set the processor status word (psw) of a stopped cpu
> +prior to running it with KVM_RUN. Note that this call is not required to modify
> +the psw during sie intercepts that fall back to userspace because struct kvm_run
> +does contain the psw, and this value is evaluated during reentry of KVM_RUN
> +after the intercept exit was recognized.
> +
> +ioctl: KVM_S390_INITIAL_RESET
> +args: none
> +see also: include/linux/kvm.h
> +This ioctl can be used to perform an initial cpu reset as defined by the
> +principles of operation. The target cpu has to be in stopped state.
---
~Randy
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <20080320102200.3718e573.randy.dunlap@oracle.com>]
* Re: [kvm-devel] [RFC/PATCH 12/15] kvm-s390: API documentation
[not found] ` <20080320102200.3718e573.randy.dunlap@oracle.com>
@ 2008-03-21 10:33 ` Carsten Otte
0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 10:33 UTC (permalink / raw)
To: Randy Dunlap
Cc: Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2, heicars2,
jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
jblunck, Zhang, Xiantao
Randy Dunlap wrote:
> This means that [no comma]
<snip>
Being a native speaker is cheating ;-). I've integrated your feedback,
for the next round of sending out these patches. Thank you :-).
^ permalink raw reply [flat|nested] 52+ messages in thread
[parent not found: <1206030278.6690.52.camel@cotte.boeblingen.de.ibm.com>]
* Re: [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <1206030278.6690.52.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 17:28 ` Jeremy Fitzhardinge
[not found] ` <47E29EC6.5050403@goop.org>
1 sibling, 0 replies; 52+ messages in thread
From: Jeremy Fitzhardinge @ 2008-03-20 17:28 UTC (permalink / raw)
To: Carsten Otte
Cc: aliguori, EHRHARDT, hollisb, arnd, borntraeger, kvm-devel,
heiko.carstens, jeroney, virtualization,
Linux Memory Management List, schwidefsky, rvdheij, os, jblunck,
Zhang, Xiantao
Carsten Otte wrote:
> +struct mm_struct *dup_mm(struct task_struct *tsk);
>
No prototypes in .c files. Put this in an appropriate header.
J
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E29EC6.5050403@goop.org>]
* Re: [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <47E29EC6.5050403@goop.org>
@ 2008-03-20 19:13 ` Dave Hansen
[not found] ` <1206040405.8232.24.camel@nimitz.home.sr71.net>
1 sibling, 0 replies; 52+ messages in thread
From: Dave Hansen @ 2008-03-20 19:13 UTC (permalink / raw)
To: Jeremy Fitzhardinge
Cc: Carsten Otte, aliguori, EHRHARDT, hollisb, arnd, borntraeger,
kvm-devel, heiko.carstens, jeroney, virtualization,
Linux Memory Management List, schwidefsky, rvdheij, os, jblunck,
Zhang, Xiantao
On Thu, 2008-03-20 at 10:28 -0700, Jeremy Fitzhardinge wrote:
> Carsten Otte wrote:
> > +struct mm_struct *dup_mm(struct task_struct *tsk);
>
> No prototypes in .c files. Put this in an appropriate header.
Well, and more fundamentally: do we really want dup_mm() able to be
called from other code?
Maybe we need a bit more detailed justification why fork() itself isn't
good enough. It looks to me like they basically need an arch-specific
argument to fork, telling the new process's page tables to take the
fancy new bit.
I'm really curious how this new stuff is going to get used. Are you
basically replacing fork() when creating kvm guests?
-- Dave
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <1206040405.8232.24.camel@nimitz.home.sr71.net>]
* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <1206040405.8232.24.camel@nimitz.home.sr71.net>
@ 2008-03-20 20:35 ` Carsten Otte
[not found] ` <47E2CAAC.6020903@de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 20:35 UTC (permalink / raw)
To: Dave Hansen
Cc: Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2, heicars2,
jeroney, borntrae, virtualization, Linux Memory Management List,
rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
Dave Hansen wrote:
> Well, and more fundamentally: do we really want dup_mm() able to be
> called from other code?
>
> Maybe we need a bit more detailed justification why fork() itself isn't
> good enough. It looks to me like they basically need an arch-specific
> argument to fork, telling the new process's page tables to take the
> fancy new bit.
>
> I'm really curious how this new stuff is going to get used. Are you
> basically replacing fork() when creating kvm guests?
No. The trick is, that we do need bigger page tables when running
guests: our page tables are usually 2k, but when running a guest
they're 4k to track both guest and host dirty&reference information.
This looks like this:
*----------*
*2k PTE's *
*----------*
*2k PGSTE *
*----------*
We don't want to waste precious memory for all page tables. We'd like
to have one kernel image that runs regular server workload _and_
guests. Therefore, we need to reallocate the page table after fork()
once we know that task is going to be a hypervisor. That's what this
code does: reallocate a bigger page table to accommodate the extra
information. The task needs to be single-threaded when calling for
extended page tables.
Btw: at fork() time, we cannot tell whether or not the user's going to
be a hypervisor. Therefore we cannot do this in fork.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E2CAAC.6020903@de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <47E2CAAC.6020903@de.ibm.com>
@ 2008-03-21 18:29 ` Dave Hansen
2008-03-21 19:03 ` Carsten Otte
` (3 more replies)
0 siblings, 4 replies; 52+ messages in thread
From: Dave Hansen @ 2008-03-21 18:29 UTC (permalink / raw)
To: carsteno
Cc: Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2, heicars2,
jeroney, borntrae, virtualization, Linux Memory Management List,
rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
On Thu, 2008-03-20 at 21:35 +0100, Carsten Otte wrote:
> Dave Hansen wrote:
> > Well, and more fundamentally: do we really want dup_mm() able to be
> > called from other code?
> >
> > Maybe we need a bit more detailed justification why fork() itself isn't
> > good enough. It looks to me like they basically need an arch-specific
> > argument to fork, telling the new process's page tables to take the
> > fancy new bit.
> >
> > I'm really curious how this new stuff is going to get used. Are you
> > basically replacing fork() when creating kvm guests?
> No. The trick is, that we do need bigger page tables when running
> guests: our page tables are usually 2k, but when running a guest
> they're 4k to track both guest and host dirty&reference information.
> This looks like this:
> *----------*
> *2k PTE's *
> *----------*
> *2k PGSTE *
> *----------*
> We don't want to waste precious memory for all page tables. We'd like
> to have one kernel image that runs regular server workload _and_
> guests.
That makes a lot of sense.
Is that layout (the shadow and regular stacked together) specified in
hardware somehow, or was it just chosen?
What you've done with dup_mm() is probably the brute-force way that I
would have done it had I just been trying to make a proof of concept or
something. I'm worried that there are a bunch of corner cases that
haven't been considered.
What if someone else is poking around with ptrace or something similar
and they bump the mm_users:
+ if (tsk->mm->context.pgstes)
+ return 0;
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+ tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+ return -EINVAL;
-------->HERE
+ tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
+ mm = dup_mm(tsk);
It'll race, possibly fault in some other pages, and those faults will be
lost during the dup_mm(). I think you need to be able to lock out all
of the users of access_process_vm() before you go and do this. You also
need to make sure that anyone who has looked at task->mm doesn't go and
get a reference to it and get confused later when it isn't the task->mm
any more.
> Therefore, we need to reallocate the page table after fork()
> once we know that task is going to be a hypervisor. That's what this
> code does: reallocate a bigger page table to accomondate the extra
> information. The task needs to be single-threaded when calling for
> extended page tables.
>
> Btw: at fork() time, we cannot tell whether or not the user's going to
> be a hypervisor. Therefore we cannot do this in fork.
Can you convert the page tables at a later time without doing a
wholesale replacement of the mm? It should be a bit easier to keep
people off the pagetables than keep their grubby mitts off the mm
itself.
-- Dave
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
2008-03-21 18:29 ` Dave Hansen
@ 2008-03-21 19:03 ` Carsten Otte
2008-03-22 17:57 ` Heiko Carstens
` (2 subsequent siblings)
3 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 19:03 UTC (permalink / raw)
To: Dave Hansen
Cc: Christian Ehrhardt, hollisb, arnd, Linux Memory Management List,
carsteno, mschwid2, heicars2, jeroney, borntrae, virtualization,
kvm-devel, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
Dave Hansen wrote:
> On Thu, 2008-03-20 at 21:35 +0100, Carsten Otte wrote:
>> Dave Hansen wrote:
>>> Well, and more fundamentally: do we really want dup_mm() able to be
>>> called from other code?
>>>
>>> Maybe we need a bit more detailed justification why fork() itself isn't
>>> good enough. It looks to me like they basically need an arch-specific
>>> argument to fork, telling the new process's page tables to take the
>>> fancy new bit.
>>>
>>> I'm really curious how this new stuff is going to get used. Are you
>>> basically replacing fork() when creating kvm guests?
>> No. The trick is, that we do need bigger page tables when running
>> guests: our page tables are usually 2k, but when running a guest
>> they're 4k to track both guest and host dirty&reference information.
>> This looks like this:
>> *----------*
>> *2k PTE's *
>> *----------*
>> *2k PGSTE *
>> *----------*
>> We don't want to waste precious memory for all page tables. We'd like
>> to have one kernel image that runs regular server workload _and_
>> guests.
>
> That makes a lot of sense.
>
> Is that layout (the shadow and regular stacked together) specified in
> hardware somehow, or was it just chosen?
It's defined by hardware. The chip just adds +2k to the ptep to get to
the corresponding pgste. Both pte and pgste are 64bit per page. I know
Heiko and Martin have thought a lot about possible races. I'll have to
leave your question on the race against pfault open for them.
Btw: thanks a lot for reviewing our changes :-)
cheers,
Carsten
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
2008-03-21 18:29 ` Dave Hansen
2008-03-21 19:03 ` Carsten Otte
@ 2008-03-22 17:57 ` Heiko Carstens
[not found] ` <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>
2008-03-25 15:37 ` Carsten Otte
3 siblings, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-22 17:57 UTC (permalink / raw)
To: Dave Hansen
Cc: Christian Ehrhardt, hollisb, arnd, Linux Memory Management List,
carsteno, heicars2, mschwid2, jeroney, borntrae, virtualization,
kvm-devel, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
> What you've done with dup_mm() is probably the brute-force way that I
> would have done it had I just been trying to make a proof of concept or
> something. I'm worried that there are a bunch of corner cases that
> haven't been considered.
>
> What if someone else is poking around with ptrace or something similar
> and they bump the mm_users:
>
> + if (tsk->mm->context.pgstes)
> + return 0;
> + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
> + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
> + return -EINVAL;
> -------->HERE
> + tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
> + mm = dup_mm(tsk);
>
> It'll race, possibly fault in some other pages, and those faults will be
> lost during the dup_mm(). I think you need to be able to lock out all
> of the users of access_process_vm() before you go and do this. You also
> need to make sure that anyone who has looked at task->mm doesn't go and
> get a reference to it and get confused later when it isn't the task->mm
> any more.
>
> > Therefore, we need to reallocate the page table after fork()
> > once we know that task is going to be a hypervisor. That's what this
> > code does: reallocate a bigger page table to accomondate the extra
> > information. The task needs to be single-threaded when calling for
> > extended page tables.
> >
> > Btw: at fork() time, we cannot tell whether or not the user's going to
> > be a hypervisor. Therefore we cannot do this in fork.
>
> Can you convert the page tables at a later time without doing a
> wholesale replacement of the mm? It should be a bit easier to keep
> people off the pagetables than keep their grubby mitts off the mm
> itself.
Yes, as far as I can see you're right. And whatever we do in arch code,
after all it's just a work around to avoid a new clone flag.
If something like clone() with CLONE_KVM would be useful for more
architectures than just s390 then maybe we should try to get a flag.
Oh... there are just two unused clone flag bits left. Looks like the
namespace changes ate up a lot of them lately.
Well, we could still play dirty tricks like setting a bit in current
via whatever mechanism which indicates child-wants-extended-page-tables
and then just fork and be happy.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>
@ 2008-03-23 10:15 ` Avi Kivity
[not found] ` <47E62DBA.4050102@qumranet.com>
1 sibling, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-23 10:15 UTC (permalink / raw)
To: Heiko Carstens
Cc: Christian Ehrhardt, hollisb, arnd, carsteno, heicars2,
Dave Hansen, jeroney, borntrae, virtualization,
Linux Memory Management List, mschwid2, rvdheij, Olaf Schnapper,
jblunck, Zhang, Xiantao, kvm-devel
Heiko Carstens wrote:
>> What you've done with dup_mm() is probably the brute-force way that I
>> would have done it had I just been trying to make a proof of concept or
>> something. I'm worried that there are a bunch of corner cases that
>> haven't been considered.
>>
>> What if someone else is poking around with ptrace or something similar
>> and they bump the mm_users:
>>
>> + if (tsk->mm->context.pgstes)
>> + return 0;
>> + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
>> + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
>> + return -EINVAL;
>> -------->HERE
>> + tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
>> + mm = dup_mm(tsk);
>>
>> It'll race, possibly fault in some other pages, and those faults will be
>> lost during the dup_mm(). I think you need to be able to lock out all
>> of the users of access_process_vm() before you go and do this. You also
>> need to make sure that anyone who has looked at task->mm doesn't go and
>> get a reference to it and get confused later when it isn't the task->mm
>> any more.
>>
>>
>>> Therefore, we need to reallocate the page table after fork()
>>> once we know that task is going to be a hypervisor. That's what this
>>> code does: reallocate a bigger page table to accomondate the extra
>>> information. The task needs to be single-threaded when calling for
>>> extended page tables.
>>>
>>> Btw: at fork() time, we cannot tell whether or not the user's going to
>>> be a hypervisor. Therefore we cannot do this in fork.
>>>
>> Can you convert the page tables at a later time without doing a
>> wholesale replacement of the mm? It should be a bit easier to keep
>> people off the pagetables than keep their grubby mitts off the mm
>> itself.
>>
>
> Yes, as far as I can see you're right. And whatever we do in arch code,
> after all it's just a work around to avoid a new clone flag.
> If something like clone() with CLONE_KVM would be useful for more
> architectures than just s390 then maybe we should try to get a flag.
>
> Oh... there are just two unused clone flag bits left. Looks like the
> namespace changes ate up a lot of them lately.
>
> Well, we could still play dirty tricks like setting a bit in current
> via whatever mechanism which indicates child-wants-extended-page-tables
> and then just fork and be happy.
>
How about taking mmap_sem for write and converting all page tables
in-place? I'd rather avoid the need to fork() when creating a VM.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E62DBA.4050102@qumranet.com>]
* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <47E62DBA.4050102@qumranet.com>
@ 2008-03-23 18:23 ` Martin Schwidefsky
2008-03-24 6:57 ` Avi Kivity
[not found] ` <47E750ED.7060509@qumranet.com>
0 siblings, 2 replies; 52+ messages in thread
From: Martin Schwidefsky @ 2008-03-23 18:23 UTC (permalink / raw)
To: Avi Kivity
Cc: Christian Ehrhardt, hollisb, arnd, carsteno, Heiko Carstens,
Dave Hansen, jeroney, borntrae, virtualization,
Linux Memory Management List, mschwid2, heicars2, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao, kvm-devel
On Sun, 2008-03-23 at 12:15 +0200, Avi Kivity wrote:
> >> Can you convert the page tables at a later time without doing a
> >> wholesale replacement of the mm? It should be a bit easier to keep
> >> people off the pagetables than keep their grubby mitts off the mm
> >> itself.
> >>
> >
> > Yes, as far as I can see you're right. And whatever we do in arch code,
> > after all it's just a work around to avoid a new clone flag.
> > If something like clone() with CLONE_KVM would be useful for more
> > architectures than just s390 then maybe we should try to get a flag.
> >
> > Oh... there are just two unused clone flag bits left. Looks like the
> > namespace changes ate up a lot of them lately.
> >
> > Well, we could still play dirty tricks like setting a bit in current
> > via whatever mechanism which indicates child-wants-extended-page-tables
> > and then just fork and be happy.
> >
>
> How about taking mmap_sem for write and converting all page tables
> in-place? I'd rather avoid the need to fork() when creating a VM.
That was my initial approach as well. If all the page table allocations
can be fulfilled the code is not too complicated. Handling allocation
failures gets tricky. At this point I realized that dup_mmap already
does what we want to do. It walks all the page tables, allocates new
page tables and copies the ptes. In principle I would be reinventing the
wheel if we cannot use dup_mmap.
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
2008-03-23 18:23 ` Martin Schwidefsky
@ 2008-03-24 6:57 ` Avi Kivity
[not found] ` <47E750ED.7060509@qumranet.com>
1 sibling, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-24 6:57 UTC (permalink / raw)
To: schwidefsky
Cc: Christian Ehrhardt, hollisb, arnd, carsteno, Heiko Carstens,
Dave Hansen, jeroney, borntrae, virtualization,
Linux Memory Management List, mschwid2, heicars2, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao, kvm-devel
Martin Schwidefsky wrote:
> On Sun, 2008-03-23 at 12:15 +0200, Avi Kivity wrote:
>
>>>> Can you convert the page tables at a later time without doing a
>>>> wholesale replacement of the mm? It should be a bit easier to keep
>>>> people off the pagetables than keep their grubby mitts off the mm
>>>> itself.
>>>>
>>>>
>>> Yes, as far as I can see you're right. And whatever we do in arch code,
>>> after all it's just a work around to avoid a new clone flag.
>>> If something like clone() with CLONE_KVM would be useful for more
>>> architectures than just s390 then maybe we should try to get a flag.
>>>
>>> Oh... there are just two unused clone flag bits left. Looks like the
>>> namespace changes ate up a lot of them lately.
>>>
>>> Well, we could still play dirty tricks like setting a bit in current
>>> via whatever mechanism which indicates child-wants-extended-page-tables
>>> and then just fork and be happy.
>>>
>>>
>> How about taking mmap_sem for write and converting all page tables
>> in-place? I'd rather avoid the need to fork() when creating a VM.
>>
>
> That was my initial approach as well. If all the page table allocations
> can be fullfilled the code is not too complicated. To handle allocation
> failures gets tricky. At this point I realized that dup_mmap already
> does what we want to do. It walks all the page tables, allocates new
> page tables and copies the ptes. In principle I would reinvent the wheel
> if we can not use dup_mmap
Well, dup_mm() can't work (and now that I think about it, for more
reasons -- what if the process has threads?).
I don't think conversion is too bad. You'd need a four-level loop to
allocate and convert, and another loop to deallocate in case of error.
If, as I don't doubt, s390 hardware can modify the ptes, you'd need
cmpxchg to read and clear a pte in one operation.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E750ED.7060509@qumranet.com>]
* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <47E750ED.7060509@qumranet.com>
@ 2008-03-25 6:08 ` Carsten Otte
[not found] ` <47E896EA.5060309@de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-25 6:08 UTC (permalink / raw)
To: Avi Kivity
Cc: Christian Ehrhardt, arnd, hollisb, carsteno, Heiko Carstens,
Dave Hansen, jeroney, borntrae, virtualization,
Linux Memory Management List, mschwid2, heicars2, schwidefsky,
rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao, kvm-devel
Avi Kivity wrote:
> Well, dup_mm() can't work (and now that I think about it, for more
> reasons -- what if the process has threads?).
We lock out multithreaded users already, -EINVAL.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E896EA.5060309@de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
[not found] ` <47E896EA.5060309@de.ibm.com>
@ 2008-03-25 6:12 ` Avi Kivity
0 siblings, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-25 6:12 UTC (permalink / raw)
To: carsteno
Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, Heiko Carstens,
Dave Hansen, jeroney, borntrae, virtualization,
Linux Memory Management List, mschwid2, heicars2, schwidefsky,
rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
Carsten Otte wrote:
> Avi Kivity wrote:
>> Well, dup_mm() can't work (and now that I think about it, for more
>> reasons -- what if the process has threads?).
> We lock out multithreaded users already, -EINVAL.
>
Would be much better if this can be avoided. It's surprising.
--
Any sufficiently difficult bug is indistinguishable from a feature.
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [kvm-devel] [RFC/PATCH 01/15] preparation: provide hook to enable pgstes in user pagetable
2008-03-21 18:29 ` Dave Hansen
` (2 preceding siblings ...)
[not found] ` <20080322175705.GD6367@osiris.boeblingen.de.ibm.com>
@ 2008-03-25 15:37 ` Carsten Otte
3 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-25 15:37 UTC (permalink / raw)
To: Dave Hansen
Cc: Christian Ehrhardt, hollisb, arnd, Linux Memory Management List,
carsteno, mschwid2, heicars2, jeroney, borntrae, virtualization,
kvm-devel, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
Am Freitag, den 21.03.2008, 11:29 -0700 schrieb Dave Hansen:
> What you've done with dup_mm() is probably the brute-force way that I
> would have done it had I just been trying to make a proof of concept or
> something. I'm worried that there are a bunch of corner cases that
> haven't been considered.
>
> What if someone else is poking around with ptrace or something similar
> and they bump the mm_users:
>
> + if (tsk->mm->context.pgstes)
> + return 0;
> + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
> + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
> + return -EINVAL;
> -------->HERE
> + tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
> + mm = dup_mm(tsk);
>
> It'll race, possibly fault in some other pages, and those faults will be
> lost during the dup_mm(). I think you need to be able to lock out all
> of the users of access_process_vm() before you go and do this. You also
> need to make sure that anyone who has looked at task->mm doesn't go and
> get a reference to it and get confused later when it isn't the task->mm
> any more.
Good catch, Dave. We intend to get rid of that race via task_lock().
That should lock out ptrace and all others who modify mm_users via get_task_mm.
See patch below:
---
arch/s390/Kconfig | 4 ++
arch/s390/kernel/setup.c | 4 ++
arch/s390/mm/pgtable.c | 65 +++++++++++++++++++++++++++++++++++++++--
include/asm-s390/mmu.h | 1
include/asm-s390/mmu_context.h | 8 ++++-
include/asm-s390/pgtable.h | 1
include/linux/sched.h | 2 +
kernel/fork.c | 2 -
8 files changed, 82 insertions(+), 5 deletions(-)
Index: linux-host/arch/s390/Kconfig
===================================================================
--- linux-host.orig/arch/s390/Kconfig
+++ linux-host/arch/s390/Kconfig
@@ -55,6 +55,10 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
+config PGSTE
+ bool
+ default y if KVM
+
mainmenu "Linux Kernel Configuration"
config S390
Index: linux-host/arch/s390/kernel/setup.c
===================================================================
--- linux-host.orig/arch/s390/kernel/setup.c
+++ linux-host/arch/s390/kernel/setup.c
@@ -315,7 +315,11 @@ static int __init early_parse_ipldelay(c
early_param("ipldelay", early_parse_ipldelay);
#ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
unsigned int switch_amode = 0;
+#endif
EXPORT_SYMBOL_GPL(switch_amode);
static void set_amode_and_uaccess(unsigned long user_amode,
Index: linux-host/arch/s390/mm/pgtable.c
===================================================================
--- linux-host.orig/arch/s390/mm/pgtable.c
+++ linux-host/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
#define TABLES_PER_PAGE 4
#define FRAG_MASK 15UL
#define SECOND_HALVES 10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 256, 0, PAGE_SIZE/4);
+ clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 768, 0, PAGE_SIZE/4);
+}
+
#else
#define ALLOC_ORDER 2
#define TABLES_PER_PAGE 2
#define FRAG_MASK 3UL
#define SECOND_HALVES 2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+ memset(table + 256, 0, PAGE_SIZE/2);
+}
+
#endif
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct m
unsigned long *table;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
spin_lock(&mm->page_table_lock);
page = NULL;
if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct m
pgtable_page_ctor(page);
page->flags &= ~FRAG_MASK;
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ if (mm->context.pgstes)
+ clear_table_pgstes(table);
+ else
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
spin_lock(&mm->page_table_lock);
list_add(&page->lru, &mm->context.pgtable_list);
}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *m
struct page *page;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm
mm->context.noexec = 0;
update_mm(mm, tsk);
}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm;
+ int rc;
+
+ task_lock(tsk);
+
+ rc = 0;
+ if (tsk->mm->context.pgstes)
+ goto unlock;
+
+ rc = -EINVAL;
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+ tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+ goto unlock;
+
+ tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
+ mm = dup_mm(tsk);
+ tsk->mm->context.pgstes = 0;
+
+ rc = -ENOMEM;
+ if (!mm)
+ goto unlock;
+ mmput(tsk->mm);
+ tsk->mm = tsk->active_mm = mm;
+ preempt_disable();
+ update_mm(mm, tsk);
+ cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+ preempt_enable();
+ rc = 0;
+unlock:
+ task_unlock(tsk);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
Index: linux-host/include/asm-s390/mmu.h
===================================================================
--- linux-host.orig/include/asm-s390/mmu.h
+++ linux-host/include/asm-s390/mmu.h
@@ -7,6 +7,7 @@ typedef struct {
unsigned long asce_bits;
unsigned long asce_limit;
int noexec;
+ int pgstes;
} mm_context_t;
#endif
Index: linux-host/include/asm-s390/mmu_context.h
===================================================================
--- linux-host.orig/include/asm-s390/mmu_context.h
+++ linux-host/include/asm-s390/mmu_context.h
@@ -20,7 +20,13 @@ static inline int init_new_context(struc
#ifdef CONFIG_64BIT
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
- mm->context.noexec = s390_noexec;
+ if (current->mm->context.pgstes) {
+ mm->context.noexec = 0;
+ mm->context.pgstes = 1;
+ } else {
+ mm->context.noexec = s390_noexec;
+ mm->context.pgstes = 0;
+ }
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
Index: linux-host/include/asm-s390/pgtable.h
===================================================================
--- linux-host.orig/include/asm-s390/pgtable.h
+++ linux-host/include/asm-s390/pgtable.h
@@ -966,6 +966,7 @@ static inline pte_t mk_swap_pte(unsigned
extern int add_shared_memory(unsigned long start, unsigned long size);
extern int remove_shared_memory(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
/*
* No page table caches to initialise
Index: linux-host/kernel/fork.c
===================================================================
--- linux-host.orig/kernel/fork.c
+++ linux-host/kernel/fork.c
@@ -498,7 +498,7 @@ void mm_release(struct task_struct *tsk,
* Allocate a new mm structure and copy contents from the
* mm structure of the passed in task structure.
*/
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
{
struct mm_struct *mm, *oldmm = current->mm;
int err;
Index: linux-host/include/linux/sched.h
===================================================================
--- linux-host.orig/include/linux/sched.h
+++ linux-host/include/linux/sched.h
@@ -1758,6 +1758,8 @@ extern void mmput(struct mm_struct *);
extern struct mm_struct *get_task_mm(struct task_struct *task);
/* Remove the current tasks stale references to the old mm_struct */
extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Allocate a new mm structure and copy contents from tsk->mm */
+extern struct mm_struct *dup_mm(struct task_struct *tsk);
extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
extern void flush_thread(void);
^ permalink raw reply [flat|nested] 52+ messages in thread
[parent not found: <1206030326.6690.65.camel@cotte.boeblingen.de.ibm.com>]
* Re: [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <1206030326.6690.65.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-20 17:16 ` Randy Dunlap
[not found] ` <20080320101602.f2737c94.randy.dunlap@oracle.com>
` (2 subsequent siblings)
3 siblings, 0 replies; 52+ messages in thread
From: Randy Dunlap @ 2008-03-20 17:16 UTC (permalink / raw)
To: Carsten Otte
Cc: aliguori, EHRHARDT, hollisb, arnd, kvm-devel, heiko.carstens,
jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
jblunck, Zhang, Xiantao
On Thu, 20 Mar 2008 17:25:26 +0100 Carsten Otte wrote:
> From: Christian Borntraeger <borntraeger@de.ibm.com>
> From: Carsten Otte <cotte@de.ibm.com>
>
> This patch adds functionality to detect if the kernel runs under the KVM
> hypervisor. A macro MACHINE_IS_KVM is exported for device drivers. This
> allows drivers to skip device detection if the systems runs non-virtualized.
> We also define a preferred console to avoid having the ttyS0, which is a line
> mode only console.
>
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
> Signed-off-by: Carsten Otte <cotte@de.ibm.com>
> ---
> arch/s390/Kconfig | 7 +++++++
> arch/s390/kernel/early.c | 4 ++++
> arch/s390/kernel/setup.c | 10 +++++++---
> include/asm-s390/setup.h | 1 +
> 4 files changed, 19 insertions(+), 3 deletions(-)
>
> Index: kvm/arch/s390/kernel/early.c
> ===================================================================
> --- kvm.orig/arch/s390/kernel/early.c
> +++ kvm/arch/s390/kernel/early.c
> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
> /* Running on a P/390 ? */
> if (cpuinfo->cpu_id.machine == 0x7490)
> machine_flags |= 4;
> +
> + /* Running under KVM ? */
> + if (cpuinfo->cpu_id.version == 0xfe)
Hi,
Where are these magic numbers documented? (0x7490, 0xfe, etc.)
> + machine_flags |= 64;
> }
>
> #ifdef CONFIG_64BIT
---
~Randy
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <20080320101602.f2737c94.randy.dunlap@oracle.com>]
* Re: [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <20080320101602.f2737c94.randy.dunlap@oracle.com>
@ 2008-03-20 17:27 ` Carsten Otte
0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 17:27 UTC (permalink / raw)
To: Randy Dunlap
Cc: aliguori, Christian Ehrhardt, hollisb, arnd, kvm-devel, mschwid2,
heicars2, jeroney, virtualization, borntrae, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao
Randy Dunlap wrote:
>> Index: kvm/arch/s390/kernel/early.c
>> ===================================================================
>> --- kvm.orig/arch/s390/kernel/early.c
>> +++ kvm/arch/s390/kernel/early.c
>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>> /* Running on a P/390 ? */
>> if (cpuinfo->cpu_id.machine == 0x7490)
>> machine_flags |= 4;
>> +
>> + /* Running under KVM ? */
>> + if (cpuinfo->cpu_id.version == 0xfe)
>
> Hi,
>
> Where are these magic numbers documented? (0x7490, 0xfe, etc.)
>
>
>> + machine_flags |= 64;
>> }
>>
>> #ifdef CONFIG_64BIT
The cpuid (and most other things about s390 arch) are documented in
the principles of operation:
http://publibz.boulder.ibm.com/epubs/pdf/a2278324.pdf
http://publibz.boulder.ibm.com/epubs/pdf/dz9zs001.pdf
(see chapter "control instructions" - store cpu id)
The 0xfe, however, is a convention: the kvm arch code sets this value
where it implements that instruction. See "privileged instructions" patch.
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <1206030326.6690.65.camel@cotte.boeblingen.de.ibm.com>
2008-03-20 17:16 ` [RFC/PATCH 14/15] guest: detect when running on kvm Randy Dunlap
[not found] ` <20080320101602.f2737c94.randy.dunlap@oracle.com>
@ 2008-03-20 17:53 ` Christoph Hellwig
[not found] ` <20080320175357.GA30959@infradead.org>
3 siblings, 0 replies; 52+ messages in thread
From: Christoph Hellwig @ 2008-03-20 17:53 UTC (permalink / raw)
To: Carsten Otte
Cc: EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens, jeroney,
virtualization, borntraeger, schwidefsky, rvdheij, os, jblunck,
Zhang, Xiantao
On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
> /* Running on a P/390 ? */
> if (cpuinfo->cpu_id.machine == 0x7490)
> machine_flags |= 4;
> +
> + /* Running under KVM ? */
> + if (cpuinfo->cpu_id.version == 0xfe)
> + machine_flags |= 64;
Shouldn't these have symbolic names?
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <20080320175357.GA30959@infradead.org>]
[parent not found: <47E2CAFF.3070203@de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <47E2CAFF.3070203@de.ibm.com>
@ 2008-03-20 19:41 ` Christoph Hellwig
[not found] ` <20080320194137.GA9975@infradead.org>
1 sibling, 0 replies; 52+ messages in thread
From: Christoph Hellwig @ 2008-03-20 19:41 UTC (permalink / raw)
To: carsteno
Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
jeroney, virtualization, Christoph Hellwig, borntrae, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao
On Thu, Mar 20, 2008 at 09:37:19PM +0100, Carsten Otte wrote:
> Christoph Hellwig wrote:
>> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
>>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>>> /* Running on a P/390 ? */
>>> if (cpuinfo->cpu_id.machine == 0x7490)
>>> machine_flags |= 4;
>>> +
>>> + /* Running under KVM ? */
>>> + if (cpuinfo->cpu_id.version == 0xfe)
>>> + machine_flags |= 64;
>>
>> Shouldn't these have symbolic names?
> You mean symbolics for machine_flags? Or symbolics for cpu ids?
Either.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <20080320194137.GA9975@infradead.org>]
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <20080320194137.GA9975@infradead.org>
@ 2008-03-20 20:59 ` Carsten Otte
[not found] ` <47E2D034.4090600@de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 20:59 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Christian Ehrhardt, arnd, hollisb, carsteno, mschwid2, heicars2,
jeroney, virtualization, kvm-devel, borntrae, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao
Christoph Hellwig wrote:
> On Thu, Mar 20, 2008 at 09:37:19PM +0100, Carsten Otte wrote:
>> Christoph Hellwig wrote:
>>> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
>>>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>>>> /* Running on a P/390 ? */
>>>> if (cpuinfo->cpu_id.machine == 0x7490)
>>>> machine_flags |= 4;
>>>> +
>>>> + /* Running under KVM ? */
>>>> + if (cpuinfo->cpu_id.version == 0xfe)
>>>> + machine_flags |= 64;
>>> Shouldn't these have symbolic names?
>> You mean symbolics for machine_flags? Or symbolics for cpu ids?
>
> Either.
Hmmh. Symbolic names for cpu ids probably didn't make sense until now
that kvm also uses them. Before, this was the only place that used them.
With kvm and 0xfe, this one is sort of a temporary one. We intend to
rework this code to use "store system information", which would give
us way more information about the machine and its hypervisor
topology. Until my todo list gets to that point, I think we'll have
to cope with a temporary number. We'll aim for making that change
before 2.6.26 gets released.
The machine flags do have symbolic names, defined in
include/asm-s390/setup.h. And yea, they should be used here. Will
change that.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E2D034.4090600@de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <47E2D034.4090600@de.ibm.com>
@ 2008-03-20 21:22 ` Heiko Carstens
[not found] ` <20080320212255.GA4225@osiris.boeblingen.de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-20 21:22 UTC (permalink / raw)
To: carsteno
Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
jeroney, virtualization, Christoph Hellwig, borntrae, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao
On Thu, Mar 20, 2008 at 09:59:32PM +0100, Carsten Otte wrote:
> Christoph Hellwig wrote:
> > On Thu, Mar 20, 2008 at 09:37:19PM +0100, Carsten Otte wrote:
> >> Christoph Hellwig wrote:
> >>> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
> >>>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
> >>>> /* Running on a P/390 ? */
> >>>> if (cpuinfo->cpu_id.machine == 0x7490)
> >>>> machine_flags |= 4;
> >>>> +
> >>>> + /* Running under KVM ? */
> >>>> + if (cpuinfo->cpu_id.version == 0xfe)
> >>>> + machine_flags |= 64;
> >>> Shouldn't these have symbolic names?
> >> You mean symbolics for machine_flags? Or symbolics for cpu ids?
> >
> > Either.
> [...]
> The machine flags do have symbolic names, defined in
> include/asm-s390/setup.h. And yea, they should be used here. Will
> change that.
Since when do we have symbolic names for the bits?
It was always on my todo list to do a cleanup and replace the numbers
we use everywhere with names. Especially since we have clashes from time
to time... but that didn't hurt enough yet, obviously.
But now that you volunteered to take care of this... :)
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <20080320212255.GA4225@osiris.boeblingen.de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <20080320212255.GA4225@osiris.boeblingen.de.ibm.com>
@ 2008-03-21 11:12 ` Carsten Otte
[not found] ` <47E39804.4030605@de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 11:12 UTC (permalink / raw)
To: Heiko Carstens
Cc: carsteno, Christian Ehrhardt, arnd, hollisb, kvm-devel, heicars2,
mschwid2, jeroney, virtualization, Christoph Hellwig, borntrae,
rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
MAILER-DAEMON@linux.ibm.com wrote:
> Since when do we have symbolic names for the bits?
> It was always on my todo list to do a cleanup and replace the numbers
> we use everywhere with names. Especially since we have clashes from time
> to time... but that didn't hurt enough yet, obviously.
> But now that you volunteered to take care of this... :)
Right. We only have defines for (machine_flags & bit). Looks to me
like the bits really should have a name on them. I've created a patch
that does this, but I want to talk it over with Martin before sending
that one out.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <47E39804.4030605@de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <47E39804.4030605@de.ibm.com>
@ 2008-03-21 14:06 ` Heiko Carstens
[not found] ` <20080321140603.GC4128@osiris.boeblingen.de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-21 14:06 UTC (permalink / raw)
To: carsteno
Cc: carsteno, Christian Ehrhardt, arnd, hollisb, kvm-devel, heicars2,
mschwid2, jeroney, virtualization, Christoph Hellwig, borntrae,
rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
On Fri, Mar 21, 2008 at 12:12:04PM +0100, Carsten Otte wrote:
> MAILER-DAEMON@linux.ibm.com wrote:
>> Since when do we have symbolic names for the bits?
>> It was always on my todo list to do a cleanup and replace the numbers
>> we use everywhere with names. Especially since we have clashes from time
>> to time... but that didn't hurt enough yet, obviously.
>> But now that you volunteered to take care of this... :)
> Right. We only have defines for (machine_flags & bit). Looks to me like
> the bits really should have a name on them. I've created a patch that
> does this, but I want to talk it over with Martin before sending that one
> out.
Just introduce something like MACHINE_FLAG_KVM. The rest can be converted
later. Unless you're bored and feel like fiddling around with assembly code :)
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <20080321140603.GC4128@osiris.boeblingen.de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <20080321140603.GC4128@osiris.boeblingen.de.ibm.com>
@ 2008-03-21 14:33 ` Carsten Otte
[not found] ` <1206110009.8363.3.camel@cotte.boeblingen.de.ibm.com>
1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 14:33 UTC (permalink / raw)
To: Heiko Carstens
Cc: carsteno, Christian Ehrhardt, arnd, hollisb, carsteno, heicars2,
mschwid2, jeroney, virtualization, Christoph Hellwig, kvm-devel,
borntrae, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
Am Freitag, den 21.03.2008, 15:06 +0100 schrieb Heiko Carstens:
> Just introduce something like MACHINE_FLAG_KVM. The rest can be converted
> later. Unless you're bored and feel like fiddling around with assembly code :)
I've done that patch this morning already, see below. I agree with HCH
that we should do that, but after the kvm merge. I don't want kvm-s390
to conflict with Martin's patches. This is just a beautification, and can
safely wait a release cycle.
---
arch/s390/kernel/early.c | 6 +++---
include/asm-s390/setup.h | 32 ++++++++++++++++++++++----------
2 files changed, 25 insertions(+), 13 deletions(-)
Index: linux-host/arch/s390/kernel/early.c
===================================================================
--- linux-host.orig/arch/s390/kernel/early.c
+++ linux-host/arch/s390/kernel/early.c
@@ -138,15 +138,15 @@ static noinline __init void detect_machi
/* Running under z/VM ? */
if (cpuinfo->cpu_id.version == 0xff)
- machine_flags |= 1;
+ machine_flags |= MACHINE_FLAG_VM;
/* Running on a P/390 ? */
if (cpuinfo->cpu_id.machine == 0x7490)
- machine_flags |= 4;
+ machine_flags |= MACHINE_FLAG_P390;
/* Running under KVM ? */
if (cpuinfo->cpu_id.version == 0xfe)
- machine_flags |= 64;
+ machine_flags |= MACHINE_FLAG_KVM;
}
#ifdef CONFIG_64BIT
Index: linux-host/include/asm-s390/setup.h
===================================================================
--- linux-host.orig/include/asm-s390/setup.h
+++ linux-host/include/asm-s390/setup.h
@@ -59,23 +59,35 @@ extern unsigned int s390_noexec;
*/
extern unsigned long machine_flags;
-#define MACHINE_IS_VM (machine_flags & 1)
-#define MACHINE_IS_P390 (machine_flags & 4)
-#define MACHINE_HAS_MVPG (machine_flags & 16)
-#define MACHINE_IS_KVM (machine_flags & 64)
-#define MACHINE_HAS_IDTE (machine_flags & 128)
-#define MACHINE_HAS_DIAG9C (machine_flags & 256)
+#define MACHINE_FLAG_VM 1
+#define MACHINE_FLAG_IEEE 2
+#define MACHINE_FLAG_P390 4
+#define MACHINE_FLAG_CSP 8
+#define MACHINE_FLAG_MVPG 16
+#define MACHINE_FLAG_DIAG44 32
+#define MACHINE_FLAG_KVM 64
+#define MACHINE_FLAG_IDTE 128
+#define MACHINE_FLAG_DIAG9C 256
+#define MACHINE_FLAG_MVCOS 512
+
+
+#define MACHINE_IS_VM (machine_flags & MACHINE_FLAG_VM)
+#define MACHINE_IS_KVM (machine_flags & MACHINE_FLAG_KVM)
+#define MACHINE_IS_P390 (machine_flags & MACHINE_FLAG_P390)
+#define MACHINE_HAS_MVPG (machine_flags & MACHINE_FLAG_MVPG)
+#define MACHINE_HAS_IDTE (machine_flags & MACHINE_FLAG_IDTE)
+#define MACHINE_HAS_DIAG9C (machine_flags & MACHINE_FLAG_DIAG9C)
#ifndef __s390x__
-#define MACHINE_HAS_IEEE (machine_flags & 2)
-#define MACHINE_HAS_CSP (machine_flags & 8)
+#define MACHINE_HAS_IEEE (machine_flags & MACHINE_FLAG_IEEE)
+#define MACHINE_HAS_CSP (machine_flags & MACHINE_FLAG_CSP)
#define MACHINE_HAS_DIAG44 (1)
#define MACHINE_HAS_MVCOS (0)
#else /* __s390x__ */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
-#define MACHINE_HAS_DIAG44 (machine_flags & 32)
-#define MACHINE_HAS_MVCOS (machine_flags & 512)
+#define MACHINE_HAS_DIAG44 (machine_flags & MACHINE_FLAG_DIAG44)
+#define MACHINE_HAS_MVCOS (machine_flags & MACHINE_FLAG_MVCOS)
#endif /* __s390x__ */
#define MACHINE_HAS_SCLP (!MACHINE_IS_P390)
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <1206110009.8363.3.camel@cotte.boeblingen.de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <1206110009.8363.3.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-22 17:25 ` Heiko Carstens
0 siblings, 0 replies; 52+ messages in thread
From: Heiko Carstens @ 2008-03-22 17:25 UTC (permalink / raw)
To: Carsten Otte
Cc: carsteno, Christian Ehrhardt, arnd, hollisb, carsteno, mschwid2,
heicars2, jeroney, virtualization, Christoph Hellwig, kvm-devel,
borntrae, rvdheij, Olaf Schnapper, jblunck, Zhang, Xiantao
On Fri, Mar 21, 2008 at 03:33:29PM +0100, Carsten Otte wrote:
> Am Freitag, den 21.03.2008, 15:06 +0100 schrieb Heiko Carstens:
> > Just introduce something like MACHINE_FLAG_KVM. The rest can be converted
> > later. Unless you're bored and feel like fiddling around with assembly code :)
> I've done that patch this morning already, see below. I agree with HCH
> that we should do that, but after the kvm merge. I don't want kvm-s390
> conflict with Martin's patches. This is just a beautification, and can
> safely wait a release cycle.
That's nice for a start. But you didn't convert the assembly files to use
the new defines. So there is still no connection between setting a bit
in asm code and the new defines.
That's the reason why I said something about fiddling around with asm code.
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [kvm-devel] [RFC/PATCH 14/15] guest: detect when running on kvm
[not found] ` <20080320175357.GA30959@infradead.org>
[not found] ` <47E2CAFF.3070203@de.ibm.com>
@ 2008-03-20 20:37 ` Carsten Otte
1 sibling, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-20 20:37 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
jblunck, Zhang, Xiantao
Christoph Hellwig wrote:
> On Thu, Mar 20, 2008 at 05:25:26PM +0100, Carsten Otte wrote:
>> @@ -143,6 +143,10 @@ static noinline __init void detect_machi
>> /* Running on a P/390 ? */
>> if (cpuinfo->cpu_id.machine == 0x7490)
>> machine_flags |= 4;
>> +
>> + /* Running under KVM ? */
>> + if (cpuinfo->cpu_id.version == 0xfe)
>> + machine_flags |= 64;
>
> Shouldn't these have symbolic names?
You mean symbolics for machine_flags? Or symbolics for cpu ids?
^ permalink raw reply [flat|nested] 52+ messages in thread
[parent not found: <1206030328.6690.66.camel@cotte.boeblingen.de.ibm.com>]
* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
[not found] ` <1206030328.6690.66.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-21 0:24 ` Rusty Russell
[not found] ` <200803211124.49829.rusty@rustcorp.com.au>
2008-03-21 10:44 ` Avi Kivity
2 siblings, 0 replies; 52+ messages in thread
From: Rusty Russell @ 2008-03-21 0:24 UTC (permalink / raw)
To: Carsten Otte
Cc: aliguori, EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens,
jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
jblunck, Zhang, Xiantao
On Friday 21 March 2008 03:25:28 Carsten Otte wrote:
> +static void kvm_set_status(struct virtio_device *vdev, u8 status)
> +{
> + BUG_ON(!status);
> + to_kvmdev(vdev)->desc->status = status;
> +}
> +
> +/*
> + * To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
> + * address of the device. The Host will zero the status and all the
> + * features.
> + */
> +static void kvm_reset(struct virtio_device *vdev)
> +{
> + unsigned long offset = (void *)to_kvmdev(vdev)->desc - kvm_devices;
> +
> + kvm_hypercall1(1237, (max_pfn<<PAGE_SHIFT) + offset);
> +}
I'd recommend a hypercall after set_status, as well as reset. The
reason lguest doesn't do this is that we don't do feature negotiation
(assuming guest kernel matches host kernel). In general, the host
needs to know when the VIRTIO_CONFIG_S_DRIVER_OK is set so it can see
what features the guest driver accepted.
Overloading the notify hypercall is kind of a hack too, but it works so
no real need to change that.
> + * The root device for the kvm virtio devices.
> + * This makes them appear as /sys/devices/kvm/0,1,2 not /sys/devices/0,1,2.
> + */
> +static struct device kvm_root = {
> + .parent = NULL,
> + .bus_id = "kvm_s390",
> +};
You mean /sys/devices/kvm_s390/0,1,2?
> +static int __init kvm_devices_init(void)
> +{
> + if (!MACHINE_IS_KVM)
> + return -ENODEV;
> +
> + if (device_register(&kvm_root) != 0)
> + panic("Could not register kvm root");
> +
> + if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
> + device_unregister(&kvm_root);
> + return -ENOMEM;
> + }
Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory fail?
My theory was that since this is boot time, panic() is the right thing.
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <200803211124.49829.rusty@rustcorp.com.au>]
* Re: [kvm-devel] [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
[not found] ` <200803211124.49829.rusty@rustcorp.com.au>
@ 2008-03-21 7:12 ` Carsten Otte
2008-03-21 8:15 ` Christian Borntraeger
[not found] ` <200803210915.48029.borntraeger@de.ibm.com>
2 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 7:12 UTC (permalink / raw)
To: Rusty Russell
Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
jeroney, virtualization, borntrae, rvdheij, Olaf Schnapper,
jblunck, Zhang, Xiantao
Rusty Russell wrote:
>> +static int __init kvm_devices_init(void)
>> +{
>> + if (!MACHINE_IS_KVM)
>> + return -ENODEV;
>> +
>> + if (device_register(&kvm_root) != 0)
>> + panic("Could not register kvm root");
>> +
>> + if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
>> + device_unregister(&kvm_root);
>> + return -ENOMEM;
>> + }
>
> Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory fail?
> My theory was that since this is boot time, panic() is the right thing.
We can't tell whether or not this is an important device. Maybe
the guest is running with ramdisk as rootfs and can have a happy life
if we don't kill it here. Returning the rc from device_register seems to
be the right thing to me; if it was an important device we'll see
"panic: cannot mount rootfs" or something later.
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
[not found] ` <200803211124.49829.rusty@rustcorp.com.au>
2008-03-21 7:12 ` [kvm-devel] " Carsten Otte
@ 2008-03-21 8:15 ` Christian Borntraeger
[not found] ` <200803210915.48029.borntraeger@de.ibm.com>
2 siblings, 0 replies; 52+ messages in thread
From: Christian Borntraeger @ 2008-03-21 8:15 UTC (permalink / raw)
To: Rusty Russell
Cc: Carsten Otte, aliguori, EHRHARDT, arnd, hollisb, kvm-devel,
heiko.carstens, jeroney, virtualization, schwidefsky, rvdheij, os,
jblunck, Zhang, Xiantao
Am Freitag, 21. März 2008 schrieb Rusty Russell:
> I'd recommend a hypercall after set_status, as well as reset. The
> reason lguest doesn't do this is that we don't do feature negotiation
> (assuming guest kernel matches host kernel). In general, the host
> needs to know when the VIRTIO_CONFIG_S_DRIVER_OK is set so it can see
> what features the guest driver accepted.
Right. Will have a look.
>
> Overloading the notify hypercall is kind of a hack too, but it works so
> no real need to change that.
>
> > + * The root device for the kvm virtio devices.
> > + * This makes them appear as /sys/devices/kvm/0,1,2
not /sys/devices/0,1,2.
> > + */
> > +static struct device kvm_root = {
> > + .parent = NULL,
> > + .bus_id = "kvm_s390",
> > +};
>
> You mean /sys/devices/kvm_s390/0,1,2?
Yes, thanks.
>
> > +static int __init kvm_devices_init(void)
> > +{
> > + if (!MACHINE_IS_KVM)
> > + return -ENODEV;
> > +
> > + if (device_register(&kvm_root) != 0)
> > + panic("Could not register kvm root");
> > +
> > + if (add_shared_memory((max_pfn) << PAGE_SHIFT, PAGE_SIZE)) {
> > + device_unregister(&kvm_root);
> > + return -ENOMEM;
> > + }
>
> Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory fail?
> My theory was that since this is boot time, panic() is the right thing.
Good spot, but I agree with Carsten. Drivers should not panic. I have module
load/unload capability on my long term todo list, but I can change the
panic now.
Christian
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <200803210915.48029.borntraeger@de.ibm.com>]
* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
[not found] ` <200803210915.48029.borntraeger@de.ibm.com>
@ 2008-03-21 23:30 ` Rusty Russell
[not found] ` <200803221030.49457.rusty@rustcorp.com.au>
1 sibling, 0 replies; 52+ messages in thread
From: Rusty Russell @ 2008-03-21 23:30 UTC (permalink / raw)
To: Christian Borntraeger
Cc: Carsten Otte, aliguori, EHRHARDT, arnd, hollisb, kvm-devel,
heiko.carstens, jeroney, virtualization, schwidefsky, rvdheij, os,
jblunck, Zhang, Xiantao
On Friday 21 March 2008 19:15:47 Christian Borntraeger wrote:
> Am Freitag, 21. März 2008 schrieb Rusty Russell:
> > Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory
> > fail? My theory was that since this is boot time, panic() is the right
> > thing.
>
> Good spot, but I agree with Carsten. Drivers should not panic. I have
> module load/unload capability on my long term todo list, but I can change
> the panic now.
Yep, that makes sense. For lguest, we panic: it's always at boot time so if
it fails we should die early to make it easier to diagnose (and that makes
sure it happens before we lose our early console).
Cheers,
Rusty.
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 52+ messages in thread[parent not found: <200803221030.49457.rusty@rustcorp.com.au>]
* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
[not found] ` <200803221030.49457.rusty@rustcorp.com.au>
@ 2008-03-22 7:36 ` Carsten Otte
0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-22 7:36 UTC (permalink / raw)
To: Rusty Russell
Cc: aliguori, Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2,
heicars2, jeroney, virtualization, borntrae, rvdheij,
Olaf Schnapper, jblunck, Zhang, Xiantao
Rusty Russell wrote:
> On Friday 21 March 2008 19:15:47 Christian Borntraeger wrote:
>> Am Freitag, 21. März 2008 schrieb Rusty Russell:
>>> Hmm, panic on device_register fail, but -ENOMEM on add_shared_memory
>>> fail? My theory was that since this is boot time, panic() is the right
>>> thing.
>> Good spot, but I agree with Carsten. Drivers should not panic. I have
>> module load/unload capability on my long term todo list, but I can change
>> the panic now.
>
> Yep, that makes sense. For lguest, we panic: it's always at boot time so if
> it fails we should die early to make it easier to diagnose (and that makes
> sure it happens before we lose our early console).
Diagnosis is easy here at any time during the boot process: we've
got our store status ioctl that userspace calls after guest execution
has ended. It causes all cpus to store their register content and such
into the cpu's lowcore area. Then it writes out our memory to a dump
image, which lkcdutils and/or crash can read.
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls
[not found] ` <1206030328.6690.66.camel@cotte.boeblingen.de.ibm.com>
2008-03-21 0:24 ` [RFC/PATCH 15/15] guest: virtio device support, and kvm hypercalls Rusty Russell
[not found] ` <200803211124.49829.rusty@rustcorp.com.au>
@ 2008-03-21 10:44 ` Avi Kivity
2 siblings, 0 replies; 52+ messages in thread
From: Avi Kivity @ 2008-03-21 10:44 UTC (permalink / raw)
To: Carsten Otte
Cc: aliguori, EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens,
jeroney, virtualization, borntraeger, schwidefsky, rvdheij, os,
jblunck, Zhang, Xiantao
Carsten Otte wrote:
> Currently we dont have PCI on s390. Making virtio_pci usable for s390 seems
> more complicated that providing an own stub. This virtio stub is similar to
> the lguest one, the memory for the descriptors and the device detection is made
> via additional mapped memory on top of the guest storage. We use an external
> interrupt with extint code 1237 for host->guest notification.
>
So, sanity won in the end.
--
Any sufficiently difficult bug is indistinguishable from a feature.
^ permalink raw reply [flat|nested] 52+ messages in thread
[parent not found: <1206030302.6690.57.camel@cotte.boeblingen.de.ibm.com>]
* Re: [kvm-devel] [RFC/PATCH 06/15] kvm-s390: sie intercept handling
[not found] ` <1206030302.6690.57.camel@cotte.boeblingen.de.ibm.com>
@ 2008-03-21 10:53 ` Avi Kivity
2008-03-21 11:26 ` Carsten Otte
0 siblings, 1 reply; 52+ messages in thread
From: Avi Kivity @ 2008-03-21 10:53 UTC (permalink / raw)
To: Carsten Otte
Cc: EHRHARDT, arnd, hollisb, kvm-devel, heiko.carstens, jeroney,
virtualization, borntraeger, schwidefsky, rvdheij, os, jblunck,
Zhang, Xiantao
Carsten Otte wrote:
>
> /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
> struct kvm_run {
> @@ -138,6 +139,14 @@ struct kvm_run {
> __u32 is_write;
> __u32 pad;
> } tpr_access;
> + /* KVM_EXIT_S390_SIEIC */
> + struct {
> + __u8 icptcode;
> + __u64 mask; /* psw upper half */
> + __u64 addr; /* psw lower half */
> + __u16 ipa;
> + __u32 ipb;
> + } s390_sieic;
> /* Fix the size of the union. */
> char padding[256];
> };
>
>
Do you support 32-bit userspace on 64-bit kernel? If so, this is likely
badly aligned.
--
Any sufficiently difficult bug is indistinguishable from a feature.
^ permalink raw reply [flat|nested] 52+ messages in thread* Re: [kvm-devel] [RFC/PATCH 06/15] kvm-s390: sie intercept handling
2008-03-21 10:53 ` [kvm-devel] [RFC/PATCH 06/15] kvm-s390: sie intercept handling Avi Kivity
@ 2008-03-21 11:26 ` Carsten Otte
0 siblings, 0 replies; 52+ messages in thread
From: Carsten Otte @ 2008-03-21 11:26 UTC (permalink / raw)
To: Avi Kivity
Cc: Christian Ehrhardt, arnd, hollisb, kvm-devel, mschwid2, heicars2,
jeroney, borntrae, virtualization, rvdheij, Olaf Schnapper,
jblunck, Zhang, Xiantao
Avi Kivity wrote:
> Carsten Otte wrote:
>>
>> /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
>> struct kvm_run {
>> @@ -138,6 +139,14 @@ struct kvm_run {
>> __u32 is_write;
>> __u32 pad;
>> } tpr_access;
>> + /* KVM_EXIT_S390_SIEIC */
>> + struct {
>> + __u8 icptcode;
>> + __u64 mask; /* psw upper half */
>> + __u64 addr; /* psw lower half */
>> + __u16 ipa;
>> + __u32 ipb;
>> + } s390_sieic;
>> /* Fix the size of the union. */
>> char padding[256];
>> };
>>
>>
>
> Do you support 32-bit userspace on 64-bit kernel? If so, this is likely
> badly aligned.
32bit userspace is not practical; current enterprise distributions
come with 64bit only on s390. Nevertheless, I don't get your point on
alignment. What is the problem caused by the struct, and how can I
solve it?
^ permalink raw reply [flat|nested] 52+ messages in thread