* [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
@ 2011-06-03 15:03 Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 2/8] ARM: KVM: Hypervisor identity mapping Christoffer Dall
` (9 more replies)
0 siblings, 10 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:03 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
Targets KVM support for Cortex A-15 processors.
Contains no real functionality but all the framework components,
make files, header files and some tracing functionality.
---
arch/arm/Kconfig | 2
arch/arm/Makefile | 1
arch/arm/include/asm/kvm.h | 65 +++++
arch/arm/include/asm/kvm_asm.h | 28 ++
arch/arm/include/asm/kvm_emulate.h | 89 +++++++
arch/arm/include/asm/kvm_host.h | 114 +++++++++
arch/arm/include/asm/kvm_para.h | 9 +
arch/arm/include/asm/unified.h | 12 +
arch/arm/kvm/Kconfig | 44 ++++
arch/arm/kvm/Makefile | 13 +
arch/arm/kvm/arm.c | 363 ++++++++++++++++++++++++++++++
arch/arm/kvm/arm_emulate.c | 70 ++++++
arch/arm/kvm/arm_guest.c | 142 ++++++++++++
arch/arm/kvm/arm_interrupts.S | 17 +
arch/arm/kvm/arm_mmu.c | 0
arch/arm/kvm/trace.c | 436 ++++++++++++++++++++++++++++++++++++
arch/arm/kvm/trace.h | 108 +++++++++
arch/arm/mach-vexpress/Kconfig | 1
arch/arm/mm/Kconfig | 7 +
include/linux/kvm.h | 1
20 files changed, 1522 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/include/asm/kvm.h
create mode 100644 arch/arm/include/asm/kvm_asm.h
create mode 100644 arch/arm/include/asm/kvm_emulate.h
create mode 100644 arch/arm/include/asm/kvm_host.h
create mode 100644 arch/arm/include/asm/kvm_para.h
create mode 100644 arch/arm/kvm/Kconfig
create mode 100644 arch/arm/kvm/Makefile
create mode 100644 arch/arm/kvm/arm.c
create mode 100644 arch/arm/kvm/arm_emulate.c
create mode 100644 arch/arm/kvm/arm_guest.c
create mode 100644 arch/arm/kvm/arm_interrupts.S
create mode 100644 arch/arm/kvm/arm_mmu.c
create mode 100644 arch/arm/kvm/trace.c
create mode 100644 arch/arm/kvm/trace.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b410049..b2a2b65 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1951,3 +1951,5 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "arch/arm/kvm/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 6f7b292..72335fc 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -254,6 +254,7 @@ core-$(CONFIG_VFP) += arch/arm/vfp/
# If we have a machine-specific directory, then include it in the build.
core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
+core-$(CONFIG_KVM) += arch/arm/kvm/
core-y += $(machdirs) $(platdirs)
drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/
diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
new file mode 100644
index 0000000..8311198
--- /dev/null
+++ b/arch/arm/include/asm/kvm.h
@@ -0,0 +1,65 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#include <asm/types.h>
+
+/*
+ * Modes used for short-hand mode determination in the world-switch code and
+ * in emulation code.
+ *
+ * Note: These indices do NOT correspond to the value of the CPSR mode bits!
+ */
+#define MODE_FIQ 0
+#define MODE_IRQ 1
+#define MODE_SVC 2
+#define MODE_ABT 3
+#define MODE_UND 4
+#define MODE_USR 5
+#define MODE_SYS 6
+
+struct kvm_regs {
+ __u32 regs0_7[8]; /* Unbanked regs. (r0 - r7) */
+ __u32 fiq_regs8_12[5]; /* Banked fiq regs. (r8 - r12) */
+ __u32 usr_regs8_12[5]; /* Banked usr registers (r8 - r12) */
+ __u32 reg13[6]; /* Banked r13, indexed by MODE_ */
+ __u32 reg14[6]; /* Banked r14, indexed by MODE_ */
+ __u32 reg15; /* Program counter (r15) */
+ __u32 cpsr; /* Current program status register */
+ __u32 spsr[5]; /* Banked SPSR, indexed by MODE_ */
+ struct {
+ __u32 c2_base0;
+ __u32 c2_base1;
+ __u32 c3_dacr;
+ } cp15;
+
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
new file mode 100644
index 0000000..c3d4458
--- /dev/null
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -0,0 +1,28 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+#define ARM_EXCEPTION_RESET 0
+#define ARM_EXCEPTION_UNDEFINED 1
+#define ARM_EXCEPTION_SOFTWARE 2
+#define ARM_EXCEPTION_PREF_ABORT 3
+#define ARM_EXCEPTION_DATA_ABORT 4
+#define ARM_EXCEPTION_IRQ 5
+#define ARM_EXCEPTION_FIQ 6
+
+#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
new file mode 100644
index 0000000..8eed752
--- /dev/null
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -0,0 +1,89 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_EMULATE_H__
+#define __ARM_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+
+u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
+
+static inline unsigned char vcpu_mode(struct kvm_vcpu *vcpu)
+{
+ /* Maps CPSR mode bits [3:0] to MODE_* indices; 0xf marks invalid */
+ static const u8 modes_table[16] = {
+ MODE_USR, /* 0x0 */
+ MODE_FIQ, /* 0x1 */
+ MODE_IRQ, /* 0x2 */
+ MODE_SVC, /* 0x3 */
+ 0xf, 0xf, 0xf,
+ MODE_ABT, /* 0x7 */
+ 0xf, 0xf, 0xf,
+ MODE_UND, /* 0xb */
+ 0xf, 0xf, 0xf,
+ MODE_SYS}; /* 0xf */
+ BUG_ON(modes_table[vcpu->arch.regs.cpsr & 0xf] == 0xf);
+ return modes_table[vcpu->arch.regs.cpsr & 0xf];
+}
+
+/*
+ * Return the SPSR for the specified mode of the virtual CPU.
+ * Each banked mode keeps its own SPSR at index [2] of its reg array
+ * (index [7] for FIQ) — see struct kvm_vcpu_regs.
+ */
+static inline u32 kvm_vcpu_spsr(struct kvm_vcpu *vcpu, u32 mode)
+{
+ switch (mode) {
+ case MODE_SVC:
+ return vcpu->arch.regs.svc_regs[2];
+ case MODE_ABT:
+ return vcpu->arch.regs.abt_regs[2];
+ case MODE_UND:
+ return vcpu->arch.regs.und_regs[2];
+ case MODE_IRQ:
+ return vcpu->arch.regs.irq_regs[2];
+ case MODE_FIQ:
+ return vcpu->arch.regs.fiq_regs[7];
+ default:
+ BUG();
+ }
+}
+
+/* Get vcpu register for current mode */
+#define vcpu_reg(_vcpu, _reg_num) \
+ (*kvm_vcpu_reg((_vcpu), _reg_num, vcpu_mode(_vcpu)))
+
+/* Get vcpu register for specific mode */
+#define vcpu_reg_m(_vcpu, _reg_num, _mode) \
+ (*kvm_vcpu_reg(_vcpu, _reg_num, _mode))
+
+#define vcpu_cpsr(_vcpu) \
+ (_vcpu->arch.regs.cpsr)
+
+/* Get vcpu SPSR for current mode */
+#define vcpu_spsr(_vcpu) \
+ kvm_vcpu_spsr(_vcpu, vcpu_mode(_vcpu))
+
+/* Get vcpu SPSR for specific mode */
+#define vcpu_spsr_m(_vcpu, _mode) \
+ kvm_vcpu_spsr(_vcpu, _mode)
+
+#define MODE_HAS_SPSR(_vcpu) \
+ ((vcpu_mode(_vcpu)) < MODE_USR)
+
+#define VCPU_MODE_PRIV(_vcpu) \
+ (((vcpu_mode(_vcpu)) == MODE_USR) ? 0 : 1)
+
+#endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
new file mode 100644
index 0000000..6e8a08d
--- /dev/null
+++ b/arch/arm/include/asm/kvm_host.h
@@ -0,0 +1,114 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_HOST_H__
+#define __ARM_KVM_HOST_H__
+
+#define KVM_MAX_VCPUS 1
+#define KVM_MEMORY_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+/* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x) 0
+#define KVM_NR_PAGE_SIZES 1
+#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
+
+struct kvm_vcpu;
+u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
+
+struct kvm_arch {
+};
+
+#define EXCEPTION_NONE 0
+#define EXCEPTION_RESET 0x80
+#define EXCEPTION_UNDEFINED 0x40
+#define EXCEPTION_SOFTWARE 0x20
+#define EXCEPTION_PREFETCH 0x10
+#define EXCEPTION_DATA 0x08
+#define EXCEPTION_IMPRECISE 0x04
+#define EXCEPTION_IRQ 0x02
+#define EXCEPTION_FIQ 0x01
+
+struct kvm_vcpu_regs {
+ u32 usr_regs[15]; /* R0_usr - R14_usr */
+ u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */
+ u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */
+ u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */
+ u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */
+ u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */
+ u32 pc; /* The program counter (r15) */
+ u32 cpsr; /* Guest emulated CPSR */
+} __packed;
+
+struct kvm_vcpu_arch {
+ /* Guest register state (embedded struct — NOTE(review): comment said
+ "pointer to shared page" but no pointer/page exists here; confirm) */
+ struct kvm_vcpu_regs regs;
+
+ /* Pointer to cached mode on shared page */
+ unsigned long *mode;
+
+ /* System control coprocessor (cp15) */
+ struct {
+ u32 c1_SCTLR; /* System Control Register */
+ u32 c1_ACTLR; /* Auxiliary Control Register */
+ u32 c1_CPACR; /* Coprocessor Access Control Register */
+ u64 c2_TTBR0; /* Translation Table Base Register 0 */
+ u64 c2_TTBR1; /* Translation Table Base Register 1 */
+ u32 c2_TTBCR; /* Translation Table Base Control Register */
+ u32 c3_DACR; /* Domain Access Control Register */
+ } cp15;
+
+ u32 exception_pending; /* Exception to raise after emulation */
+
+ /* Exception Information */
+ u32 hsr; /* Hyp Syndrome Register */
+ u32 hdfar; /* Hyp Data Fault Address Register */
+ u32 hifar; /* Hyp Inst. Fault Address Register */
+ u32 hpfar; /* Hyp IPA Fault Address Register */
+
+ /* IO related fields */
+ u32 mmio_rd; /* presumably dest. register of pending MMIO read — TODO confirm */
+
+ /* Misc. fields */
+ u32 wait_for_interrupts;
+
+struct kvm_vm_stat {
+ u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+ u32 sum_exits;
+ u32 mmio_exits;
+ u32 dcr_exits;
+ u32 signal_exits;
+ u32 light_exits;
+ /* Account for special types of light exits: */
+ u32 itlb_real_miss_exits;
+ u32 itlb_virt_miss_exits;
+ u32 dtlb_real_miss_exits;
+ u32 dtlb_virt_miss_exits;
+ u32 syscall_exits;
+ u32 isi_exits;
+ u32 dsi_exits;
+ u32 emulated_inst_exits;
+ u32 dec_exits;
+ u32 ext_intr_exits;
+ u32 halt_wakeup;
+};
+
+#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_para.h b/arch/arm/include/asm/kvm_para.h
new file mode 100644
index 0000000..7ce5f1c
--- /dev/null
+++ b/arch/arm/include/asm/kvm_para.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_ARM_KVM_PARA_H
+#define _ASM_ARM_KVM_PARA_H
+
+/* No paravirtual features are exposed to ARM guests yet. */
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+#endif /* _ASM_ARM_KVM_PARA_H */
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index bc63116..14ad6bb 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -54,6 +54,18 @@
#endif /* CONFIG_THUMB2_KERNEL */
+#ifdef CONFIG_KVM
+#ifdef __ASSEMBLY__
+.arch_extension sec
+.arch_extension virt
+#else
+__asm__(
+" .arch_extension sec\n"
+" .arch_extension virt\n"
+);
+#endif
+#endif
+
#ifndef CONFIG_ARM_ASM_UNIFIED
/*
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
new file mode 100644
index 0000000..1806a6d
--- /dev/null
+++ b/arch/arm/kvm/Kconfig
@@ -0,0 +1,44 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ bool "Kernel-based Virtual Machine (KVM) support"
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select KVM_ARM_HOST
+ select KVM_MMIO
+ ---help---
+ Support hosting virtualized guest machines. You will also
+ need to select one or more of the processor modules below.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_ARM_HOST
+ bool "KVM host support for ARM cpus."
+ depends on KVM
+ depends on MMU
+ depends on CPU_V7 || ARM_VIRT_EXT
+ ---help---
+ Provides host support for ARM processors.
+
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
new file mode 100644
index 0000000..4ff905d
--- /dev/null
+++ b/arch/arm/kvm/Makefile
@@ -0,0 +1,13 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/arm/kvm
+AFLAGS_arm_interrupts.o := -I$(obj)
+
+kvm-arm-y += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+
+kvm-arm-y += arm.o arm_guest.o arm_interrupts.o arm_mmu.o arm_emulate.o \
+ trace.o
+
+obj-$(CONFIG_KVM) += kvm-arm.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
new file mode 100644
index 0000000..2157c1e
--- /dev/null
+++ b/arch/arm/kvm/arm.c
@@ -0,0 +1,363 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mman.h>
+#include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/mman.h>
+
+#include "trace.h"
+
+int kvm_arch_hardware_enable(void *garbage)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+ *(int *)rtn = 0;
+}
+
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
+int kvm_arch_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ int i;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (kvm->vcpus[i]) {
+ kvm_arch_vcpu_free(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
+ }
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+ int r;
+ switch (ext) {
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ r = 1;
+ break;
+ case KVM_CAP_COALESCED_MMIO:
+ r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+ return r;
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ int ret = 0;
+
+ switch (ioctl) {
+ default:
+ /* No device ioctls are supported on ARM yet */
+ ret = -EINVAL;
+ }
+
+ if (ret < 0)
+ printk(KERN_ERR "error processing ARM ioctl: %d\n", ret);
+ return ret;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ return 0;
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot old,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
+{
+ return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+}
+
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+ /* XXX What should this do? */
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ int err;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ return vcpu;
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(err);
+}
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ KVMARM_NOT_IMPLEMENTED();
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ kvm_arch_vcpu_free(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
+
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *intr)
+{
+ u32 mask;
+
+ switch (intr->irq) {
+ case EXCEPTION_IRQ:
+ /* IRQ */
+ mask = EXCEPTION_IRQ;
+ break;
+ case EXCEPTION_FIQ:
+ /* FIQ */
+ mask = EXCEPTION_FIQ;
+ break;
+ default:
+ /* Only async exceptions are supported here */
+ return -EINVAL;
+ }
+
+ if (intr->raise) {
+ if (mask == EXCEPTION_IRQ)
+ kvm_trace_activity(101, "raise IRQ");
+ else if (mask == EXCEPTION_FIQ)
+ kvm_trace_activity(102, "raise FIQ");
+ vcpu->arch.exception_pending |= mask;
+ vcpu->arch.wait_for_interrupts = 0;
+ } else {
+ if (mask == EXCEPTION_IRQ)
+ kvm_trace_activity(103, "lower IRQ");
+ else if (mask == EXCEPTION_FIQ)
+ kvm_trace_activity(104, "lower FIQ");
+
+ vcpu->arch.exception_pending &= ~mask;
+ }
+
+ return 0;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r;
+
+ switch (ioctl) {
+ case KVM_S390_STORE_STATUS: {
+ return -EINVAL;
+ }
+ case KVM_INTERRUPT: {
+ struct kvm_interrupt intr;
+
+ r = -EFAULT;
+ if (copy_from_user(&intr, argp, sizeof intr))
+ break;
+ r = kvm_vcpu_ioctl_interrupt(vcpu, &intr);
+ break;
+ }
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ return -EOPNOTSUPP; /* -ENOTSUPP is kernel-internal; don't leak it to userspace */
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ printk(KERN_ERR "kvm_arch_vm_ioctl: Unsupported ioctl (%d)\n", ioctl);
+ return -EINVAL;
+}
+
+int kvm_arch_init(void *opaque)
+{
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+static int k_show(struct seq_file *m, void *v)
+{
+ print_kvm_debug_info(&seq_printf, m);
+ return 0;
+}
+
+static void *k_start(struct seq_file *m, loff_t *pos)
+{
+ return *pos < 1 ? (void *)1 : NULL;
+}
+
+static void *k_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return NULL;
+}
+
+static void k_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations kvmproc_op = {
+ .start = k_start,
+ .next = k_next,
+ .stop = k_stop,
+ .show = k_show
+};
+
+static int kvm_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &kvmproc_op);
+}
+
+static const struct file_operations proc_kvm_operations = {
+ .open = kvm_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/* Module entry point: register with generic KVM and expose /proc/kvm. */
+static int __init arm_init(void)
+{
+ int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+ if (rc == 0)
+ proc_create("kvm", 0, NULL, &proc_kvm_operations);
+ return rc;
+}
+
+static void __exit arm_exit(void)
+{
+ kvm_exit();
+}
+module_init(arm_init);
+module_exit(arm_exit);
diff --git a/arch/arm/kvm/arm_emulate.c b/arch/arm/kvm/arm_emulate.c
new file mode 100644
index 0000000..3dd4f08
--- /dev/null
+++ b/arch/arm/kvm/arm_emulate.c
@@ -0,0 +1,70 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <asm/kvm_emulate.h>
+
+/*
+ * Return a pointer to the register number valid in the specified mode of
+ * the virtual CPU.
+ */
+u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
+{
+ struct kvm_vcpu_regs *regs;
+ u8 reg_idx;
+ BUG_ON(reg_num > 15);
+
+ regs = &vcpu->arch.regs;
+
+ /* The PC is trivial */
+ if (reg_num == 15)
+ return &(regs->pc);
+
+ /* Non-banked registers */
+ if (reg_num < 8)
+ return &(regs->usr_regs[reg_num]);
+
+ /* Banked registers r13 and r14 */
+ if (reg_num >= 13) {
+ reg_idx = reg_num - 13; /* 0=r13 and 1=r14 */
+ switch (mode) {
+ case MODE_FIQ:
+ return &(regs->fiq_regs[reg_idx + 5]);
+ case MODE_IRQ:
+ return &(regs->irq_regs[reg_idx]);
+ case MODE_SVC:
+ return &(regs->svc_regs[reg_idx]);
+ case MODE_ABT:
+ return &(regs->abt_regs[reg_idx]);
+ case MODE_UND:
+ return &(regs->und_regs[reg_idx]);
+ case MODE_USR:
+ case MODE_SYS:
+ return &(regs->usr_regs[reg_idx]);
+ }
+ }
+
+ /* Banked FIQ registers r8-r12 */
+ if (reg_num >= 8 && reg_num <= 12) {
+ if (mode == MODE_FIQ) {
+ reg_idx = reg_num - 8; /* 0=r8, ..., 4=r12 */
+ return &(regs->fiq_regs[reg_idx]);
+ } else
+ return &(regs->usr_regs[reg_num]);
+ }
+
+ BUG();
+ return NULL;
+}
diff --git a/arch/arm/kvm/arm_guest.c b/arch/arm/kvm/arm_guest.c
new file mode 100644
index 0000000..646f60c
--- /dev/null
+++ b/arch/arm/kvm/arm_guest.c
@@ -0,0 +1,142 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+
+
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ struct kvm_vcpu_regs *vcpu_regs = &vcpu->arch.regs;
+
+ /*
+ * GPRs and PSRs
+ */
+ memcpy(regs->regs0_7, &(vcpu_regs->usr_regs[0]), sizeof(u32) * 8);
+ memcpy(regs->usr_regs8_12, &(vcpu_regs->usr_regs[8]), sizeof(u32) * 5);
+ memcpy(regs->fiq_regs8_12, &(vcpu_regs->fiq_regs[0]), sizeof(u32) * 5);
+ regs->reg13[MODE_FIQ] = vcpu_regs->fiq_regs[5];
+ regs->reg14[MODE_FIQ] = vcpu_regs->fiq_regs[6];
+ regs->reg13[MODE_IRQ] = vcpu_regs->irq_regs[0];
+ regs->reg14[MODE_IRQ] = vcpu_regs->irq_regs[1];
+ regs->reg13[MODE_SVC] = vcpu_regs->svc_regs[0];
+ regs->reg14[MODE_SVC] = vcpu_regs->svc_regs[1];
+ regs->reg13[MODE_ABT] = vcpu_regs->abt_regs[0];
+ regs->reg14[MODE_ABT] = vcpu_regs->abt_regs[1];
+ regs->reg13[MODE_UND] = vcpu_regs->und_regs[0];
+ regs->reg14[MODE_UND] = vcpu_regs->und_regs[1];
+ regs->reg13[MODE_USR] = vcpu_regs->usr_regs[0];
+ regs->reg14[MODE_USR] = vcpu_regs->usr_regs[1];
+
+ regs->spsr[MODE_FIQ] = vcpu_regs->fiq_regs[7];
+ regs->spsr[MODE_IRQ] = vcpu_regs->irq_regs[2];
+ regs->spsr[MODE_SVC] = vcpu_regs->svc_regs[2];
+ regs->spsr[MODE_ABT] = vcpu_regs->abt_regs[2];
+ regs->spsr[MODE_UND] = vcpu_regs->und_regs[2];
+
+ regs->reg15 = vcpu_regs->pc;
+ regs->cpsr = vcpu_regs->cpsr;
+
+
+ /*
+ * Co-processor registers.
+ */
+ regs->cp15.c2_base0 = vcpu->arch.cp15.c2_TTBR0;
+ regs->cp15.c2_base1 = vcpu->arch.cp15.c2_TTBR1;
+ regs->cp15.c3_dacr = vcpu->arch.cp15.c3_DACR;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ struct kvm_vcpu_regs *vcpu_regs = &vcpu->arch.regs;
+
+ memcpy(&(vcpu_regs->usr_regs[0]), regs->regs0_7, sizeof(u32) * 8);
+ memcpy(&(vcpu_regs->usr_regs[8]), regs->usr_regs8_12, sizeof(u32) * 5);
+ memcpy(&(vcpu_regs->fiq_regs[0]), regs->fiq_regs8_12, sizeof(u32) * 5);
+
+ vcpu_regs->fiq_regs[5] = regs->reg13[MODE_FIQ];
+ vcpu_regs->fiq_regs[6] = regs->reg14[MODE_FIQ];
+ vcpu_regs->irq_regs[0] = regs->reg13[MODE_IRQ];
+ vcpu_regs->irq_regs[1] = regs->reg14[MODE_IRQ];
+ vcpu_regs->svc_regs[0] = regs->reg13[MODE_SVC];
+ vcpu_regs->svc_regs[1] = regs->reg14[MODE_SVC];
+ vcpu_regs->abt_regs[0] = regs->reg13[MODE_ABT];
+ vcpu_regs->abt_regs[1] = regs->reg14[MODE_ABT];
+ vcpu_regs->und_regs[0] = regs->reg13[MODE_UND];
+ vcpu_regs->und_regs[1] = regs->reg14[MODE_UND];
+ vcpu_regs->usr_regs[0] = regs->reg13[MODE_USR];
+ vcpu_regs->usr_regs[1] = regs->reg14[MODE_USR];
+
+ vcpu_regs->fiq_regs[7] = regs->spsr[MODE_FIQ];
+ vcpu_regs->irq_regs[2] = regs->spsr[MODE_IRQ];
+ vcpu_regs->svc_regs[2] = regs->spsr[MODE_SVC];
+ vcpu_regs->abt_regs[2] = regs->spsr[MODE_ABT];
+ vcpu_regs->und_regs[2] = regs->spsr[MODE_UND];
+
+ vcpu_regs->pc = regs->reg15;
+ vcpu_regs->cpsr = regs->cpsr;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EOPNOTSUPP; /* -ENOTSUPP is kernel-internal; use a userspace errno */
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EOPNOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EOPNOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EOPNOTSUPP;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return 0;
+}
diff --git a/arch/arm/kvm/arm_interrupts.S b/arch/arm/kvm/arm_interrupts.S
new file mode 100644
index 0000000..073a494
--- /dev/null
+++ b/arch/arm/kvm/arm_interrupts.S
@@ -0,0 +1,17 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+#include <asm/asm-offsets.h>
+#include <asm/kvm_asm.h>
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
new file mode 100644
index 0000000..e69de29
diff --git a/arch/arm/kvm/trace.c b/arch/arm/kvm/trace.c
new file mode 100644
index 0000000..8ea1155
--- /dev/null
+++ b/arch/arm/kvm/trace.c
@@ -0,0 +1,436 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include "trace.h"
+
+
+/******************************************************************************
+ * Simple event counting
+ */
+
/* One per-event counter plus its human-readable description. */
struct kvm_event {
	unsigned long long cnt;		/* number of occurrences; not atomic */
	char *descr;			/* printed next to the count */
};

/*
 * Event counter table, indexed by the EVENT_* ids from trace.h: the order of
 * entries here must match those ids, and the table length must stay equal to
 * KVM_EVENTC_ITEMS.
 */
static struct kvm_event kvm_eventc_log[KVM_EVENTC_ITEMS] =
{
	{ 0, "switch to guest" },
	{ 0, "exit from guest" },
	{ 0, "Block VCPU" },
	{ 0, "Exit to QEMU for IRQ window" },
	{ 0, "Switch VCPU mode" },
	{ 0, "VCPU IRQs on" },
	{ 0, "VCPU IRQs off" },
	{ 0, "Wait-for-interrupts" },
	{ 0, "Flush shadow page table" },
	{ 0, "Virtual TTBR change" },
	{ 0, "Read guest page table entry" },
	{ 0, "Map GVA to GFN" },
	{ 0, "Virtual DACR change" },
	{ 0, "VCPU switch to privileged mode" },
	{ 0, "VCPU switch from privileged mode" },
	{ 0, "VCPU process ID registers change" },
	{ 0, "Emulate Load/Store with translation" },
	{ 0, "Emulate MRS" },
	{ 0, "Emulate MSR" },
	{ 0, "Emulate CPS" },
	{ 0, "Need reschedule in execution loop" },
	{ 0, "MCR 7, 5, 0 - Invalidate entire I-cache" },
	{ 0, "MCR 7, 5, 1 - Invalidate line in I-cache MVA" },
	{ 0, "MCR 7, 5, 2 - Invalidate line in I-cache set/way" },
	{ 0, "MCR 7, 5, 7 - Flush branch target cache - MVA" },
	{ 0, "MCR 7, 6, 0 - Invalidate entire data cache" },
	{ 0, "MCR 7, 6, 1 - Invalidate data cache line - MVA" },
	{ 0, "MCR 7, 6, 2 - Invalidate data cache line - set/way" },
	{ 0, "MCR 7, 7, 0 - Invalidate D- and I-cache" },
	{ 0, "MCR 7, 10, 0 - Clean entire data cache" },
	{ 0, "MCR 7, 10, 1 - Clean data cache line - MVA" },
	{ 0, "MCR 7, 10, 4 - Data Synchronization Barrier (DSB)" },
	{ 0, "MCR 7, 14, 0 - Clean and invalidate entire D-cache" },
	{ 0, "MCR 7, 14, 1 - Clean and invalidate D-cache line - MVA" },
	{ 0, "MCR 7, 15, 0 - Clean and invalidate unified cache" },
	{ 0, "MCR 8, 5, 0 - Invalidate instruction TLB" },
	{ 0, "MCR 8, 6, 0 - Invalidate data TLB" },
	{ 0, "MCR 8, 7, 0 - Invalidate unified TLB" },
	{ 0, "Emulate Load-Store multiple" },
};
+
+void kvm_arm_count_event(unsigned int event)
+{
+ if (event >= KVM_EVENTC_ITEMS)
+ return;
+
+ kvm_eventc_log[event].cnt++;
+}
+
+void kvm_arm_init_eventc(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < KVM_EVENTC_ITEMS; i++)
+ kvm_eventc_log[i].cnt = 0;
+}
+
/* Doubly-linked list node wrapping one kvm_event for the sorted dump. */
struct kvm_event_order {
	struct kvm_event *event;	/* NULL only for the sentinel head */
	struct kvm_event_order *next;
	struct kvm_event_order *prev;
};
static struct kvm_event_order event_order[KVM_EVENTC_ITEMS];

/*
 * Insertion-sort the event counters into descending order and return the
 * head of a NULL-terminated singly-usable list (follow ->next only).
 *
 * The sentinel `head` lives on the stack: its ->event is NULL, which is what
 * terminates the insertion scan. Before returning, the last real node's
 * ->next is set to NULL so callers never touch the dead sentinel via ->next.
 * NOTE(review): the first node's ->prev still points at the expired stack
 * sentinel after return — safe only as long as callers never walk ->prev.
 * Also not reentrant: event_order[] is shared static state.
 */
static struct kvm_event_order *sort_kvm_event_log(void)
{
	unsigned int i;
	struct kvm_event_order *ptr;
	struct kvm_event_order head =
		{ .event = NULL, .next = &head, .prev = &head };

	for (i = 0; i < KVM_EVENTC_ITEMS; i++) {
		event_order[i].event = &kvm_eventc_log[i];
		/* Scan until a smaller-or-equal count (or the sentinel). */
		ptr = head.next;
		while (ptr->event != NULL &&
		       ptr->event->cnt > kvm_eventc_log[i].cnt) {
			ptr = ptr->next;
		}
		/* Link the new node in front of ptr. */
		ptr->prev->next = &event_order[i];
		event_order[i].prev = ptr->prev;
		event_order[i].next = ptr;
		ptr->prev = &event_order[i];
	}

	head.prev->next = NULL; /* Mark end of linked list */
	return head.next;
}
+
+/******************************************************************************
+ * Trace ring-buffer local to KVM/ARM
+ */
+
+#define KVM_TRACE_ACTIVITY
+#ifndef KVM_TRACE_ACTIVITY
+void kvm_trace_activity(unsigned int activity, char *fmt, ...)
+{
+}
+#else
+
+#define ACTIVITY_TRACE_ITEMS 50
+#define TRACE_DESCR_LEN 80
+static u32 activity_trace[ACTIVITY_TRACE_ITEMS];
+static u32 activity_trace_cnt[ACTIVITY_TRACE_ITEMS];
+static char activity_trace_descr[ACTIVITY_TRACE_ITEMS][TRACE_DESCR_LEN];
+static int activity_trace_index = 0;
+static bool trace_init = false;
+
+void kvm_trace_activity(unsigned int activity, char *fmt, ...)
+{
+ va_list ap;
+ unsigned int i;
+ char *ptr;
+
+ if (!trace_init) {
+ for (i = 0; i < ACTIVITY_TRACE_ITEMS; i++)
+ activity_trace_descr[i][0] = '\0';
+ trace_init = true;
+ }
+
+ if (activity_trace[activity_trace_index] == activity) {
+ activity_trace_cnt[activity_trace_index]++;
+ } else {
+ activity_trace_index = (activity_trace_index + 1)
+ % ACTIVITY_TRACE_ITEMS;
+ activity_trace[activity_trace_index] = activity;
+ activity_trace_cnt[activity_trace_index] = 0;
+
+ ptr = activity_trace_descr[activity_trace_index];
+ va_start(ap, fmt);
+ vsnprintf(ptr, TRACE_DESCR_LEN, fmt, ap);
+ va_end(ap);
+ }
+}
+#endif
+
+/******************************************************************************
+ * World-switch ring-buffer
+ */
+
/* Circular buffers recording guest PC at world-switch entry/exit. */
#define WS_TRACE_ITEMS 10
static u32 ws_trace_enter[WS_TRACE_ITEMS];	/* guest PC on entry */
static int ws_trace_enter_index = 0;		/* next entry slot to write */
static u32 ws_trace_exit[WS_TRACE_ITEMS];	/* guest PC on exit */
static int ws_trace_exit_index = 0;		/* next exit slot to write */
static u32 ws_trace_exit_codes[WS_TRACE_ITEMS];	/* ARM_EXCEPTION_* code */
DEFINE_MUTEX(ws_trace_mutex);			/* guards all of the above */
+
+void trace_ws_enter(u32 guest_pc)
+{
+ mutex_lock(&ws_trace_mutex);
+ ws_trace_enter[ws_trace_enter_index++] = guest_pc;
+ if (ws_trace_enter_index >= WS_TRACE_ITEMS)
+ ws_trace_enter_index = 0;
+ mutex_unlock(&ws_trace_mutex);
+}
+
+void trace_ws_exit(u32 guest_pc, u32 exit_code)
+{
+ mutex_lock(&ws_trace_mutex);
+ ws_trace_exit[ws_trace_exit_index] = guest_pc;
+ ws_trace_exit_codes[ws_trace_exit_index++] = exit_code;
+ if (ws_trace_exit_index >= WS_TRACE_ITEMS)
+ ws_trace_exit_index = 0;
+ mutex_unlock(&ws_trace_mutex);
+}
+
+void print_ws_trace(void)
+{
+ int i;
+ mutex_lock(&ws_trace_mutex);
+
+ if (ws_trace_enter_index != ws_trace_exit_index) {
+ kvm_msg("enter and exit WS trace count differ");
+ mutex_unlock(&ws_trace_mutex);
+ return;
+ }
+
+ /* Avoid potential endless loop */
+ if (ws_trace_enter_index < 0 || ws_trace_enter_index >= WS_TRACE_ITEMS) {
+ kvm_msg("ws_trace_enter_index out of bounds: %d",
+ ws_trace_enter_index);
+ mutex_unlock(&ws_trace_mutex);
+ return;
+ }
+
+ for (i = ws_trace_enter_index - 1; i != ws_trace_enter_index; i--) {
+ if (i < 0) {
+ i = WS_TRACE_ITEMS;
+ continue;
+ }
+
+ printk(KERN_ERR "Enter: %08x Exit: %08x (%d)\n",
+ ws_trace_enter[i],
+ ws_trace_exit[i],
+ ws_trace_exit_codes[i]);
+ }
+ mutex_unlock(&ws_trace_mutex);
+}
+
+/******************************************************************************
+ * Dump total debug info, or write to /proc/kvm
+ */
+
+struct kvm_vcpu *latest_vcpu = NULL;
+
+void print_kvm_debug_info(int (*print_fn)(print_fn_args), struct seq_file *m)
+{
+ int i;
+ struct kvm_vcpu_regs *regs;
+ char *mode = NULL;
+ char *exceptions[7];
+ struct kvm_vcpu *vcpu = latest_vcpu;
+ struct kvm_event_order *ptr;
+
+ print_fn(m, "KVM/ARM runtime info\n");
+ print_fn(m, "======================================================");
+ print_fn(m, "\n\n");
+
+ if (vcpu == NULL) {
+ print_fn(m, "No registered VCPU\n");
+ goto print_ws_hist;
+ }
+
+
+ switch (vcpu_mode(vcpu)) {
+ case MODE_USR: mode = "USR"; break;
+ case MODE_FIQ: mode = "FIQ"; break;
+ case MODE_IRQ: mode = "IRQ"; break;
+ case MODE_SVC: mode = "SVC"; break;
+ case MODE_ABT: mode = "ABT"; break;
+ case MODE_UND: mode = "UND"; break;
+ case MODE_SYS: mode = "SYS"; break;
+ }
+
+ vcpu_load(vcpu);
+ regs = &vcpu->arch.regs;
+
+ print_fn(m, "Virtual CPU state:\n\n");
+ print_fn(m, "PC is at: \t%08x\n", vcpu_reg(vcpu, 15));
+ print_fn(m, "CPSR: \t%08x\n(Mode: %s) (IRQs: %s) (FIQs: %s) "
+ " (Vec: %s)\n",
+ regs->cpsr, mode,
+ (regs->cpsr & PSR_I_BIT) ? "off" : "on",
+ (regs->cpsr & PSR_F_BIT) ? "off" : "on",
+ (regs->cpsr & PSR_V_BIT) ? "high" : "low");
+
+ for (i = 0; i <= 12; i++) {
+ if ((i % 4) == 0)
+ print_fn(m, "\nregs[%u]: ", i);
+
+ print_fn(m, "\t0x%08x", vcpu_reg_m(vcpu, i, MODE_USR));
+ }
+
+ print_fn(m, "\n\n");
+ print_fn(m, "Banked registers: \tr13\t\tr14\t\tspsr\n");
+ print_fn(m, "-------------------\t--------\t--------\t--------\n");
+ print_fn(m, " USR: \t%08x\t%08x\t////////\n",
+ vcpu_reg_m(vcpu, 13, MODE_USR),
+ vcpu_reg_m(vcpu, 14, MODE_USR));
+ print_fn(m, " SVC: \t%08x\t%08x\t%08x\n",
+ vcpu_reg_m(vcpu, 13, MODE_SVC),
+ vcpu_reg_m(vcpu, 14, MODE_SVC),
+ vcpu_spsr_m(vcpu, MODE_SVC));
+ print_fn(m, " ABT: \t%08x\t%08x\t%08x\n",
+ vcpu_reg_m(vcpu, 13, MODE_ABT),
+ vcpu_reg_m(vcpu, 14, MODE_ABT),
+ vcpu_spsr_m(vcpu, MODE_ABT));
+ print_fn(m, " UND: \t%08x\t%08x\t%08x\n",
+ vcpu_reg_m(vcpu, 13, MODE_UND),
+ vcpu_reg_m(vcpu, 14, MODE_UND),
+ vcpu_spsr_m(vcpu, MODE_UND));
+ print_fn(m, " IRQ: \t%08x\t%08x\t%08x\n",
+ vcpu_reg_m(vcpu, 13, MODE_IRQ),
+ vcpu_reg_m(vcpu, 14, MODE_IRQ),
+ vcpu_spsr_m(vcpu, MODE_IRQ));
+ print_fn(m, " FIQ: \t%08x\t%08x\t%08x\n",
+ vcpu_reg_m(vcpu, 13, MODE_FIQ),
+ vcpu_reg_m(vcpu, 14, MODE_FIQ),
+ vcpu_spsr_m(vcpu, MODE_FIQ));
+
+ print_fn(m, "\n");
+ print_fn(m, "fiq regs:\t%08x\t%08x\t%08x\t%08x\n"
+ " \t%08x\n",
+ vcpu_reg_m(vcpu, 8, MODE_FIQ),
+ vcpu_reg_m(vcpu, 9, MODE_FIQ),
+ vcpu_reg_m(vcpu, 10, MODE_FIQ),
+ vcpu_reg_m(vcpu, 11, MODE_FIQ),
+ vcpu_reg_m(vcpu, 12, MODE_FIQ));
+
+print_ws_hist:
+ /*
+ * Print world-switch trace circular buffer
+ */
+ print_fn(m, "\n\nWorld switch history:\n");
+ print_fn(m, "---------------------\n");
+ mutex_lock(&ws_trace_mutex);
+
+ if (ws_trace_enter_index != ws_trace_exit_index ||
+ ws_trace_enter_index < 0 ||
+ ws_trace_enter_index >= WS_TRACE_ITEMS)
+ {
+ mutex_unlock(&ws_trace_mutex);
+ goto print_trace_activity;
+ }
+
+ exceptions[0] = "reset";
+ exceptions[1] = "undefined";
+ exceptions[2] = "software";
+ exceptions[3] = "prefetch abort";
+ exceptions[4] = "data abort";
+ exceptions[5] = "irq";
+ exceptions[6] = "fiq";
+
+ for (i = ws_trace_enter_index - 1; i != ws_trace_enter_index; i--) {
+ if (i < 0) {
+ i = WS_TRACE_ITEMS;
+ continue;
+ }
+
+ print_fn(m, "Enter: %08x Exit: %08x (%s)\n",
+ ws_trace_enter[i], ws_trace_exit[i],
+ exceptions[ws_trace_exit_codes[i]]);
+ }
+ mutex_unlock(&ws_trace_mutex);
+
+print_trace_activity:
+#ifdef KVM_TRACE_ACTIVITY
+ /*
+ * Print activity trace
+ */
+ print_fn(m, "\n\nActivity circular buffer:\n");
+ print_fn(m, "-----------------------------\n");
+ for (i = activity_trace_index - 1; i != activity_trace_index; i--) {
+ if (i < 0) {
+ i = ACTIVITY_TRACE_ITEMS;
+ continue;
+ }
+
+ print_fn(m, "%lu: \t %s\n",
+ activity_trace_cnt[i],
+ activity_trace_descr[i]);
+ }
+#endif
+
+ /*
+ * Print event counters sorted
+ */
+ print_fn(m, "\n\nEvent counters:\n");
+ print_fn(m, "-----------------------------\n");
+ ptr = sort_kvm_event_log();
+ while (ptr != NULL) {
+ if (ptr->event->cnt > 0) {
+ print_fn(m, "%12llu # %s\n", ptr->event->cnt,
+ ptr->event->descr);
+ }
+ ptr = ptr->next;
+ }
+
+ if (vcpu != NULL) {
+ vcpu_put(vcpu);
+ }
+}
+
/* seq_file-signature adapter that forwards to printk; @m is unused. */
static int __printk_relay(struct seq_file *m, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);

	return 0;
}
+
+void kvm_dump_vcpu_state(void)
+{
+ print_kvm_debug_info(&__printk_relay, NULL);
+}
+
+/******************************************************************************
+ * Printk-log-wrapping functionality
+ */
+
+#define TMP_LOG_LEN 512
+static char __tmp_log_data[TMP_LOG_LEN];
+DEFINE_MUTEX(__tmp_log_lock);
+void __kvm_print_msg(char *fmt, ...)
+{
+ va_list ap;
+ unsigned int size;
+
+ mutex_lock(&__tmp_log_lock);
+
+ va_start(ap, fmt);
+ size = vsnprintf(__tmp_log_data, TMP_LOG_LEN, fmt, ap);
+ va_end(ap);
+
+ if (size >= TMP_LOG_LEN)
+ printk("Message exceeded log length!\n");
+ else
+ printk("%s", __tmp_log_data);
+
+ mutex_unlock(&__tmp_log_lock);
+}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
new file mode 100644
index 0000000..020240a
--- /dev/null
+++ b/arch/arm/kvm/trace.h
@@ -0,0 +1,108 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ *
+ *
+ * This file contains debugging and tracing functions and definitions for KVM/ARM.
+ *
+ */
+#ifndef __ARM_KVM_TRACE_H__
+#define __ARM_KVM_TRACE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+
/*
 * Event counter ids. These index kvm_eventc_log[] in trace.c, so the order
 * here must match the description table there, and KVM_EVENTC_ITEMS must
 * stay equal to the number of EVENT_* ids.
 */
#define EVENT_GUEST_ENTER 0
#define EVENT_GUEST_EXIT 1
#define EVENT_VCPU_BLOCK 2
#define EVENT_IRQ_WINDOW 3
#define EVENT_SWITCH_MODE 4
#define EVENT_VCPU_IRQS_ON 5
#define EVENT_VCPU_IRQS_OFF 6
#define EVENT_WFI 7
#define EVENT_FLUSH_SHADOW 8
#define EVENT_MOD_TTBR 9
#define EVENT_READ_GUEST_ENTRY 10
#define EVENT_MAP_GVA_TO_GFN 11
#define EVENT_DACR_CHANGE 12
#define EVENT_SWITCH_PRIV 13
#define EVENT_SWITCH_USER 14
#define EVENT_VCPU_ASID 15
#define EVENT_LS_TRANS 16
#define EVENT_EMUL_MRS 17
#define EVENT_EMUL_MSR 18
#define EVENT_EMUL_CPS 19
#define EVENT_NEED_RESCHED 20
/* MCR CRn, CRm, opc2 cache/TLB maintenance traps */
#define EVENT_MCR_7_5_0 21
#define EVENT_MCR_7_5_1 22
#define EVENT_MCR_7_5_2 23
#define EVENT_MCR_7_5_7 24
#define EVENT_MCR_7_6_0 25
#define EVENT_MCR_7_6_1 26
#define EVENT_MCR_7_6_2 27
#define EVENT_MCR_7_7_0 28
#define EVENT_MCR_7_10_0 29
#define EVENT_MCR_7_10_1 30
#define EVENT_MCR_7_10_4 31
#define EVENT_MCR_7_14_0 32
#define EVENT_MCR_7_14_1 33
#define EVENT_MCR_7_15_0 34
#define EVENT_MCR_8_5_X 35
#define EVENT_MCR_8_6_X 36
#define EVENT_MCR_8_7_X 37
#define EVENT_EMUL_LSMULT 38

/* Total number of EVENT_* ids above. */
#define KVM_EVENTC_ITEMS 39
+
+void kvm_arm_init_eventc(void);
+void kvm_arm_count_event(unsigned int event);
+void kvm_dump_vcpu_state(void);
+
+void trace_ws_enter(u32 guest_pc);
+void trace_ws_exit(u32 guest_pc, u32 exit_code);
+
+
+#define print_fn_args struct seq_file *, const char *, ...
+void print_kvm_debug_info(int (*print_fn)(print_fn_args), struct seq_file *m);
+
+
+void __kvm_print_msg(char *_fmt, ...);
+
/*
 * Logging macros built on __kvm_print_msg.
 * Fixes vs. original: the gcc-specific __FUNCTION__ is replaced by the C99
 * standard __func__ (checkpatch flags the former), and
 * KVMARM_NOT_IMPLEMENTED now uses the do { } while (0) idiom so it behaves
 * as a single statement in if/else bodies.
 */
#define kvm_err(err, fmt, args...) do {			\
	__kvm_print_msg(KERN_ERR "KVM error [%s:%d]: (%d) ", \
			__func__, __LINE__, err); \
	__kvm_print_msg(fmt "\n", ##args); \
} while (0)

#define __kvm_msg(fmt, args...) do {			\
	__kvm_print_msg(KERN_ERR "KVM [%s:%d]: ", __func__, __LINE__); \
	__kvm_print_msg(fmt, ##args); \
} while (0)

#define kvm_msg(__fmt, __args...) __kvm_msg(__fmt "\n", ##__args)


#define KVMARM_NOT_IMPLEMENTED() \
do { \
	printk(KERN_ERR "KVM not implemented [%s:%d] in %s \n", \
	       __FILE__, __LINE__, __func__); \
} while (0)
+
+extern bool trace_gva_to_gfn;
+void print_shadow_mapping(struct kvm_vcpu *vcpu, gva_t gva);
+void print_ws_trace(void);
+void kvm_trace_activity(unsigned int activity, char *fmt, ...);
+
+#endif /* __ARM_KVM_TRACE_H__ */
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index cfbc0f1..5b00905 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -33,6 +33,7 @@ config ARCH_VEXPRESS_CA15X4
bool "Versatile Express Cortex-A15x4 tile"
depends on VEXPRESS_EXTENDED_MEMORY_MAP
select CPU_V7
+ select ARM_VIRT_EXT
select ARM_GIC
select HAVE_ARCH_TIMERS
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 04de742..ad77805 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -628,6 +628,13 @@ config ARM_LPAE
Say Y if you have an ARMv7 processor supporting the LPAE page table
format and you would like to access memory beyond the 4GB limit.
+config ARM_VIRT_EXT
+ bool "Support for ARM Virtualization Extensions"
+ depends on ARM_LPAE
+ help
+ Say Y if you have an ARMv7 processor supporting the ARM hardware
+ Virtualization extensions.
+
config ARCH_PHYS_ADDR_T_64BIT
def_bool ARM_LPAE
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ea2dc1a..d2ab07e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -310,6 +310,7 @@ struct kvm_translation {
struct kvm_interrupt {
/* in */
__u32 irq;
+ __u8 raise;
};
/* for KVM_GET_DIRTY_LOG */
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 2/8] ARM: KVM: Hypervisor identity mapping
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
@ 2011-06-03 15:03 ` Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 3/8] ARM: KVM: Add hypervisor initialization Christoffer Dall
` (8 subsequent siblings)
9 siblings, 0 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:03 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
Extends the identity-mapping feature so that KVM can set up an identity
mapping for Hyp mode with the AP[1] bit set, as required by the
specification, and also supports freeing the created sub-pmds after they
are no longer needed.
---
arch/arm/include/asm/pgtable-3level-hwdef.h | 1 +
arch/arm/include/asm/pgtable.h | 5 +++
arch/arm/mm/idmap.c | 47 ++++++++++++++++++++++++++-
3 files changed, 52 insertions(+), 1 deletions(-)
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 6c0fb9b..9142208 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -48,6 +48,7 @@
#endif
#define PMD_SECT_AP_WRITE (_AT(pmdval_t, 0))
#define PMD_SECT_AP_READ (_AT(pmdval_t, 0))
+#define PMD_SECT_AP1 (_AT(pmdval_t, 1) << 6)
#define PMD_SECT_TEX(x) (_AT(pmdval_t, 0))
/*
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 110f6f4..56081c0 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -403,6 +403,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
void identity_mapping_add(pgd_t *, unsigned long, unsigned long);
void identity_mapping_del(pgd_t *, unsigned long, unsigned long);
+#ifdef CONFIG_KVM
+void hyp_identity_mapping_add(pgd_t *, unsigned long, unsigned long);
+void hyp_identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end);
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* CONFIG_MMU */
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 834b803..66125d5 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -33,11 +33,16 @@ static void idmap_add_pmd(pgd_t *pgd, unsigned long addr, unsigned long end,
flush_pmd_entry(pmd);
}
-void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void __identity_mapping_add(pgd_t *pgd, unsigned long addr,
+ unsigned long end, bool hyp_mapping)
{
unsigned long prot, next;
prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
+
+ if (hyp_mapping)
+ prot |= PMD_SECT_AP1;
+
if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
prot |= PMD_BIT4;
@@ -47,6 +52,12 @@ void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
} while (addr = next, addr < end);
}
/* Public wrapper: plain kernel identity mapping, without the Hyp AP[1] bit. */
void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	__identity_mapping_add(pgd, addr, end, false);
}
+
+
#ifdef CONFIG_SMP
static void idmap_del_pmd(pgd_t *pgd, unsigned long addr, unsigned long end)
{
@@ -69,6 +80,40 @@ void identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end)
}
#endif
+#ifdef CONFIG_KVM
/* Identity mapping for Hyp mode: sections get AP[1] set, as the architecture
 * requires for Hyp-mode translations. */
void hyp_identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	__identity_mapping_add(pgd, addr, end, true);
}
+
/*
 * Free the pmd table that maps @addr under @pgd.
 * NOTE(review): pmd_offset() returns a pointer to the entry covering @addr,
 * not necessarily the base of the pmd page, while pmd_free() expects the
 * table base — confirm callers only pass pgd-aligned addresses.
 */
static void hyp_idmap_del_pmd(pgd_t *pgd, unsigned long addr)
{
	pmd_t *pmd;

	pmd = pmd_offset(pgd, addr);
	pmd_free(NULL, pmd);
}
+
+/*
+ * This version actually frees the underlying pmds for all pgds in range and
+ * clear the pgds themselves afterwards.
+ */
/*
 * This version actually frees the underlying pmds for all pgds in range and
 * clear the pgds themselves afterwards.
 */
void hyp_identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	unsigned long next;	/* start of the next pgd-sized chunk */
	pgd_t *next_pgd;

	do {
		next = pgd_addr_end(addr, end);
		next_pgd = pgd + pgd_index(addr);
		/* Skip empty/bad entries; otherwise free the pmd and clear. */
		if (!pgd_none_or_clear_bad(next_pgd)) {
			hyp_idmap_del_pmd(next_pgd, addr);
			pgd_clear(next_pgd);
		}
	} while (addr = next, addr < end);
}
+#endif
+
/*
* In order to soft-boot, we need to insert a 1:1 mapping in place of
* the user-mode pages. This will then ensure that we have predictable
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 3/8] ARM: KVM: Add hypervisor initialization
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 2/8] ARM: KVM: Hypervisor identity mapping Christoffer Dall
@ 2011-06-03 15:03 ` Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 4/8] ARM: KVM: Memory virtualization setup Christoffer Dall
` (7 subsequent siblings)
9 siblings, 0 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:03 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
Sets up the required registers to run code in HYP-mode from the kernel.
No major controversies, but we should consider how to deal with SMP
support for hypervisor stack page.
It works by setting the HVBAR so that the kernel can execute code in
Hyp-mode with the MMU disabled; that code then initializes the remaining
registers and enables the MMU for Hyp-mode.
Also provides memory mapping code to map required code pages and data
structures accessed in Hyp mode at the same virtual address as the
host kernel virtual addresses, but which conforms to the architectural
requirements for translations in Hyp mode.
---
arch/arm/include/asm/kvm_arm.h | 103 +++++++++++
arch/arm/include/asm/kvm_asm.h | 12 +
arch/arm/include/asm/kvm_host.h | 1
arch/arm/include/asm/kvm_mmu.h | 40 ++++
arch/arm/include/asm/pgtable-3level-hwdef.h | 5 +
arch/arm/include/asm/pgtable.h | 5 +
arch/arm/kvm/arm.c | 163 +++++++++++++++++
arch/arm/kvm/arm_interrupts.S | 124 +++++++++++++
arch/arm/kvm/arm_mmu.c | 255 +++++++++++++++++++++++++++
9 files changed, 708 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/include/asm/kvm_arm.h
create mode 100644 arch/arm/include/asm/kvm_mmu.h
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
new file mode 100644
index 0000000..835abd1
--- /dev/null
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -0,0 +1,103 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __KVM_ARM_H__
+#define __KVM_ARM_H__
+
+#include <asm/types.h>
+
/* Hyp Configuration Register (HCR) bits */
#define HCR_TGE		(1 << 27)
#define HCR_TVM		(1 << 26)
#define HCR_TTLB	(1 << 25)
#define HCR_TPU		(1 << 24)
#define HCR_TPC		(1 << 23)
#define HCR_TSW		(1 << 22)
#define HCR_TAC		(1 << 21)
#define HCR_TIDCP	(1 << 20)
#define HCR_TSC		(1 << 19)
#define HCR_TID3	(1 << 18)
#define HCR_TID2	(1 << 17)
#define HCR_TID1	(1 << 16)
#define HCR_TID0	(1 << 15)
#define HCR_TWE		(1 << 14)
#define HCR_TWI		(1 << 13)
#define HCR_DC		(1 << 12)
#define HCR_BSU		(3 << 10)
#define HCR_FB		(1 << 9)
#define HCR_VA		(1 << 8)
#define HCR_VI		(1 << 7)
#define HCR_VF		(1 << 6)
#define HCR_AMO		(1 << 5)
#define HCR_IMO		(1 << 4)
#define HCR_FMO		(1 << 3)
#define HCR_PTW		(1 << 2)
#define HCR_SWIO	(1 << 1)
#define HCR_VM		1
/* Fix: HCR_AMO was listed twice in the original mask. */
#define HCR_GUEST_MASK (HCR_TSC | HCR_TWE | HCR_TWI | HCR_VM | \
			HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO)

/* Hyp System Control Register (HSCTLR) bits */
#define HSCTLR_TE	(1 << 30)
#define HSCTLR_EE	(1 << 25)
#define HSCTLR_FI	(1 << 21)
#define HSCTLR_WXN	(1 << 19)
#define HSCTLR_I	(1 << 12)
#define HSCTLR_C	(1 << 2)
#define HSCTLR_A	(1 << 1)
#define HSCTLR_M	1
#define HSCTLR_MASK	(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I | \
			 HSCTLR_WXN | HSCTLR_FI | HSCTLR_EE | HSCTLR_TE)

/* TTBCR and HTCR Registers bits */
#define TTBCR_EAE	(1 << 31)
#define TTBCR_IMP	(1 << 30)
#define TTBCR_SH1	(3 << 28)
#define TTBCR_ORGN1	(3 << 26)
#define TTBCR_IRGN1	(3 << 24)
#define TTBCR_EPD1	(1 << 23)
#define TTBCR_A1	(1 << 22)
#define TTBCR_T1SZ	(3 << 16)
#define TTBCR_SH0	(3 << 12)
#define TTBCR_ORGN0	(3 << 10)
#define TTBCR_IRGN0	(3 << 8)
#define TTBCR_EPD0	(1 << 7)
#define TTBCR_T0SZ	3
#define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)


/* Virtualization Translation Control Register (VTCR) bits */
#define VTCR_SH0	(3 << 12)
#define VTCR_ORGN0	(3 << 10)
#define VTCR_IRGN0	(3 << 8)
#define VTCR_SL0	(3 << 6)
#define VTCR_S		(1 << 4)
#define VTCR_T0SZ	3
/*
 * Fix: the original definition included VTCR_MASK in its own expansion.
 * Self-referential macros are not re-expanded, so any use of VTCR_MASK left
 * an unresolved VTCR_MASK token and failed to compile.
 */
#define VTCR_MASK	(VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0 | VTCR_SL0 | \
			 VTCR_S | VTCR_T0SZ)
#define VTCR_HTCR_SH	(VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0)
#define VTCR_SL_L2	0		/* Starting-level: 2 */
#define VTCR_SL_L1	(1 << 6)	/* Starting-level: 1 */
#define VTCR_GUEST_SL	VTCR_SL_L1
#define VTCR_GUEST_T0SZ	0
/* VTTBR alignment requirement depends on the starting translation level. */
#if VTCR_GUEST_SL == 0
#define VTTBR_X		(14 - VTCR_GUEST_T0SZ)
#else
#define VTTBR_X		(5 - VTCR_GUEST_T0SZ)
#endif
+
+
+#endif /* __KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index c3d4458..99991b4 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -24,5 +24,17 @@
#define ARM_EXCEPTION_DATA_ABORT 4
#define ARM_EXCEPTION_IRQ 5
#define ARM_EXCEPTION_FIQ 6
+#define ARM_EXCEPTION_HVC 7
+
+/*
+ * SMC Hypervisor API call numbers
+ */
+#ifdef __ASSEMBLY__
+#define SMC_HYP_CALL(n, x) .equ n, x
+#else /* !__ASSEMBLY__ */
+#define SMC_HYP_CALL(n, x) asm(".equ " #n ", " #x);
+#endif /* __ASSEMBLY__ */
+
+SMC_HYP_CALL(SMCHYP_HVBAR_W , 0xfffffff0)
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6e8a08d..9fa9b20 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -31,6 +31,7 @@ struct kvm_vcpu;
u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
struct kvm_arch {
+ pgd_t *pgd; /* 1-level 2nd stage table */
};
#define EXCEPTION_NONE 0
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
new file mode 100644
index 0000000..d22aad0
--- /dev/null
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -0,0 +1,40 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_MMU_H__
+#define __ARM_KVM_MMU_H__
+
+/*
+ * The architecture supports 40-bit IPA as input to the 2nd stage translations
+ * and PTRS_PER_PGD2 could therefore be 1024.
+ *
+ * To save a bit of memory and to avoid alignment issues we assume 39-bit IPA
+ * for now, but remember that the level-1 table must be aligned to its size.
+ */
+#define PTRS_PER_PGD2 512
+#define PGD2_ORDER get_order(PTRS_PER_PGD2 * sizeof(pgd_t))
+
+extern pgd_t *kvm_hyp_pgd;
+
+int create_hyp_mappings(pgd_t *hyp_pgd,
+ unsigned long start,
+ unsigned long end);
+void remove_hyp_mappings(pgd_t *hyp_pgd,
+ unsigned long start,
+ unsigned long end);
+void free_hyp_pmds(pgd_t *hyp_pgd);
+
+#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 9142208..6cfe57c 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -31,6 +31,9 @@
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_BIT4 (_AT(pmdval_t, 0))
#define PMD_DOMAIN(x) (_AT(pmdval_t, 0))
#define PMD_APTABLE_SHIFT	(61)
/* Fix: the shift macro defined above is PMD_APTABLE_SHIFT; the original
 * referenced an undefined PGD_APTABLE_SHIFT, breaking any use of
 * PMD_APTABLE. */
#define PMD_APTABLE		(_AT(pgdval_t, 3) << PMD_APTABLE_SHIFT)
#define PMD_PXNTABLE		(_AT(pgdval_t, 1) << 59)
/*
* - section
@@ -43,8 +46,10 @@
#ifdef __ASSEMBLY__
/* avoid 'shift count out of range' warning */
#define PMD_SECT_XN (0)
+#define PMD_SECT_PXN (0)
#else
#define PMD_SECT_XN ((pmdval_t)1 << 54)
+#define PMD_SECT_PXN ((pmdval_t)1 << 53)
#endif
#define PMD_SECT_AP_WRITE (_AT(pmdval_t, 0))
#define PMD_SECT_AP_READ (_AT(pmdval_t, 0))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 56081c0..2906f35 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -87,6 +87,7 @@ extern pgprot_t pgprot_kernel;
#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
+#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
@@ -217,6 +218,10 @@ static inline pmd_t *pgd_page_vaddr(pgd_t pgd)
#ifdef CONFIG_ARM_LPAE
#define pmd_bad(pmd) (!(pmd_val(pmd) & 2))
+#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_TABLE)
+#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_SECT)
#define copy_pmd(pmdpd,pmdps) \
do { \
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2157c1e..4f691be 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -22,13 +22,31 @@
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/sched.h>
+#include <asm/unified.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
#include "trace.h"
+static void *kvm_arm_hyp_stack_page = NULL;
+extern unsigned long __kvm_hyp_init, __kvm_hyp_init_end;
+extern unsigned long __kvm_hyp_vector, __kvm_hyp_vector_end;
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern unsigned long __kvm_vcpu_run_end;
+
+/* The VMID used in the VTTBR */
+#define VMID_SIZE (1<<8)
+static DECLARE_BITMAP(kvm_vmids, VMID_SIZE);
+static DEFINE_MUTEX(kvm_vmids_mutex);
+
int kvm_arch_hardware_enable(void *garbage)
{
return 0;
@@ -36,6 +54,7 @@ int kvm_arch_hardware_enable(void *garbage)
void kvm_arch_hardware_disable(void *garbage)
{
+ /* There is no need for this now, so we just ignore that */
}
int kvm_arch_hardware_setup(void)
@@ -297,13 +316,157 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EINVAL;
}
+static int init_hyp_mode(void)
+{
+ phys_addr_t init_phys_addr, init_end_phys_addr;
+ unsigned long vector_ptr, hyp_stack_ptr;
+ int err = 0;
+
+ /*
+ * Allocate Hyp level-1 page table
+ */
+ kvm_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ if (!kvm_hyp_pgd)
+ return -ENOMEM;
+
+ /*
+ * Allocate stack page for Hypervisor-mode
+ */
+ kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
+ if (!kvm_arm_hyp_stack_page) {
+ err = -ENOMEM;
+ goto out_free_pgd;
+ }
+
+ hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
+
+ init_phys_addr = virt_to_phys((void *)&__kvm_hyp_init);
+ init_end_phys_addr = virt_to_phys((void *)&__kvm_hyp_init_end);
+
+ /*
+ * Create identity mapping
+ */
+ hyp_identity_mapping_add(kvm_hyp_pgd,
+ (unsigned long)init_phys_addr,
+ (unsigned long)init_end_phys_addr);
+
+ /*
+ * Set the HVBAR
+ */
+ BUG_ON(init_phys_addr & 0x1f);
+ asm volatile (
+ "mov r0, %[vector_ptr]\n\t"
+ "ldr r7, =SMCHYP_HVBAR_W\n\t"
+ "smc #0\n\t" :
+ : [vector_ptr] "r" ((unsigned long)init_phys_addr)
+ : "r0", "r7");
+
+ /*
+ * Call initialization code
+ */
+ asm volatile (
+ "mov r0, %[pgd_ptr]\n\t"
+ "mov r1, %[stack_ptr]\n\t"
+ "hvc #0\n\t" :
+ : [pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
+ [stack_ptr] "r" (hyp_stack_ptr)
+ : "r0", "r1");
+
+ /*
+ * Unmap the identity mapping
+ */
+ hyp_identity_mapping_del(kvm_hyp_pgd,
+ (unsigned long)init_phys_addr,
+ (unsigned long)init_end_phys_addr);
+
+ /*
+ * Set the HVBAR to the virtual kernel address
+ */
+ vector_ptr = (unsigned long)&__kvm_hyp_vector;
+ asm volatile (
+ "mov r0, %[vector_ptr]\n\t"
+ "ldr r7, =SMCHYP_HVBAR_W\n\t"
+ "smc #0\n\t" :
+ : [vector_ptr] "r" ((unsigned long)vector_ptr)
+ : "r0", "r7");
+
+ return err;
+out_free_pgd:
+ kfree(kvm_hyp_pgd);
+ kvm_hyp_pgd = NULL;
+ return err;
+}
+
+static int init_hyp_memory(void)
+{
+ int err = 0;
+ unsigned long start, end;
+
+ /*
+ * Map Hyp exception vectors
+ */
+ start = (unsigned long)&__kvm_hyp_vector;
+ end = (unsigned long)&__kvm_hyp_vector_end;
+ err = create_hyp_mappings(kvm_hyp_pgd, start, end);
+ if (err)
+ goto out_free_mappings;
+
+ /*
+ * Map the world-switch code
+ */
+ start = (unsigned long)&__kvm_vcpu_run;
+ end = (unsigned long)&__kvm_vcpu_run_end;
+ err = create_hyp_mappings(kvm_hyp_pgd, start, end);
+ if (err)
+ goto out_free_mappings;
+
+ /*
+ * Map the Hyp stack page
+ */
+ start = (unsigned long)kvm_arm_hyp_stack_page;
+ end = start + PAGE_SIZE - 1;
+ err = create_hyp_mappings(kvm_hyp_pgd, start, end);
+ if (err)
+ goto out_free_mappings;
+
+ /* TODO: Is this necessary? */
+ flush_tlb_all();
+
+ /* TODO: Is this necessary? */
+ __asm__ volatile ("dsb\n\t"
+ "isb\n\t");
+
+ return err;
+out_free_mappings:
+ free_hyp_pmds(kvm_hyp_pgd);
+ return err;
+}
+
int kvm_arch_init(void *opaque)
{
+ int err;
+
+ err = init_hyp_mode();
+ if (err)
+ goto out_err;
+
+ err = init_hyp_memory();
+ if (err)
+ goto out_err;
+
+ set_bit(0, kvm_vmids);
return 0;
+out_err:
+ return err;
}
void kvm_arch_exit(void)
{
+ if (kvm_hyp_pgd) {
+ free_hyp_pmds(kvm_hyp_pgd);
+ kfree(kvm_hyp_pgd);
+ kvm_hyp_pgd = NULL;
+ }
}
static int k_show(struct seq_file *m, void *v)
diff --git a/arch/arm/kvm/arm_interrupts.S b/arch/arm/kvm/arm_interrupts.S
index 073a494..e0bcf9e 100644
--- a/arch/arm/kvm/arm_interrupts.S
+++ b/arch/arm/kvm/arm_interrupts.S
@@ -13,5 +13,129 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
+
+#include <linux/linkage.h>
+#include <asm/unified.h>
+#include <asm/page.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor initialization
+@ - should be called with:
+@ r0 = Hypervisor pgd pointer
+@ r1 = top of Hyp stack (kernel VA)
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ .text
+ .align 12
+__kvm_hyp_init:
+ .globl __kvm_hyp_init
+
+ @ Hyp-mode exception vector
+ nop
+ nop
+ nop
+ nop
+ nop
+ b __do_hyp_init
+ nop
+ nop
+
+__do_hyp_init:
+ @ Set the sp to end of this page and push data for later use
+ mov sp, pc
+ bic sp, sp, #0x0ff
+ bic sp, sp, #0xf00
+ add sp, sp, #0x1000
+ push {r1, r2, r12}
+
+ @ Set the HTTBR to be the same as the TTBR1 holding the kernel
+ @ level-1 page table
+ mrrc p15, 1, r1, r2, c2
+ @mov r1, #0
+ mcrr p15, 4, r0, r2, c2
+
+ @ Set the HTCR and VTCR to the same shareability and cacheability
+ @ settings as the non-secure TTBCR and with T0SZ == 0.
+ mrc p15, 4, r0, c2, c0, 2 @ HTCR
+ ldr r12, =HTCR_MASK
+ bic r0, r0, r12
+ mrc p15, 0, r1, c2, c0, 2 @ TTBCR
+ and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
+ orr r0, r0, r1
+ mcr p15, 4, r0, c2, c0, 2 @ HTCR
+
+ mrc p15, 4, r1, c2, c1, 2 @ VTCR
+ bic r1, r1, #(VTCR_HTCR_SH | VTCR_SL0)
+ bic r0, r0, #(~VTCR_HTCR_SH)
+ orr r1, r0, r1
+ orr r1, r1, #(VTCR_SL_L1 | VTCR_GUEST_T0SZ)
+ mcr p15, 4, r1, c2, c1, 2 @ VTCR
+
+ @ Use the same memory attributes for hyp. accesses as the kernel
+ @ (copy MAIRx to HMAIRx).
+ mrc p15, 0, r0, c10, c2, 0
+ mcr p15, 4, r0, c10, c2, 0
+ mrc p15, 0, r0, c10, c2, 1
+ mcr p15, 4, r0, c10, c2, 1
+
+ @ Set the HSCTLR to:
+ @ - ARM/THUMB exceptions: Kernel config
+ @ - Endianness: Kernel config
+ @ - Fast Interrupt Features: Kernel config
+ @ - Write permission implies XN: disabled
+ @ - Instruction cache: enabled
+ @ - Data/Unified cache: enabled
+ @ - Memory alignment checks: enabled
+ @ - MMU: enabled (this code must be run from an identity mapping)
+ mrc p15, 4, r0, c1, c0, 0 @ HSCR
+ ldr r12, =HSCTLR_MASK
+ bic r0, r0, r12
+ mrc p15, 0, r1, c1, c0, 0 @ SCTLR
+ ldr r12, =(HSCTLR_TE | HSCTLR_EE | HSCTLR_FI)
+ and r1, r1, r12
+ ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_I)
+ orr r1, r1, r12
+ orr r0, r0, r1
+ isb
+ mcr p15, 4, r0, c1, c0, 0 @ HSCR
+ isb
+
+ @ Set stack pointer and return to the kernel
+ pop {r1, r2, r12}
+ mov sp, r1
+ eret
+
+ .ltorg
+
+ .align 12
+
+ __kvm_init_sp:
+ .globl __kvm_hyp_init_end
+__kvm_hyp_init_end:
+
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor world-switch code
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/*
+ * This is a stub
+ */
+ENTRY(__kvm_vcpu_run)
+ mov pc, lr
+__kvm_vcpu_run_end:
+ .globl __kvm_vcpu_run_end
+
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor exception vector and handlers
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .align 5
+__kvm_hyp_vector:
+ .globl __kvm_hyp_vector
+ nop
+__kvm_hyp_vector_end:
+ .globl __kvm_hyp_vector_end
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
index e69de29..683f971 100644
--- a/arch/arm/kvm/arm_mmu.c
+++ b/arch/arm/kvm/arm_mmu.c
@@ -0,0 +1,255 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <linux/mman.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/pgalloc.h>
+
+#include "../mm/mm.h"
+#include "trace.h"
+
+extern struct mm_struct init_mm;
+
+pgd_t *kvm_hyp_pgd = NULL;
+
+static void free_hyp_ptes(pmd_t *hyp_pmd, unsigned long addr)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
+ pmd = hyp_pmd + i;
+ if (!pmd_none(*pmd) && pmd_table(*pmd)) {
+ pte = pte_offset_kernel(hyp_pmd, addr);
+ pte_free_kernel(NULL, pte);
+ }
+ }
+}
+
+/*
+ * Free Hyp-mode level-2 tables and their child level-3 tables.
+ */
+void free_hyp_pmds(pgd_t *hyp_pgd)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ unsigned long addr, next, end;
+
+ addr = PAGE_OFFSET;
+ end = ~0;
+ do {
+ next = pgd_addr_end(addr, (~0));
+ pgd = hyp_pgd + pgd_index(addr);
+
+ BUG_ON(pgd_bad(*pgd));
+
+ if (pgd_none(*pgd))
+ continue;
+
+ pmd = pmd_offset(pgd, addr);
+ free_hyp_ptes(pmd, addr);
+ pmd_free(NULL, pmd);
+ } while (addr = next, addr != end);
+}
+
+static void remove_hyp_pte_mappings(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte;
+
+ do {
+ pte = pte_offset_kernel(pmd, addr);
+ pte_clear(NULL, addr, pte);
+ } while (addr += PAGE_SIZE, addr < end);
+}
+
+static void remove_hyp_pmd_mappings(pgd_t *pgd, unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ do {
+ next = pmd_addr_end(addr, end);
+ pmd = pmd_offset(pgd, addr);
+
+ BUG_ON(pmd_sect(*pmd));
+
+ if (!pmd_none(*pmd))
+ remove_hyp_pte_mappings(pmd, addr, next);
+ } while (addr = next, addr < end);
+}
+
+/*
+ * Clear hypervisor mappings from specified range (doesn't actually free the
+ * page tables).
+ */
+void remove_hyp_mappings(pgd_t *hyp_pgd, unsigned long start,
+ unsigned long end)
+{
+ pgd_t *pgd;
+ unsigned long addr, next;
+
+ BUG_ON(start > end);
+ BUG_ON(start < PAGE_OFFSET);
+
+ addr = start;
+ do {
+ next = pgd_addr_end(addr, end);
+ pgd = hyp_pgd + pgd_index(addr);
+
+ BUG_ON(pgd_bad(*pgd));
+
+ if (pgd_none(*pgd))
+ continue;
+
+ remove_hyp_pmd_mappings(pgd, addr, next);
+ } while (addr = next, addr < end);
+}
+
+extern unsigned long __kvm_hyp_vector, __kvm_hyp_vector_end;
+
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte;
+ struct page *page;
+
+ addr &= PAGE_MASK;
+ do {
+ pte = pte_offset_kernel(pmd, addr);
+ BUG_ON(!virt_addr_valid(addr));
+ page = virt_to_page(addr);
+
+ set_pte_ext(pte, mk_pte(page, PAGE_HYP), 0);
+ } while (addr += PAGE_SIZE, addr < end);
+}
+
+static int create_hyp_pmd_mappings(pgd_t *pgd, unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long next;
+
+ do {
+ next = pmd_addr_end(addr, end);
+ pmd = pmd_offset(pgd, addr);
+
+ BUG_ON(pmd_sect(*pmd));
+
+ if (pmd_none(*pmd)) {
+ pte = pte_alloc_one_kernel(NULL, addr);
+ if (!pte) {
+ kvm_err(-ENOMEM, "Cannot allocate Hyp pte");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ }
+
+ create_hyp_pte_mappings(pmd, addr, next);
+ } while (addr = next, addr < end);
+
+ return 0;
+}
+
+/*
+ * Map the requested kernel virtual address range to their corresponding physical
+ * addresses in the hyp table.
+ *
+ * @hyp_pgd: The allocated hypervisor level-1 table
+ * @start: The virtual kernel start address of the range
+ * @end: The virtual kernel end address of the range
+ */
+int create_hyp_mappings(pgd_t *hyp_pgd, unsigned long start, unsigned long end)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ unsigned long addr, next;
+ int err = 0;
+
+ BUG_ON(start > end);
+ if (start < PAGE_OFFSET)
+ return -EINVAL;
+
+ addr = start;
+ do {
+ next = pgd_addr_end(addr, end);
+ pgd = hyp_pgd + pgd_index(addr);
+
+ if (pgd_none_or_clear_bad(pgd)) {
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err(-ENOMEM, "Cannot allocate Hyp pmd");
+ return -ENOMEM;
+ }
+ pgd_populate(NULL, pgd, pmd);
+ }
+
+ err = create_hyp_pmd_mappings(pgd, addr, next);
+ if (err)
+ return err;
+ } while (addr = next, addr < end);
+
+ return err;
+}
+
+/*
+ * Allocate level-1 translation table for stage-2 translation.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+ pgd_t *pgd;
+
+ if (kvm->arch.pgd != NULL) {
+ kvm_err(-EINVAL, "kvm_arch already initialized?\n");
+ return -EINVAL;
+ }
+
+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);
+ if (!pgd)
+ return -ENOMEM;
+
+ memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
+ kvm->arch.pgd = pgd;
+
+ return 0;
+}
+
+/*
+ * Free level-1 translation table for stage-2 translation and all belonging
+ * level-2 and level-3 tables.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+ if (kvm->arch.pgd == NULL)
+ return;
+
+ free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER);
+ kvm->arch.pgd = NULL;
+
+ /* TODO: Free child tables */
+ KVMARM_NOT_IMPLEMENTED();
+}
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 4/8] ARM: KVM: Memory virtualization setup
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 2/8] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 3/8] ARM: KVM: Add hypervisor initialization Christoffer Dall
@ 2011-06-03 15:03 ` Christoffer Dall
2011-06-05 12:41 ` Avi Kivity
2011-06-03 15:03 ` [PATCH v3 5/8] ARM: KVM: World-switch implementation Christoffer Dall
` (6 subsequent siblings)
9 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:03 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
Initializes a blank level-1 translation table for the second stage
translation and handles freeing it as well.
---
arch/arm/include/asm/kvm_host.h | 4 ++-
arch/arm/include/asm/kvm_mmu.h | 5 ++++
arch/arm/kvm/arm.c | 54 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 9fa9b20..5955ff4 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -31,7 +31,9 @@ struct kvm_vcpu;
u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
struct kvm_arch {
- pgd_t *pgd; /* 1-level 2nd stage table */
+ u32 vmid; /* The VMID used for the virt. memory system */
+ pgd_t *pgd; /* 1-level 2nd stage table */
+ u64 vttbr; /* VTTBR value associated with above pgd and vmid */
};
#define EXCEPTION_NONE 0
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index d22aad0..a64ab2d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -37,4 +37,9 @@ void remove_hyp_mappings(pgd_t *hyp_pgd,
unsigned long end);
void free_hyp_pmds(pgd_t *hyp_pgd);
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 4f691be..714f415 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -77,13 +77,56 @@ void kvm_arch_sync_events(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm)
{
- return 0;
+ int ret = 0;
+ phys_addr_t pgd_phys;
+ unsigned long vmid;
+ unsigned long start, end;
+
+
+ mutex_lock(&kvm_vmids_mutex);
+ vmid = find_first_zero_bit(kvm_vmids, VMID_SIZE);
+ if (vmid >= VMID_SIZE) {
+ mutex_unlock(&kvm_vmids_mutex);
+ return -EBUSY;
+ }
+ __set_bit(vmid, kvm_vmids);
+ kvm->arch.vmid = vmid;
+ mutex_unlock(&kvm_vmids_mutex);
+
+ ret = kvm_alloc_stage2_pgd(kvm);
+ if (ret)
+ goto out_fail_alloc;
+
+ pgd_phys = virt_to_phys(kvm->arch.pgd);
+ kvm->arch.vttbr = (pgd_phys & ((1LLU << 40) - 1) & ~((2 << VTTBR_X) - 1)) |
+ ((u64)vmid << 48);
+
+ start = (unsigned long)kvm,
+ end = start + sizeof(struct kvm);
+ ret = create_hyp_mappings(kvm_hyp_pgd, start, end);
+ if (ret)
+ goto out_fail_hyp_mappings;
+
+ return ret;
+out_fail_hyp_mappings:
+ remove_hyp_mappings(kvm_hyp_pgd, start, end);
+out_fail_alloc:
+ clear_bit(vmid, kvm_vmids);
+ return ret;
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
int i;
+ kvm_free_stage2_pgd(kvm);
+
+ if (kvm->arch.vmid != 0) {
+ mutex_lock(&kvm_vmids_mutex);
+ clear_bit(kvm->arch.vmid, kvm_vmids);
+ mutex_unlock(&kvm_vmids_mutex);
+ }
+
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -158,6 +201,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
int err;
struct kvm_vcpu *vcpu;
+ unsigned long start, end;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu) {
@@ -169,7 +213,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ start = (unsigned long)vcpu,
+ end = start + sizeof(struct kvm_vcpu);
+ err = create_hyp_mappings(kvm_hyp_pgd, start, end);
+ if (err)
+ goto out_fail_hyp_mappings;
+
return vcpu;
+out_fail_hyp_mappings:
+ remove_hyp_mappings(kvm_hyp_pgd, start, end);
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 5/8] ARM: KVM: World-switch implementation
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
` (2 preceding siblings ...)
2011-06-03 15:03 ` [PATCH v3 4/8] ARM: KVM: Memory virtualization setup Christoffer Dall
@ 2011-06-03 15:03 ` Christoffer Dall
2011-06-03 15:04 ` [PATCH v3 6/8] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
` (5 subsequent siblings)
9 siblings, 0 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:03 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
Provides complete world-switch implementation to switch to other guests
running in non-secure modes. Includes Hyp exception handlers that
captures necessary exception information and stores the information on
the VCPU and KVM structures.
Only controversy may be the back-door call to __irq_svc (the host
kernel's own physical IRQ handler) which is called when a physical IRQ
exception is taken in Hyp mode while running in the guest.
---
arch/arm/include/asm/kvm_arm.h | 24 +++
arch/arm/include/asm/kvm_host.h | 2
arch/arm/kernel/asm-offsets.c | 23 ++
arch/arm/kernel/entry-armv.S | 1
arch/arm/kvm/arm.c | 20 ++
arch/arm/kvm/arm_interrupts.S | 373 +++++++++++++++++++++++++++++++++++++++
6 files changed, 439 insertions(+), 4 deletions(-)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 835abd1..505a1a5 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -99,5 +99,29 @@
#define VTTBR_X (5 - VTCR_GUEST_T0SZ)
#endif
+/* Hyp Syndrome Register (HSR) bits */
+#define HSR_EC_SHIFT (26)
+#define HSR_EC (0x3fU << HSR_EC_SHIFT)
+#define HSR_IL (1U << 25)
+#define HSR_ISS (HSR_IL - 1)
+
+#define HSR_EC_UNKNOWN (0x00)
+#define HSR_EC_WFI (0x01)
+#define HSR_EC_CP15_32 (0x03)
+#define HSR_EC_CP15_64 (0x04)
+#define HSR_EC_CP14_MR (0x05)
+#define HSR_EC_CP14_LS (0x06)
+#define HSR_EC_CP_0_13 (0x07)
+#define HSR_EC_CP10_ID (0x08)
+#define HSR_EC_JAZELLE (0x09)
+#define HSR_EC_BXJ (0x0A)
+#define HSR_EC_CP14_64 (0x0C)
+#define HSR_EC_SVC_HYP (0x11)
+#define HSR_EC_HVC (0x12)
+#define HSR_EC_SMC (0x13)
+#define HSR_EC_IABT (0x20)
+#define HSR_EC_IABT_HYP (0x21)
+#define HSR_EC_DABT (0x24)
+#define HSR_EC_DABT_HYP (0x25)
#endif /* __KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 5955ff4..7f96974 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -73,6 +73,8 @@ struct kvm_vcpu_arch {
u64 c2_TTBR1; /* Translation Table Base Register 1 */
u32 c2_TTBCR; /* Translation Table Base Control Register */
u32 c3_DACR; /* Domain Access Control Register */
+ u32 c10_PRRR; /* Primary Region Remap Register */
+ u32 c10_NMRR; /* Normal Memory Remap Register */
} cp15;
u32 exception_pending; /* Exception to raise after emulation */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 82da661..769fa97 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/kvm_host.h>
#include <asm/mach/arch.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
@@ -118,5 +119,27 @@ int main(void)
DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
+#ifdef CONFIG_KVM
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(VCPU_SCTLR, offsetof(struct kvm_vcpu, arch.cp15.c1_SCTLR));
+ DEFINE(VCPU_CPACR, offsetof(struct kvm_vcpu, arch.cp15.c1_CPACR));
+ DEFINE(VCPU_TTBR0, offsetof(struct kvm_vcpu, arch.cp15.c2_TTBR0));
+ DEFINE(VCPU_TTBR1, offsetof(struct kvm_vcpu, arch.cp15.c2_TTBR1));
+ DEFINE(VCPU_TTBCR, offsetof(struct kvm_vcpu, arch.cp15.c2_TTBCR));
+ DEFINE(VCPU_DACR, offsetof(struct kvm_vcpu, arch.cp15.c3_DACR));
+ DEFINE(VCPU_PRRR, offsetof(struct kvm_vcpu, arch.cp15.c10_PRRR));
+ DEFINE(VCPU_NMRR, offsetof(struct kvm_vcpu, arch.cp15.c10_NMRR));
+ DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
+ DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
+ DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
+ DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs));
+ DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs));
+ DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs));
+ DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.pc));
+ DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.cpsr));
+ DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr));
+ DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
+#endif
return 0;
}
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 2b46fea..9cdc65e 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -197,6 +197,7 @@ __dabt_svc:
ENDPROC(__dabt_svc)
.align 5
+ .globl __irq_svc
__irq_svc:
svc_entry
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 714f415..1a9f168 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -245,7 +245,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- KVMARM_NOT_IMPLEMENTED();
+ unsigned long cpsr;
+ unsigned long sctlr;
+
+ /* Init execution CPSR */
+ asm volatile ("mrs %[cpsr], cpsr": [cpsr] "=r" (cpsr));
+ vcpu->arch.regs.cpsr = SVC_MODE | PSR_I_BIT | PSR_F_BIT | PSR_A_BIT |
+ (cpsr & PSR_E_BIT);
+
+ /* Init SCTLR with MMU disabled */
+ asm volatile ("mrc p15, 0, %[sctlr], c1, c0, 0":
+ [sctlr] "=r" (sctlr));
+ vcpu->arch.cp15.c1_SCTLR = sctlr & ~1U;
+
return 0;
}
@@ -287,6 +299,12 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __kvm_vcpu_run(vcpu);
+ local_irq_restore(flags);
+
KVMARM_NOT_IMPLEMENTED();
return -EINVAL;
}
diff --git a/arch/arm/kvm/arm_interrupts.S b/arch/arm/kvm/arm_interrupts.S
index e0bcf9e..6d3044c 100644
--- a/arch/arm/kvm/arm_interrupts.S
+++ b/arch/arm/kvm/arm_interrupts.S
@@ -21,6 +21,12 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
+#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
+#define VCPU_USR_SP (VCPU_USR_REG(13))
+#define VCPU_FIQ_REG(_reg_nr) (VCPU_FIQ_REGS + (_reg_nr * 4))
+#define VCPU_FIQ_SPSR (VCPU_FIQ_REG(7))
+
+
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ Hypervisor initialization
@ - should be called with:
@@ -120,11 +126,269 @@ __kvm_hyp_init_end:
@ Hypervisor world-switch code
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-/*
- * This is a stub
+.macro store_mode_state base_reg, mode
+ .if \mode == usr
+ mrs r2, SP_usr
+ mov r3, lr
+ stmdb \base_reg!, {r2, r3}
+ .elseif \mode != fiq
+ mrs r2, SP_\mode
+ mrs r3, LR_\mode
+ mrs r4, SPSR_\mode
+ stmdb \base_reg!, {r2, r3, r4}
+ .else
+ mrs r2, r8_fiq
+ mrs r3, r9_fiq
+ mrs r4, r10_fiq
+ mrs r5, r11_fiq
+ mrs r6, r12_fiq
+ mrs r7, SP_fiq
+ mrs r8, LR_fiq
+ mrs r9, SPSR_fiq
+ stmdb \base_reg!, {r2-r9}
+ .endif
+.endm
+
+.macro load_mode_state base_reg, mode
+ .if \mode == usr
+ ldmia \base_reg!, {r2, r3}
+ msr SP_usr, r2
+ mov lr, r3
+ .elseif \mode != fiq
+ ldmia \base_reg!, {r2, r3, r4}
+ msr SP_\mode, r2
+ msr LR_\mode, r3
+ msr SPSR_\mode, r4
+ .else
+ ldmia \base_reg!, {r2-r9}
+ msr r8_fiq, r2
+ msr r9_fiq, r3
+ msr r10_fiq, r4
+ msr r11_fiq, r5
+ msr r12_fiq, r6
+ msr SP_fiq, r7
+ msr LR_fiq, r8
+ msr SPSR_fiq, r9
+ .endif
+.endm
+
+/* Reads cp15 registers from hardware and stores them in memory
+ * @vcpu: If 0, registers are written in-order to the stack,
+ * otherwise to the VCPU struct pointed to by vcpup
+ * @vcpup: Register pointing to VCPU struct
+ */
+.macro read_cp15_state vcpu=0, vcpup
+ mrc p15, 0, r2, c1, c0, 0 @ SCTLR
+ mrc p15, 0, r3, c1, c0, 2 @ CPACR
+ mrc p15, 0, r4, c2, c0, 2 @ TTBCR
+ mcr p15, 0, r5, c3, c0, 0 @ DACR
+ mrrc p15, 0, r6, r7, c2 @ TTBR 0
+ mrrc p15, 1, r8, r9, c2 @ TTBR 1
+ mrc p15, 0, r10, c10, c2, 0 @ PRRR
+ mrc p15, 0, r11, c10, c2, 1 @ NMRR
+
+ .if \vcpu == 0
+ push {r2-r11} @ Push CP15 registers
+ .else
+ str r2, [\vcpup, #VCPU_SCTLR]
+ str r3, [\vcpup, #VCPU_CPACR]
+ str r4, [\vcpup, #VCPU_TTBCR]
+ str r5, [\vcpup, #VCPU_DACR]
+ add \vcpup, \vcpup, #VCPU_TTBR0
+ strd r6, r7, [\vcpup]
+ add \vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+ strd r8, r9, [\vcpup]
+ sub \vcpup, \vcpup, #(VCPU_TTBR1)
+ str r10, [\vcpup, #VCPU_PRRR]
+ str r11, [\vcpup, #VCPU_NMRR]
+ .endif
+.endm
+
+/* Reads cp15 registers from memory and writes them to hardware
+ * @vcpu: If 0, registers are read in-order from the stack,
+ * otherwise from the VCPU struct pointed to by vcpup
+ * @vcpup: Register pointing to VCPU struct
*/
+.macro write_cp15_state vcpu=0, vcpup
+ .if \vcpu == 0
+ pop {r2-r11}
+ .else
+ ldr r2, [\vcpup, #VCPU_SCTLR]
+ ldr r3, [\vcpup, #VCPU_CPACR]
+ ldr r4, [\vcpup, #VCPU_TTBCR]
+ ldr r5, [\vcpup, #VCPU_DACR]
+ add \vcpup, \vcpup, #VCPU_TTBR0
+ ldrd r6, r7, [\vcpup]
+ add \vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+ ldrd r8, r9, [\vcpup]
+ sub \vcpup, \vcpup, #(VCPU_TTBR1)
+ ldr r10, [\vcpup, #VCPU_PRRR]
+ ldr r11, [\vcpup, #VCPU_NMRR]
+ .endif
+
+ mcr p15, 0, r2, c1, c0, 0 @ SCTLR
+ mcr p15, 0, r3, c1, c0, 2 @ CPACR
+ mcr p15, 0, r4, c2, c0, 2 @ TTBCR
+ mcr p15, 0, r5, c3, c0, 0 @ DACR
+ mcrr p15, 0, r6, r7, c2 @ TTBR 0
+ mcrr p15, 1, r8, r9, c2 @ TTBR 1
+ mcr p15, 0, r10, c10, c2, 0 @ PRRR
+ mcr p15, 0, r11, c10, c2, 1 @ NMRR
+.endm
+
+/* Configures the HSTR (Hyp System Trap Register) on entry/return
+ * (hardware reset value is 0) */
+.macro set_hstr entry
+ mrc p15, 4, r2, c1, c1, 3
+ ldr r3, =0xbe00
+ .if \entry == 1
+ orr r2, r2, r3 @ Trap CR{9,10,11,12,13,15}
+ .else
+ bic r2, r2, r3 @ Don't trap any CRx accesses
+ .endif
+ mcr p15, 4, r2, c1, c1, 3
+.endm
+
+/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi/wfe, trap smc */
+.macro configure_hyp_role entry
+ mrc p15, 4, r2, c1, c1, 0 @ HCR
+ ldr r3, =HCR_GUEST_MASK
+ .if \entry == 1
+ orr r2, r2, r3
+ .else
+ bic r2, r2, r3
+ .endif
+ mcr p15, 4, r2, c1, c1, 0
+.endm
+
+@ This must be called from Hyp mode!
+@ Arguments:
+@ r0: pointer to vcpu struct
ENTRY(__kvm_vcpu_run)
+ hvc #0 @ Change to Hyp-mode
+
+ @ Now we're in Hyp-mode and lr_usr, spsr_hyp are on the stack
+ mrs r2, sp_usr
+ push {r2} @ Push r13_usr
+ push {r4-r12} @ Push r4-r12
+
+ store_mode_state sp, svc
+ store_mode_state sp, abt
+ store_mode_state sp, und
+ store_mode_state sp, irq
+ store_mode_state sp, fiq
+
+ @ Store hardware CP15 state and load guest state
+ read_cp15_state
+ write_cp15_state 1, r0
+
+ push {r0} @ Push the VCPU pointer
+
+ @ Set up guest memory translation
+ ldr r1, [r0, #VCPU_KVM] @ r1 points to kvm struct
+ ldrd r2, r3, [r1, #KVM_VTTBR]
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+
+ @ Configure Hyp-role
+ configure_hyp_role 1
+
+ @ Trap coprocessor CRx for all x except 2 and 14
+ set_hstr 1
+
+ @ Load guest registers
+ add r0, r0, #(VCPU_USR_SP)
+ load_mode_state r0, usr
+ load_mode_state r0, svc
+ load_mode_state r0, abt
+ load_mode_state r0, und
+ load_mode_state r0, irq
+ load_mode_state r0, fiq
+
+ @ Load return state (r0 now points to vcpu->arch.regs.pc)
+ ldmia r0, {r2, r3}
+ msr ELR_hyp, r2
+ msr spsr, r3
+
+ @ Load remaining registers and do the switch
+ sub r0, r0, #(VCPU_PC - VCPU_USR_REGS)
+ ldmia r0, {r0-r12}
+ eret
+
+__kvm_vcpu_return:
+ @ Store return state
+ mrs r2, ELR_hyp
+ mrs r3, spsr
+ str r2, [r1, #VCPU_PC]
+ str r3, [r1, #VCPU_CPSR]
+
+ @ Store guest registers
+ add r1, r1, #(VCPU_FIQ_SPSR + 4)
+ store_mode_state r1, fiq
+ store_mode_state r1, irq
+ store_mode_state r1, und
+ store_mode_state r1, abt
+ store_mode_state r1, svc
+ store_mode_state r1, usr
+
+ @ Don't trap coprocessor accesses for host kernel
+ set_hstr 0
+
+ @ Reset Hyp-role
+ configure_hyp_role 0
+
+ @ Set VMID == 0
+ mov r2, #0
+ mov r3, #0
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+
+ @ Store guest CP15 state and restore host state
+ read_cp15_state 1, r1
+ write_cp15_state
+
+ load_mode_state sp, fiq
+ load_mode_state sp, irq
+ load_mode_state sp, und
+ load_mode_state sp, abt
+ load_mode_state sp, svc
+
+ pop {r4-r12} @ Pop r4-r12
+ pop {r2} @ Pop r13_usr
+ msr sp_usr, r2
+
+ hvc #0
+
+ cmp r0, #ARM_EXCEPTION_IRQ
+ bne return_to_ioctl
+
+ /*
+ * It's time to launch the kernel IRQ handler for IRQ exceptions. This
+ * requires some manipulation though.
+ *
+ * - The easiest entry point to the host handler is __irq_svc.
+ * - The __irq_svc expects to be called from SVC mode, which has been
+ * switched to from vector_stub code in entry-armv.S. The __irq_svc calls
+ * svc_entry which uses values stored in memory and pointed to by r0
+ * to return from handler. We allocate this memory on the stack, which
+ * will contain these values:
+ * 0x8: cpsr
+ * 0x4: return_address
+ * 0x0: r0
+ */
+ adr r1, irq_kernel_resume @ Where to resume
+ mrs r2, cpsr @ CPSR when we return
+ push {r0 - r2}
+ mov r0, sp
+ b __irq_svc
+
+irq_kernel_resume:
+ pop {r0}
+ add sp, sp, #8
+
+return_to_ioctl:
mov pc, lr
+
+ .ltorg
+
__kvm_vcpu_run_end:
.globl __kvm_vcpu_run_end
@@ -136,6 +400,109 @@ __kvm_vcpu_run_end:
.align 5
__kvm_hyp_vector:
.globl __kvm_hyp_vector
- nop
+
+ @ Hyp-mode exception vector
+ b hyp_reset
+ b hyp_undef
+ b hyp_svc
+ b hyp_pabt
+ b hyp_dabt
+ b hyp_hvc
+ b hyp_irq
+ b hyp_fiq
+
+ .align
+hyp_reset:
+ sub pc, pc, #8
+
+ .align
+hyp_undef:
+ sub pc, pc, #8
+
+ .align
+hyp_svc:
+ @ Can only get here if HVC or SVC is called from Hyp mode, which means
+ @ we want to change mode back to SVC mode.
+ @ NB: Stack pointer should be where hyp_hvc handler left it!
+ ldr lr, [sp, #4]
+ msr spsr, lr
+ ldr lr, [sp]
+ add sp, sp, #8
+ eret
+
+ .align
+hyp_pabt:
+ sub pc, pc, #8
+
+ .align
+hyp_dabt:
+ sub pc, pc, #8
+
+ .align
+hyp_hvc:
+ @ Getting here is either because of a trap from a guest or from calling
+ @ HVC from the host kernel, which means "switch to Hyp mode".
+ push {r0, r1, r2}
+
+ @ Check syndrome register
+ mrc p15, 4, r0, c5, c2, 0 @ HSR
+ lsr r1, r0, #HSR_EC_SHIFT
+ cmp r1, #HSR_EC_HVC
+ bne guest_trap @ Not HVC instr.
+
+ @ Let's check if the HVC came from VMID 0 and allow simple
+ @ switch to Hyp mode
+ mrrc p15, 6, r1, r2, c2
+ lsr r2, r2, #16
+ and r2, r2, #0xff
+ cmp r2, #0
+ bne guest_trap @ Guest called HVC
+
+ pop {r0, r1, r2}
+
+ @ Store lr_usr,spsr (svc cpsr) on stack
+ sub sp, sp, #8
+ str lr, [sp]
+ mrs lr, spsr
+ str lr, [sp, #4]
+
+ @ Return to caller in Hyp mode
+ mrs lr, ELR_hyp
+ mov pc, lr
+
+ @ Not HVC from VMID 0 - this requires more careful investigation
+ @ TODO: Not implemented
+guest_trap:
+ ldr r1, [sp, #12] @ Load VCPU pointer
+ str r0, [r1, #VCPU_HSR]
+ add r1, r1, #VCPU_USR_REG(3)
+ stmia r1, {r3-r12}
+ sub r1, r1, #(VCPU_USR_REG(3) - VCPU_USR_REG(0))
+ pop {r3, r4, r5}
+ add sp, sp, #4 @ We loaded the VCPU pointer above
+ stmia r1, {r3, r4, r5}
+ sub r1, r1, #VCPU_USR_REG(0)
+
+ mov r0, #ARM_EXCEPTION_HVC
+ b __kvm_vcpu_return
+
+ .align
+hyp_irq:
+ push {r0}
+ ldr r0, [sp, #4] @ Load VCPU pointer
+ add r0, r0, #(VCPU_USR_REG(1))
+ stmia r0, {r1-r12}
+ pop {r0, r1} @ r1 == vcpu pointer
+ str r0, [r1, #VCPU_USR_REG(0)]
+
+ mov r0, #ARM_EXCEPTION_IRQ
+ b __kvm_vcpu_return
+
+ .align
+hyp_fiq:
+ sub pc, pc, #8
+
+ .ltorg
+
__kvm_hyp_vector_end:
.globl __kvm_hyp_vector_end
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 6/8] ARM: KVM: Emulation framework and CP15 emulation
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
` (3 preceding siblings ...)
2011-06-03 15:03 ` [PATCH v3 5/8] ARM: KVM: World-switch implementation Christoffer Dall
@ 2011-06-03 15:04 ` Christoffer Dall
2011-06-03 15:04 ` [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM Christoffer Dall
` (4 subsequent siblings)
9 siblings, 0 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:04 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
A few stub functions and support for emulating access to memory remap
registers PRRR and NMRR.
Provides some framework for handling trapped CP15 accesses in general.
---
arch/arm/include/asm/kvm_emulate.h | 7 ++
arch/arm/kvm/arm.c | 73 +++++++++++++++-
arch/arm/kvm/arm_emulate.c | 166 ++++++++++++++++++++++++++++++++++++
3 files changed, 241 insertions(+), 5 deletions(-)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 8eed752..6483b2a 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -40,6 +40,13 @@ static inline unsigned char vcpu_mode(struct kvm_vcpu *vcpu)
return modes_table[vcpu->arch.regs.cpsr & 0xf];
}
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
/*
* Return the SPSR for the specified mode of the virtual CPU.
*/
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 1a9f168..abed683 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -33,6 +33,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_emulate.h>
#include "trace.h"
@@ -297,16 +298,78 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
return 0;
}
+static inline int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ unsigned long hsr_ec;
+
+ hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
+ switch (hsr_ec) {
+ case HSR_EC_WFI:
+ return kvm_handle_wfi(vcpu, run);
+ case HSR_EC_CP15_32:
+ case HSR_EC_CP15_64:
+ return kvm_handle_cp15_access(vcpu, run);
+ case HSR_EC_CP14_MR:
+ return kvm_handle_cp14_access(vcpu, run);
+ case HSR_EC_CP14_LS:
+ return kvm_handle_cp14_load_store(vcpu, run);
+ case HSR_EC_CP14_64:
+ return kvm_handle_cp14_access(vcpu, run);
+ case HSR_EC_CP_0_13:
+ return kvm_handle_cp_0_13_access(vcpu, run);
+ case HSR_EC_CP10_ID:
+ return kvm_handle_cp10_id(vcpu, run);
+ case HSR_EC_SVC_HYP:
+ /* SVC called from Hyp mode should never get here */
+ kvm_msg("SVC called from Hyp mode shouldn't go here");
+ BUG();
+ case HSR_EC_HVC:
+ kvm_err(-EINVAL, "Guest called HVC, not supported");
+ return -EINVAL;
+ case HSR_EC_IABT:
+ case HSR_EC_DABT:
+ return kvm_handle_guest_abort(vcpu, run);
+ case HSR_EC_IABT_HYP:
+ case HSR_EC_DABT_HYP:
+ /* The hypervisor should never cause aborts */
+ kvm_msg("The hypervisor itself shouldn't cause aborts");
+ BUG();
+ default:
+ kvm_msg("Unkown exception class: %08x (%08x)", hsr_ec,
+ vcpu->arch.hsr);
+ BUG();
+ }
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
unsigned long flags;
+ int ret;
- local_irq_save(flags);
- __kvm_vcpu_run(vcpu);
- local_irq_restore(flags);
+ for (;;) {
+ local_irq_save(flags);
+ ret = __kvm_vcpu_run(vcpu);
+ local_irq_restore(flags);
- KVMARM_NOT_IMPLEMENTED();
- return -EINVAL;
+ if (ret == ARM_EXCEPTION_IRQ)
+ continue;
+
+ if (ret != ARM_EXCEPTION_HVC) {
+ ret = -EINVAL;
+ kvm_err(ret, "Unsupported exception type");
+ break;
+ }
+
+ ret = handle_exit(vcpu, run);
+ if (ret) {
+ kvm_err(ret, "Error in handle_exit");
+ break;
+ }
+ }
+
+ return ret;
}
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
diff --git a/arch/arm/kvm/arm_emulate.c b/arch/arm/kvm/arm_emulate.c
index 3dd4f08..f370d87 100644
--- a/arch/arm/kvm/arm_emulate.c
+++ b/arch/arm/kvm/arm_emulate.c
@@ -14,8 +14,33 @@
*
*/
+#include <linux/mm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>
+#include "trace.h"
+
+struct coproc_params {
+ unsigned long CRm;
+ unsigned long CRn;
+ unsigned long Op1;
+ unsigned long Op2;
+ unsigned long Rt1;
+ unsigned long Rt2;
+ bool is_64bit;
+ bool is_write;
+};
+
+#define CP15_OP(_vcpu, _params, _cp15_reg) \
+do { \
+ if (_params->is_write) \
+ _vcpu->arch.cp15._cp15_reg = vcpu_reg(_vcpu, _params->Rt1); \
+ else \
+ vcpu_reg(_vcpu, _params->Rt1) = _vcpu->arch.cp15._cp15_reg; \
+} while (0);
+
+
/*
* Return a pointer to the register number valid in the specified mode of
* the virtual CPU.
@@ -68,3 +93,144 @@ u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
BUG();
return NULL;
}
+
+static inline void print_cp_instr(struct coproc_params *p)
+{
+ if (p->is_64bit) {
+ kvm_msg(" %s\tp15, %u, r%u, r%u, c%u",
+ (p->is_write) ? "mcrr" : "mrrc",
+ p->Op1, p->Rt1, p->Rt2, p->CRm);
+ } else {
+ kvm_msg(" %s\tp15, %u, r%u, c%u, c%u, %u",
+ (p->is_write) ? "mcr" : "mrc",
+ p->Op1, p->Rt1, p->CRn, p->CRm, p->Op2);
+ }
+}
+
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+/**
+ * emulate_cp15_cp10_access -- emulates cp15 accesses for CRn == 10
+ * @vcpu: The VCPU pointer
+ * @p: The coprocessor parameters struct pointer holding trap inst. details
+ *
+ * This function may not need to exist - if we can ignore guest attempts to
+ * tamper with TLB lockdowns then it should be enough to store/restore the
+ * host/guest PRRR and NMRR memory remap registers and allow guest direct access
+ * to these registers.
+ */
+static int emulate_cp15_cp10_access(struct kvm_vcpu *vcpu,
+ struct coproc_params *p)
+{
+ BUG_ON(p->CRn != 10);
+ BUG_ON(p->is_64bit);
+
+ if ((p->CRm == 0 || p->CRm == 1 || p->CRm == 4 || p->CRm == 8) &&
+ (p->Op2 <= 7)) {
+ /* TLB Lockdown operations - ignored */
+ return 0;
+ }
+
+ if (p->CRm == 2 && p->Op2 == 0) {
+ CP15_OP(vcpu, p, c10_PRRR);
+ return 0;
+ }
+
+ if (p->CRm == 2 && p->Op2 == 1) {
+ CP15_OP(vcpu, p, c10_NMRR);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * kvm_handle_cp15_access -- handles a trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ *
+ * Investigates the CRn/CRm and whether this was mcr/mrc or mcrr/mrrc and either
+ * simply errors out if the operation was not supported (should maybe raise
+ * undefined to guest instead?) or otherwise emulates the access.
+ */
+int kvm_handle_cp15_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ unsigned long hsr_ec, instr_len;
+ struct coproc_params params;
+ int ret = 0;
+
+ hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+ params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
+ params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
+ BUG_ON(params.Rt1 >= 15);
+ params.is_write = ((vcpu->arch.hsr & 1) == 0);
+ params.is_64bit = (hsr_ec == HSR_EC_CP15_64);
+
+ if (params.is_64bit) {
+		/* mrrc, mcrr operation */
+ params.Op1 = (vcpu->arch.hsr >> 16) & 0xf;
+ params.Op2 = 0;
+ params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf;
+ BUG_ON(params.Rt2 >= 15);
+ params.CRn = 0;
+ } else {
+ params.CRn = (vcpu->arch.hsr >> 10) & 0xf;
+ params.Op1 = (vcpu->arch.hsr >> 14) & 0x7;
+ params.Op2 = (vcpu->arch.hsr >> 17) & 0x7;
+ params.Rt2 = 0;
+ }
+
+ /* So far no mrrc/mcrr accesses are emulated */
+ if (params.is_64bit)
+ goto unsupp_err_out;
+
+ switch (params.CRn) {
+ case 10:
+ ret = emulate_cp15_cp10_access(vcpu, ¶ms);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ goto unsupp_err_out;
+
+ /* Skip instruction, since it was emulated */
+ instr_len = ((vcpu->arch.hsr >> 25) & 1) ? 4 : 2;
+ vcpu_reg(vcpu, 15) += instr_len;
+
+ return ret;
+unsupp_err_out:
+ kvm_msg("Unsupported guest CP15 access:");
+ print_cp_instr(¶ms);
+ return -EINVAL;
+}
+
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
` (4 preceding siblings ...)
2011-06-03 15:04 ` [PATCH v3 6/8] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
@ 2011-06-03 15:04 ` Christoffer Dall
2011-06-05 12:48 ` Avi Kivity
2011-06-03 15:04 ` [PATCH v3 8/8] ARM: KVM: Handle I/O aborts Christoffer Dall
` (3 subsequent siblings)
9 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:04 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.
Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
pgprot_guest variables used to map 2nd stage memory for KVM guests.
---
arch/arm/include/asm/pgtable-3level.h | 9 +++
arch/arm/include/asm/pgtable.h | 4 ++
arch/arm/kernel/asm-offsets.c | 3 +
arch/arm/kvm/arm_interrupts.S | 14 +++++
arch/arm/kvm/arm_mmu.c | 86 +++++++++++++++++++++++++++++++++
arch/arm/mm/mmu.c | 3 +
6 files changed, 118 insertions(+), 1 deletions(-)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 14a3e28..f90d120 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -103,4 +103,13 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
+
#endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 2906f35..c4e71ff 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -75,6 +75,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -88,6 +89,9 @@ extern pgprot_t pgprot_kernel;
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+ L_PTE2_WRITE | L_PTE2_NORM_WB | \
+ L_PTE2_INNER_WB)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 769fa97..9f485aa 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -139,6 +139,9 @@ int main(void)
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.pc));
DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.cpsr));
DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr));
+ DEFINE(VCPU_HDFAR, offsetof(struct kvm_vcpu, arch.hdfar));
+ DEFINE(VCPU_HIFAR, offsetof(struct kvm_vcpu, arch.hifar));
+ DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar));
DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
#endif
return 0;
diff --git a/arch/arm/kvm/arm_interrupts.S b/arch/arm/kvm/arm_interrupts.S
index 6d3044c..689b337 100644
--- a/arch/arm/kvm/arm_interrupts.S
+++ b/arch/arm/kvm/arm_interrupts.S
@@ -483,7 +483,19 @@ guest_trap:
stmia r1, {r3, r4, r5}
sub r1, r1, #VCPU_USR_REG(0)
- mov r0, #ARM_EXCEPTION_HVC
+ @ Check if we need the fault information
+ lsr r0, r0, #HSR_EC_SHIFT
+ cmp r0, #HSR_EC_IABT
+ beq 2f
+ cmp r0, #HSR_EC_DABT
+ beq 2f
+ b 1f
+2: mrc p15, 4, r2, c6, c0, 0 @ HDFAR
+ mrc p15, 4, r3, c6, c0, 2 @ HIFAR
+ mrc p15, 4, r4, c6, c0, 4 @ HPFAR
+ add r5, r1, #VCPU_HDFAR
+ stmia r5, {r2, r3, r4}
+1: mov r0, #ARM_EXCEPTION_HVC
b __kvm_vcpu_return
.align
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
index 683f971..fe27e59 100644
--- a/arch/arm/kvm/arm_mmu.c
+++ b/arch/arm/kvm/arm_mmu.c
@@ -248,8 +248,94 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
KVMARM_NOT_IMPLEMENTED();
}
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+ pfn_t pfn;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte, new_pte;
+
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+ if (is_error_pfn(pfn)) {
+ kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
+ "corresponding host mapping",
+ gfn, gfn << PAGE_SHIFT);
+ return -EFAULT;
+ }
+
+ /* Create 2nd stage page table mapping - Level 1 */
+ pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
+ if (pgd_none(*pgd)) {
+ pmd = pmd_alloc_one(NULL, fault_ipa);
+ if (!pmd) {
+ kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
+ return -ENOMEM;
+ }
+ pgd_populate(NULL, pgd, pmd);
+ pmd += pmd_index(fault_ipa);
+ } else
+ pmd = pmd_offset(pgd, fault_ipa);
+
+ /* Create 2nd stage page table mapping - Level 2 */
+ if (pmd_none(*pmd)) {
+ pte = pte_alloc_one_kernel(NULL, fault_ipa);
+ if (!pte) {
+ kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ pte += pte_index(fault_ipa);
+ } else
+ pte = pte_offset_kernel(pmd, fault_ipa);
+
+ /* Create 2nd stage page table mapping - Level 3 */
+ new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+ set_pte_ext(pte, new_pte, 0);
+
+ return 0;
+}
+
+#define HSR_ABT_FS (0x3f)
+#define HPFAR_MASK (~0xf)
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
+ unsigned long hsr_ec;
+ unsigned long fault_status;
+ phys_addr_t fault_ipa;
+ struct kvm_memory_slot *memslot = NULL;
+ bool is_iabt;
+ gfn_t gfn;
+
+ hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+ is_iabt = (hsr_ec == HSR_EC_IABT);
+
+ /* Check that the second stage fault is a translation fault */
+ fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+ if ((fault_status & 0x3c) != 0x4) {
+ kvm_err(-EFAULT, "Unsupported fault status: %x",
+ fault_status & 0x3c);
+ return -EFAULT;
+ }
+
+ fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+ gfn = fault_ipa >> PAGE_SHIFT;
+ if (!kvm_is_visible_gfn(vcpu->kvm, gfn))
+ goto io_mem_abort;
+
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (memslot->user_alloc)
+ return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
+
+io_mem_abort:
+ if (is_iabt) {
+ kvm_err(-EFAULT, "Inst. abort on I/O address");
+ return -EFAULT;
+ }
+
+ kvm_msg("I/O address abort...");
KVMARM_NOT_IMPLEMENTED();
return -EINVAL;
}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d1da559..c5cbcd3 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -55,9 +55,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
struct cachepolicy {
const char policy[16];
@@ -497,6 +499,7 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | kern_pgprot);
+ pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
^ permalink raw reply related [flat|nested] 58+ messages in thread
* [PATCH v3 8/8] ARM: KVM: Handle I/O aborts
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
` (5 preceding siblings ...)
2011-06-03 15:04 ` [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM Christoffer Dall
@ 2011-06-03 15:04 ` Christoffer Dall
2011-06-03 15:31 ` [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Jan Kiszka
` (2 subsequent siblings)
9 siblings, 0 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 15:04 UTC (permalink / raw)
To: catalin.marinas, android-virt; +Cc: s.raho, a.motakis, c.dall, kvm, a.costa
When the guest accesses I/O memory this will create data abort
exceptions and they are handled by decoding the HSR information
(physical address, read/write, length, register) and forwarding reads
and writes to QEMU which performs the device emulation.
This requires changing the general flow somewhat since new calls to run
the VCPU must check if there's a pending MMIO load and perform the write
after QEMU has made the data available.
---
arch/arm/include/asm/kvm_host.h | 1
arch/arm/include/asm/kvm_mmu.h | 1
arch/arm/kvm/arm.c | 11 ++++
arch/arm/kvm/arm_mmu.c | 106 ++++++++++++++++++++++++++++++++++++++-
4 files changed, 115 insertions(+), 4 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7f96974..5393e25 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -86,6 +86,7 @@ struct kvm_vcpu_arch {
u32 hpfar; /* Hyp IPA Fault Address Register */
/* IO related fields */
+ bool mmio_sign_extend; /* for byte/halfword loads */
u32 mmio_rd;
/* Misc. fields */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a64ab2d..f06f42d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -40,6 +40,7 @@ void free_hyp_pmds(pgd_t *hyp_pgd);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index abed683..d01f234 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -349,6 +349,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int ret;
for (;;) {
+ if (run->exit_reason == KVM_EXIT_MMIO) {
+ ret = kvm_handle_mmio_return(vcpu, vcpu->run);
+ if (ret)
+ break;
+ }
+
local_irq_save(flags);
ret = __kvm_vcpu_run(vcpu);
local_irq_restore(flags);
@@ -367,8 +373,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_err(ret, "Error in handle_exit");
break;
}
+
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ break;
}
+ if (ret < 0)
+ run->exit_reason = KVM_EXIT_EXCEPTION;
return ret;
}
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
index fe27e59..b04a211 100644
--- a/arch/arm/kvm/arm_mmu.c
+++ b/arch/arm/kvm/arm_mmu.c
@@ -16,9 +16,10 @@
#include <linux/mman.h>
#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
-#include <asm/pgalloc.h>
+#include <asm/kvm_emulate.h>
#include "../mm/mm.h"
#include "trace.h"
@@ -297,6 +298,105 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return 0;
}
+/**
+ * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ *
+ * This should only be called after returning to QEMU for MMIO load emulation.
+ */
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int *dest;
+ unsigned int len;
+ int mask;
+
+ if (!run->mmio.is_write) {
+ dest = &vcpu_reg(vcpu, vcpu->arch.mmio_rd);
+ memset(dest, 0, sizeof(int));
+
+ if (run->mmio.len > 4) {
+ kvm_err(-EINVAL, "Incorrect mmio length");
+ return -EINVAL;
+ }
+
+ len = run->mmio.len;
+ memcpy(dest, run->mmio.data, len);
+
+ if (vcpu->arch.mmio_sign_extend && len < 4) {
+ mask = 1U << ((len * 8) - 1);
+ *dest = (*dest ^ mask) - mask;
+ }
+ }
+
+ return 0;
+}
+
+static int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ phys_addr_t fault_ipa, struct kvm_memory_slot *memslot)
+{
+ unsigned long rd, len, instr_len;
+ bool is_write, sign_extend;
+
+ if (!((vcpu->arch.hsr >> 24) & 1) || ((vcpu->arch.hsr >> 8) & 1)) {
+ kvm_err(-EFAULT, "Invalid I/O abort");
+ return -EFAULT;
+ }
+
+ if ((vcpu->arch.hsr >> 7) & 1) {
+ kvm_err(-EFAULT, "Translation table accesses I/O memory");
+ return -EFAULT;
+ }
+
+ switch ((vcpu->arch.hsr >> 22) & 0x3) {
+ case 0: len = 1; break;
+ case 1: len = 2; break;
+ case 2: len = 4; break;
+ default:
+ kvm_err(-EFAULT, "Invalid I/O abort");
+ return -EFAULT;
+ }
+
+ is_write = ((vcpu->arch.hsr >> 6) & 1);
+ sign_extend = ((vcpu->arch.hsr >> 21) & 1);
+ rd = (vcpu->arch.hsr >> 16) & 0xf;
+ BUG_ON(rd > 15);
+
+ if (rd == 15) {
+ kvm_err(-EFAULT, "I/O memory trying to read/write pc");
+ return -EFAULT;
+ }
+
+ /* Get instruction length in bytes */
+ instr_len = ((vcpu->arch.hsr >> 25) & 1) ? 4 : 2;
+
+ if (!memslot) {
+ /* QEMU hack for missing devices - simply return 0 */
+ if (!is_write)
+ vcpu_reg(vcpu, rd) = 0;
+ vcpu_reg(vcpu, 15) += instr_len;
+ return 0;
+ }
+
+ /* Export MMIO operations to user space */
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->mmio.is_write = is_write;
+ vcpu->run->mmio.phys_addr = fault_ipa;
+ vcpu->run->mmio.len = len;
+ vcpu->arch.mmio_sign_extend = sign_extend;
+ vcpu->arch.mmio_rd = rd;
+
+ if (is_write)
+ memcpy(run->mmio.data, &vcpu_reg(vcpu, rd), len);
+
+ /*
+ * The MMIO instruction is emulated and should not be re-executed
+ * in the guest.
+ */
+ vcpu_reg(vcpu, 15) += instr_len;
+ return 0;
+}
+
#define HSR_ABT_FS (0x3f)
#define HPFAR_MASK (~0xf)
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
@@ -335,7 +435,5 @@ io_mem_abort:
return -EFAULT;
}
- kvm_msg("I/O address abort...");
- KVMARM_NOT_IMPLEMENTED();
- return -EINVAL;
+ return io_mem_abort(vcpu, run, fault_ipa, memslot);
}
^ permalink raw reply related [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
` (6 preceding siblings ...)
2011-06-03 15:04 ` [PATCH v3 8/8] ARM: KVM: Handle I/O aborts Christoffer Dall
@ 2011-06-03 15:31 ` Jan Kiszka
2011-06-03 15:53 ` Jan Kiszka
2011-06-05 12:36 ` Avi Kivity
2011-06-05 12:52 ` Avi Kivity
9 siblings, 1 reply; 58+ messages in thread
From: Jan Kiszka @ 2011-06-03 15:31 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, c.dall, kvm,
a.costa
On 2011-06-03 17:03, Christoffer Dall wrote:
> Targets KVM support for Cortex A-15 processors.
>
> Contains no real functionality but all the framework components,
> make files, header files and some tracing functionality.
> ---
...
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ea2dc1a..d2ab07e 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -310,6 +310,7 @@ struct kvm_translation {
> struct kvm_interrupt {
> /* in */
> __u32 irq;
> + __u8 raise;
> };
This touches an existing ABI and corrupts the definition of
KVM_INTERRUPT IOCTL. There might exist jurisdictions considering this a
capital crime. :)
You rather have to define a new CPU IRQ injection interface that
supports both raising and lowering and declare its availability via a
KVM_CAP. Don't forget to make it extensible (flags field) so that future
requirements can be added without breaking existing users.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 15:31 ` [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Jan Kiszka
@ 2011-06-03 15:53 ` Jan Kiszka
2011-06-03 16:19 ` Christoffer Dall
` (2 more replies)
0 siblings, 3 replies; 58+ messages in thread
From: Jan Kiszka @ 2011-06-03 15:53 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, c.dall, kvm,
a.costa
On 2011-06-03 17:31, Jan Kiszka wrote:
> On 2011-06-03 17:03, Christoffer Dall wrote:
>> Targets KVM support for Cortex A-15 processors.
>>
>> Contains no real functionality but all the framework components,
>> make files, header files and some tracing functionality.
>> ---
>
> ...
>
>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>> index ea2dc1a..d2ab07e 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -310,6 +310,7 @@ struct kvm_translation {
>> struct kvm_interrupt {
>> /* in */
>> __u32 irq;
>> + __u8 raise;
>> };
>
> This touches an existing ABI and corrupts the definition of
> KVM_INTERRUPT IOCTL. The might exist jurisdictions considering this a
> capital crime. :)
>
> You rather have to define a new CPU IRQ injection interface that
> supports both raising and lowering and declare its availability via a
> KVM_CAP. Don't forget to make it extensible (flags field) so that future
> requirements can be added without breaking existing users.
Or much easier (this is what PowerPC is doing): Define irq values in a
way that they include a raise/lower flag.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 15:53 ` Jan Kiszka
@ 2011-06-03 16:19 ` Christoffer Dall
2011-06-03 16:31 ` [Android-virt] " Alexander Graf
2011-06-04 14:13 ` Alexander Graf
2011-06-05 12:21 ` Avi Kivity
2 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-03 16:19 UTC (permalink / raw)
To: Jan Kiszka; +Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
thanks, I will fix this for next version.
On Fri, Jun 3, 2011 at 5:53 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> On 2011-06-03 17:31, Jan Kiszka wrote:
>> On 2011-06-03 17:03, Christoffer Dall wrote:
>>> Targets KVM support for Cortex A-15 processors.
>>>
>>> Contains no real functionality but all the framework components,
>>> make files, header files and some tracing functionality.
>>> ---
>>
>> ...
>>
>>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>>> index ea2dc1a..d2ab07e 100644
>>> --- a/include/linux/kvm.h
>>> +++ b/include/linux/kvm.h
>>> @@ -310,6 +310,7 @@ struct kvm_translation {
>>> struct kvm_interrupt {
>>> /* in */
>>> __u32 irq;
>>> + __u8 raise;
>>> };
>>
>> This touches an existing ABI and corrupts the definition of
>> KVM_INTERRUPT IOCTL. The might exist jurisdictions considering this a
>> capital crime. :)
>>
>> You rather have to define a new CPU IRQ injection interface that
>> supports both raising and lowering and declare its availability via a
>> KVM_CAP. Don't forget to make it extensible (flags field) so that future
>> requirements can be added without breaking existing users.
>
> Or much easier (this is what PowerPC is doing): Define irq values in a
> way that they include a raise/lower flag.
>
> Jan
>
> --
> Siemens AG, Corporate Technology, CT T DE IT 1
> Corporate Competence Center Embedded Linux
>
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 16:19 ` Christoffer Dall
@ 2011-06-03 16:31 ` Alexander Graf
0 siblings, 0 replies; 58+ messages in thread
From: Alexander Graf @ 2011-06-03 16:31 UTC (permalink / raw)
To: Christoffer Dall
Cc: Jan Kiszka, KVM list, a.costa, android-virt, s.raho, Scott Wood
On 03.06.2011, at 18:19, Christoffer Dall wrote:
> thanks, I will fix this for next version.
Please be prepared that we might want to model the API as well, so I'd propose you send out an RFC that patches Documentation/kvm with the respective API documentation so we can have some constructive influence :).
Currently, on book3s at least, PPC simply uses the irq field to indicate raise/lower since there's only a single "external interrupt" line that the CPU sees. All other dispatching happens in user space. I don't know if you could use the same model or of you need specific table offsets for different external interrupt vectors.
Alex
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 15:53 ` Jan Kiszka
2011-06-03 16:19 ` Christoffer Dall
@ 2011-06-04 14:13 ` Alexander Graf
2011-06-05 12:21 ` Avi Kivity
2 siblings, 0 replies; 58+ messages in thread
From: Alexander Graf @ 2011-06-04 14:13 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Christoffer Dall, c.dall, kvm, a.costa, android-virt, s.raho
On 03.06.2011, at 17:53, Jan Kiszka wrote:
> On 2011-06-03 17:31, Jan Kiszka wrote:
>> On 2011-06-03 17:03, Christoffer Dall wrote:
>>> Targets KVM support for Cortex A-15 processors.
>>>
>>> Contains no real functionality but all the framework components,
>>> make files, header files and some tracing functionality.
>>> ---
>>
>> ...
>>
>>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>>> index ea2dc1a..d2ab07e 100644
>>> --- a/include/linux/kvm.h
>>> +++ b/include/linux/kvm.h
>>> @@ -310,6 +310,7 @@ struct kvm_translation {
>>> struct kvm_interrupt {
>>> /* in */
>>> __u32 irq;
>>> + __u8 raise;
>>> };
>>
>> This touches an existing ABI and corrupts the definition of
>> KVM_INTERRUPT IOCTL. The might exist jurisdictions considering this a
>> capital crime. :)
>>
>> You rather have to define a new CPU IRQ injection interface that
>> supports both raising and lowering and declare its availability via a
>> KVM_CAP. Don't forget to make it extensible (flags field) so that future
>> requirements can be added without breaking existing users.
>
> Or much easier (this is what PowerPC is doing): Define irq values in a
> way that they include a raise/lower flag.
It's not quite what PPC does. On PPC we have constant magic values for "irq" that mean "raise external interrupt" and "lower external interrupt". I like the idea with the mask though. It allows you to still have 31 bits of irq number information :)
Alex
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 15:53 ` Jan Kiszka
2011-06-03 16:19 ` Christoffer Dall
2011-06-04 14:13 ` Alexander Graf
@ 2011-06-05 12:21 ` Avi Kivity
2011-06-05 14:13 ` Jan Kiszka
2 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 12:21 UTC (permalink / raw)
To: Jan Kiszka
Cc: Christoffer Dall, catalin.marinas, android-virt, s.raho,
a.motakis, c.dall, kvm, a.costa
On 06/03/2011 06:53 PM, Jan Kiszka wrote:
> >> @@ -310,6 +310,7 @@ struct kvm_translation {
> >> struct kvm_interrupt {
> >> /* in */
> >> __u32 irq;
> >> + __u8 raise;
> >> };
> >
> > This touches an existing ABI and corrupts the definition of
> > KVM_INTERRUPT IOCTL. The might exist jurisdictions considering this a
> > capital crime. :)
> >
> > You rather have to define a new CPU IRQ injection interface that
> > supports both raising and lowering
This is KVM_IRQ_LINE:
> and declare its availability via a
> > KVM_CAP. Don't forget to make it extensible (flags field) so that future
> > requirements can be added without breaking existing users.
>
> Or much easier (this is what PowerPC is doing): Define irq values in a
> way that they include a raise/lower flag.
Much easier and much horribler.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
` (7 preceding siblings ...)
2011-06-03 15:31 ` [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Jan Kiszka
@ 2011-06-05 12:36 ` Avi Kivity
2011-06-05 16:03 ` Christoffer Dall
2011-06-05 12:52 ` Avi Kivity
9 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 12:36 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, c.dall, kvm,
a.costa
On 06/03/2011 06:03 PM, Christoffer Dall wrote:
> Targets KVM support for Cortex A-15 processors.
>
> Contains no real functionality but all the framework components,
> make files, header files and some tracing functionality.
>
> +
> +struct kvm_regs {
> + __u32 regs0_7[8]; /* Unbanked regs. (r0 - r7) */
> + __u32 fiq_regs8_12[5]; /* Banked fiq regs. (r8 - r12) */
> + __u32 usr_regs8_12[5]; /* Banked usr registers (r8 - r12) */
> + __u32 reg13[6]; /* Banked r13, indexed by MODE_ */
> + __u32 reg14[6]; /* Banked r14, indexed by MODE_ */
> + __u32 reg15;
> + __u32 cpsr;
> + __u32 spsr[5]; /* Banked SPSR, indexed by MODE_ */
> + struct {
> + __u32 c2_base0;
> + __u32 c2_base1;
> + __u32 c3_dacr;
> + } cp15;
> +
> +};
> +
> +struct kvm_sregs {
> +};
> +
> +struct kvm_fpu {
> +};
> +
> +struct kvm_guest_debug_arch {
> +};
> +
> +struct kvm_debug_exit_arch {
> +};
Presumably, to be filled in later?
> +
> +/* Get vcpu register for current mode */
> +#define vcpu_reg(_vcpu, _reg_num) \
> + (*kvm_vcpu_reg((_vcpu), _reg_num, vcpu_mode(_vcpu)))
> +
> +/* Get vcpu register for specific mode */
> +#define vcpu_reg_m(_vcpu, _reg_num, _mode) \
> + (*kvm_vcpu_reg(_vcpu, _reg_num, _mode))
> +
> +#define vcpu_cpsr(_vcpu) \
> + (_vcpu->arch.regs.cpsr)
> +
> +/* Get vcpu SPSR for current mode */
> +#define vcpu_spsr(_vcpu) \
> + kvm_vcpu_spsr(_vcpu, vcpu_mode(_vcpu))
> +
> +/* Get vcpu SPSR for specific mode */
> +#define vcpu_spsr_m(_vcpu, _mode) \
> + kvm_vcpu_spsr(_vcpu, _mode)
> +
> +#define MODE_HAS_SPSR(_vcpu) \
> + ((vcpu_mode(_vcpu))< MODE_USR)
> +
> +#define VCPU_MODE_PRIV(_vcpu) \
> + (((vcpu_mode(_vcpu)) == MODE_USR) ? 0 : 1)
Please use static inlines. Yes, you'll need more helpers to set
registers, but it's worth it, especially as some macros evaluate an
argument multiple times.
> +if VIRTUALIZATION
> +
> +config KVM
> + bool "Kernel-based Virtual Machine (KVM) support"
> + select PREEMPT_NOTIFIERS
> + select ANON_INODES
> + select KVM_ARM_HOST
> + select KVM_MMIO
> + ---help---
> + Support hosting virtualized guest machines. You will also
> + need to select one or more of the processor modules below.
> +
> + This module provides access to the hardware capabilities through
> + a character device node named /dev/kvm.
> +
> + If unsure, say N.
I see you can't support a modular build, which is a pity.
> +
> +static int k_show(struct seq_file *m, void *v)
> +{
> + print_kvm_debug_info(&seq_printf, m);
> + return 0;
> +}
> +
> +static void *k_start(struct seq_file *m, loff_t *pos)
> +{
> + return *pos< 1 ? (void *)1 : NULL;
> +}
> +
> +static void *k_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> + ++*pos;
> + return NULL;
> +}
> +
> +static void k_stop(struct seq_file *m, void *v)
> +{
> +}
> +
> +static const struct seq_operations kvmproc_op = {
> + .start = k_start,
> + .next = k_next,
> + .stop = k_stop,
> + .show = k_show
> +};
> +
> +static int kvm_open(struct inode *inode, struct file *file)
> +{
> + return seq_open(file,&kvmproc_op);
> +}
> +
> +static const struct file_operations proc_kvm_operations = {
> + .open = kvm_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = seq_release,
> +};
> +
> +static int arm_init(void)
> +{
> + int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
> + if (rc == 0)
> + proc_create("kvm", 0, NULL,&proc_kvm_operations);
> + return rc;
> +}
/proc is frowned upon these days. Is there no better place for this?
> +/*
> + * Return a pointer to the register number valid in the specified mode of
> + * the virtual CPU.
> + */
> +u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
> +{
> + struct kvm_vcpu_regs *regs;
> + u8 reg_idx;
> + BUG_ON(reg_num> 15);
> +
> + regs =&vcpu->arch.regs;
> +
> + /* The PC is trivial */
> + if (reg_num == 15)
> + return&(regs->pc);
> +
> + /* Non-banked registers */
> + if (reg_num< 8)
> + return&(regs->usr_regs[reg_num]);
> +
> + /* Banked registers r13 and r14 */
> + if (reg_num>= 13) {
> + reg_idx = reg_num - 13; /* 0=r13 and 1=r14 */
> + switch (mode) {
> + case MODE_FIQ:
> + return&(regs->fiq_regs[reg_idx + 5]);
> + case MODE_IRQ:
> + return&(regs->irq_regs[reg_idx]);
> + case MODE_SVC:
> + return&(regs->svc_regs[reg_idx]);
> + case MODE_ABT:
> + return&(regs->abt_regs[reg_idx]);
> + case MODE_UND:
> + return&(regs->und_regs[reg_idx]);
> + case MODE_USR:
> + case MODE_SYS:
> + return&(regs->usr_regs[reg_idx]);
> + }
> + }
> +
> + /* Banked FIQ registers r8-r12 */
> + if (reg_num>= 8&& reg_num<= 12) {
> + if (mode == MODE_FIQ) {
> + reg_idx = reg_num - 8; /* 0=r8, ..., 4=r12 */
> + return&(regs->fiq_regs[reg_idx]);
> + } else
> + return&(regs->usr_regs[reg_num]);
> + }
You could have a static 2D array indexed by mode and register number,
returning an offsetof() into the vcpu structure.
> +
> + BUG();
> + return NULL;
> +}
>
> diff --git a/arch/arm/kvm/trace.c b/arch/arm/kvm/trace.c
> new file mode 100644
> index 0000000..8ea1155
> --- /dev/null
> +++ b/arch/arm/kvm/trace.c
> @@ -0,0 +1,436 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
> + *
> + */
> +#include<linux/types.h>
> +#include<linux/kvm_types.h>
> +#include<linux/kvm_host.h>
> +
> +#include<asm/kvm_emulate.h>
> +#include "trace.h"
> +
> +
> +/******************************************************************************
> + * Simple event counting
> + */
> +
> +struct kvm_event {
> + unsigned long long cnt;
> + char *descr;
> +};
> +
> +static struct kvm_event kvm_eventc_log[KVM_EVENTC_ITEMS] =
> +{
> + { 0, "switch to guest" },
> + { 0, "exit from guest" },
> + { 0, "Block VCPU" },
> + { 0, "Exit to QEMU for IRQ window" },
> + { 0, "Switch VCPU mode" },
> + { 0, "VCPU IRQs on" },
> + { 0, "VCPU IRQs off" },
> + { 0, "Wait-for-interrupts" },
> + { 0, "Flush shadow page table" },
> + { 0, "Virtual TTBR change" },
> + { 0, "Read guest page table entry" },
> + { 0, "Map GVA to GFN" },
> + { 0, "Virtual DACR change" },
> + { 0, "VCPU switch to privileged mode" },
> + { 0, "VCPU switch from privileged mode" },
> + { 0, "VCPU process ID registers change" },
> + { 0, "Emulate Load/Store with translation" },
> + { 0, "Emulate MRS" },
> + { 0, "Emulate MSR" },
> + { 0, "Emulate CPS" },
> + { 0, "Need reschedule in execution loop" },
> + { 0, "MCR 7, 5, 0 - Invalidate entire I-cache" },
> + { 0, "MCR 7, 5, 1 - Invalidate line in I-cache MVA" },
> + { 0, "MCR 7, 5, 2 - Invalidate line in I-cache set/way" },
> + { 0, "MCR 7, 5, 7 - Flush branch target cache - MVA" },
> + { 0, "MCR 7, 6, 0 - Invalidate entire data cache" },
> + { 0, "MCR 7, 6, 1 - Invalidate data cache line - MVA" },
> + { 0, "MCR 7, 6, 2 - Invalidate data cache line - set/way" },
> + { 0, "MCR 7, 7, 0 - Invalidate D- and I-cache" },
> + { 0, "MCR 7, 10, 0 - Clean entire data cache" },
> + { 0, "MCR 7, 10, 1 - Clean data cache line - MVA" },
> + { 0, "MCR 7, 10, 4 - Data Synchronization Barrier (DSB)" },
> + { 0, "MCR 7, 14, 0 - Clean and invalidate entire D-cache" },
> + { 0, "MCR 7, 14, 1 - Clean and invalidate D-cache line - MVA" },
> + { 0, "MCR 7, 15, 0 - Clean and invalidate unified cache" },
> + { 0, "MCR 8, 5, 0 - Invalidate instruction TLB" },
> + { 0, "MCR 8, 6, 0 - Invalidate data TLB" },
> + { 0, "MCR 8, 7, 0 - Invalidate unified TLB" },
> + { 0, "Emulate Load-Store multiple" },
> +};
> +
> +void kvm_arm_count_event(unsigned int event)
> +{
> + if (event>= KVM_EVENTC_ITEMS)
> + return;
> +
> + kvm_eventc_log[event].cnt++;
> +}
We've switched to ftrace for this sort of thing. Simply add a
tracepoint for each interesting event, and the kernel can provide you with
- a count of events ('perf stat')
- a log of events ('trace-cmd record/report'), possibly with other
kernel events interspersed
- a running histogram ('kvm_stat')
with near-zero impact when disabled.
See include/trace/events/kvm.h, arch/x86/kvm/trace.h.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 4/8] ARM: KVM: Memory virtualization setup
2011-06-03 15:03 ` [PATCH v3 4/8] ARM: KVM: Memory virtualization setup Christoffer Dall
@ 2011-06-05 12:41 ` Avi Kivity
2011-06-05 14:50 ` Christoffer Dall
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 12:41 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, c.dall, kvm,
a.costa
On 06/03/2011 06:03 PM, Christoffer Dall wrote:
> Initializes a blank level-1 translation table for the second stage
> translation and handles freeing it as well.
>
> + start = (unsigned long)kvm,
> + end = start + sizeof(struct kvm);
> + ret = create_hyp_mappings(kvm_hyp_pgd, start, end);
Why not map all GFP_KERNEL memory?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM
2011-06-03 15:04 ` [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM Christoffer Dall
@ 2011-06-05 12:48 ` Avi Kivity
2011-06-11 10:37 ` Christoffer Dall
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 12:48 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, c.dall, kvm,
a.costa
On 06/03/2011 06:04 PM, Christoffer Dall wrote:
> Handles the guest faults in KVM by mapping in corresponding user pages
> in the 2nd stage page tables.
>
>
>
> +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> + gfn_t gfn, struct kvm_memory_slot *memslot)
> +{
> + pfn_t pfn;
> + pgd_t *pgd;
> + pmd_t *pmd;
> + pte_t *pte, new_pte;
> +
> + pfn = gfn_to_pfn(vcpu->kvm, gfn);
> +
> + if (is_error_pfn(pfn)) {
> + kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
> + "corresponding host mapping",
> + gfn, gfn<< PAGE_SHIFT);
> + return -EFAULT;
> + }
> +
> + /* Create 2nd stage page table mapping - Level 1 */
> + pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
> + if (pgd_none(*pgd)) {
> + pmd = pmd_alloc_one(NULL, fault_ipa);
> + if (!pmd) {
> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
> + return -ENOMEM;
> + }
> + pgd_populate(NULL, pgd, pmd);
> + pmd += pmd_index(fault_ipa);
> + } else
> + pmd = pmd_offset(pgd, fault_ipa);
> +
> + /* Create 2nd stage page table mapping - Level 2 */
> + if (pmd_none(*pmd)) {
> + pte = pte_alloc_one_kernel(NULL, fault_ipa);
> + if (!pte) {
> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
> + return -ENOMEM;
> + }
> + pmd_populate_kernel(NULL, pmd, pte);
> + pte += pte_index(fault_ipa);
> + } else
> + pte = pte_offset_kernel(pmd, fault_ipa);
> +
> + /* Create 2nd stage page table mapping - Level 3 */
> + new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
> + set_pte_ext(pte, new_pte, 0);
> +
> + return 0;
> +}
> +
> +#define HSR_ABT_FS (0x3f)
> +#define HPFAR_MASK (~0xf)
> int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
> {
> + unsigned long hsr_ec;
> + unsigned long fault_status;
> + phys_addr_t fault_ipa;
> + struct kvm_memory_slot *memslot = NULL;
> + bool is_iabt;
> + gfn_t gfn;
> +
> + hsr_ec = vcpu->arch.hsr>> HSR_EC_SHIFT;
> + is_iabt = (hsr_ec == HSR_EC_IABT);
> +
> + /* Check that the second stage fault is a translation fault */
> + fault_status = vcpu->arch.hsr& HSR_ABT_FS;
> + if ((fault_status& 0x3c) != 0x4) {
> + kvm_err(-EFAULT, "Unsupported fault status: %x",
> + fault_status& 0x3c);
> + return -EFAULT;
> + }
> +
> + fault_ipa = ((phys_addr_t)vcpu->arch.hpfar& HPFAR_MASK)<< 8;
> +
> + gfn = fault_ipa>> PAGE_SHIFT;
> + if (!kvm_is_visible_gfn(vcpu->kvm, gfn))
> + goto io_mem_abort;
> +
> + memslot = gfn_to_memslot(vcpu->kvm, gfn);
> + if (memslot->user_alloc)
> + return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
Non-user_alloc should not exist for ARM (and are not supported for x86
these days, except for a few implementation internal slots).
> +
> +io_mem_abort:
> + if (is_iabt) {
> + kvm_err(-EFAULT, "Inst. abort on I/O address");
> + return -EFAULT;
> + }
> +
> + kvm_msg("I/O address abort...");
> KVMARM_NOT_IMPLEMENTED();
> return -EINVAL;
> }
Okay, this is about a zillion times simpler than x86. Congratulations.
What are your thoughts about mmu notifier support?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
` (8 preceding siblings ...)
2011-06-05 12:36 ` Avi Kivity
@ 2011-06-05 12:52 ` Avi Kivity
2011-06-05 14:00 ` Avi Kivity
9 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 12:52 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, c.dall, kvm,
a.costa
On 06/03/2011 06:03 PM, Christoffer Dall wrote:
> Targets KVM support for Cortex A-15 processors.
>
> Contains no real functionality but all the framework components,
> make files, header files and some tracing functionality.
(series review - please have a cover letter in the future for this stuff)
Looks good in general. Of course I can't say much about technical
correctness and will rely on the ARM maintainers for that.
Please document which ioctls are supported in
Documentation/virtual/kvm/api.txt, and make sure that any ARM-specific
ioctls (if you ever have any) are documented there.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 12:52 ` Avi Kivity
@ 2011-06-05 14:00 ` Avi Kivity
2011-06-05 14:13 ` Christoffer Dall
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 14:00 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, c.dall, kvm,
a.costa
On 06/05/2011 03:52 PM, Avi Kivity wrote:
>
> (series review - please have a cover letter in the future for this stuff)
>
Oh - you did - but in a separate thread. Oh well.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 14:00 ` Avi Kivity
@ 2011-06-05 14:13 ` Christoffer Dall
2011-06-05 14:18 ` Avi Kivity
0 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-05 14:13 UTC (permalink / raw)
To: Avi Kivity; +Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On Sun, Jun 5, 2011 at 4:00 PM, Avi Kivity <avi@redhat.com> wrote:
> On 06/05/2011 03:52 PM, Avi Kivity wrote:
>>
>> (series review - please have a cover letter in the future for this stuff)
>>
>
> Oh - you did - but in a separate thread. Oh well.
My internet router decided to reset immediately after sending the
cover letter, when it was back up stgit gave up on sending the rest of
the patches, and I didn't think about putting the rest in the same
thread. Sorry about that.
>
> --
> error compiling committee.c: too many arguments to function
>
>
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 12:21 ` Avi Kivity
@ 2011-06-05 14:13 ` Jan Kiszka
2011-06-05 14:18 ` Avi Kivity
0 siblings, 1 reply; 58+ messages in thread
From: Jan Kiszka @ 2011-06-05 14:13 UTC (permalink / raw)
To: Avi Kivity
Cc: Christoffer Dall, catalin.marinas, android-virt, s.raho,
a.motakis, c.dall, kvm, a.costa
[-- Attachment #1: Type: text/plain, Size: 1163 bytes --]
On 2011-06-05 14:21, Avi Kivity wrote:
> On 06/03/2011 06:53 PM, Jan Kiszka wrote:
>> >> @@ -310,6 +310,7 @@ struct kvm_translation {
>> >> struct kvm_interrupt {
>> >> /* in */
>> >> __u32 irq;
>> >> + __u8 raise;
>> >> };
>> >
>> > This touches an existing ABI and corrupts the definition of
>> > KVM_INTERRUPT IOCTL. There might exist jurisdictions considering this a
>> > capital crime. :)
>> >
>> > You rather have to define a new CPU IRQ injection interface that
>> > supports both raising and lowering
>
> This is KVM_IRQ_LINE:
>
It's so far associated with in-kernel irqchip input pins, not with
raising CPU IRQs.
>
>
>> and declare its availability via a
>> > KVM_CAP. Don't forget to make it extensible (flags field) so that
>> future
>> > requirements can be added without breaking existing users.
>>
>> Or much easier (this is what PowerPC is doing): Define irq values in a
>> way that they include a raise/lower flag.
>
> Much easier and much horribler.
>
Less horrible than overloading KVM_IRQ_LINE IMHO. The semantics of
kvm_interrupt::irq are in arch hands anyway.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 14:13 ` Jan Kiszka
@ 2011-06-05 14:18 ` Avi Kivity
2011-06-05 14:58 ` Jan Kiszka
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 14:18 UTC (permalink / raw)
To: Jan Kiszka
Cc: Christoffer Dall, catalin.marinas, android-virt, s.raho,
a.motakis, c.dall, kvm, a.costa
On 06/05/2011 05:13 PM, Jan Kiszka wrote:
> On 2011-06-05 14:21, Avi Kivity wrote:
> > On 06/03/2011 06:53 PM, Jan Kiszka wrote:
> >> >> @@ -310,6 +310,7 @@ struct kvm_translation {
> >> >> struct kvm_interrupt {
> >> >> /* in */
> >> >> __u32 irq;
> >> >> + __u8 raise;
> >> >> };
> >> >
> >> > This touches an existing ABI and corrupts the definition of
> >> > KVM_INTERRUPT IOCTL. There might exist jurisdictions considering this a
> >> > capital crime. :)
> >> >
> >> > You rather have to define a new CPU IRQ injection interface that
> >> > supports both raising and lowering
> >
> > This is KVM_IRQ_LINE:
> >
>
> It's so far associated with in-kernel irqchip input pins, not with
> raising CPU IRQs.
It's up to the architecture to define what it's connected to.
Note that with KVM_SET_GSI_ROUTING (bad name for ARM...) we can even
choose if an irq line is connected to a kernel-emulated interrupt
controller or to the core's irq input.
> >> and declare its availability via a
> >> > KVM_CAP. Don't forget to make it extensible (flags field) so that
> >> future
> >> > requirements can be added without breaking existing users.
> >>
> >> Or much easier (this is what PowerPC is doing): Define irq values in a
> >> way that they include a raise/lower flag.
> >
> > Much easier and much horribler.
> >
>
> Less horrible than overloading KVM_IRQ_LINE IMHO. The semantics of
> kvm_interrupt::irq are in arch hands anyway.
Something that can be raised or lowered is an irq line.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 14:13 ` Christoffer Dall
@ 2011-06-05 14:18 ` Avi Kivity
0 siblings, 0 replies; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 14:18 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On 06/05/2011 05:13 PM, Christoffer Dall wrote:
> On Sun, Jun 5, 2011 at 4:00 PM, Avi Kivity<avi@redhat.com> wrote:
> > On 06/05/2011 03:52 PM, Avi Kivity wrote:
> >>
> >> (series review - please have a cover letter in the future for this stuff)
> >>
> >
> > Oh - you did - but in a separate thread. Oh well.
>
> My internet router decided to reset immediately after sending the
> cover letter, when it was back up stgit gave up on sending the rest of
> the patches, and I didn't think about putting the rest in the same
> thread. Sorry about that.
No problem.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 4/8] ARM: KVM: Memory virtualization setup
2011-06-05 12:41 ` Avi Kivity
@ 2011-06-05 14:50 ` Christoffer Dall
2011-06-05 14:53 ` Avi Kivity
0 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-05 14:50 UTC (permalink / raw)
To: Avi Kivity; +Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On Sun, Jun 5, 2011 at 2:41 PM, Avi Kivity <avi@redhat.com> wrote:
> On 06/03/2011 06:03 PM, Christoffer Dall wrote:
>>
>> Initializes a blank level-1 translation table for the second stage
>> translation and handles freeing it as well.
>>
>> + start = (unsigned long)kvm,
>> + end = start + sizeof(struct kvm);
>> + ret = create_hyp_mappings(kvm_hyp_pgd, start, end);
>
> Why not map all GFP_KERNEL memory?
>
I wanted to only map things I was sure would be there and stay there
so no assumptions were made about existing pages which could have been
removed, since I don't handle aborts taken in the hypervisor itself.
But, if it would be as safe to map all GFP_KERNEL memory and that also
maps the necessary code segments, then we could do that. Do you feel
it would be simpler/faster/easier?
> --
> error compiling committee.c: too many arguments to function
>
>
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 4/8] ARM: KVM: Memory virtualization setup
2011-06-05 14:50 ` Christoffer Dall
@ 2011-06-05 14:53 ` Avi Kivity
2011-06-05 15:14 ` Avi Kivity
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 14:53 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On 06/05/2011 05:50 PM, Christoffer Dall wrote:
> On Sun, Jun 5, 2011 at 2:41 PM, Avi Kivity<avi@redhat.com> wrote:
> > On 06/03/2011 06:03 PM, Christoffer Dall wrote:
> >>
> >> Initializes a blank level-1 translation table for the second stage
> >> translation and handles freeing it as well.
> >>
> >> + start = (unsigned long)kvm,
> >> + end = start + sizeof(struct kvm);
> >> + ret = create_hyp_mappings(kvm_hyp_pgd, start, end);
> >
> > Why not map all GFP_KERNEL memory?
> >
> I wanted to only map things I was sure would be there and stay there
> so no assumptions were made about existing pages which could have been
> removed, since I don't handle aborts taken in the hypervisor itself.
> But, if it would be as safe to map all GFP_KERNEL memory and that also
> maps the necessary code segments, then we could do that. Do you feel
> it would be simpler/faster/easier?
I think so - you wouldn't have to worry about dereferencing pointers
within the vcpu structure.
Of course, it's up to you, I don't really have a enough understanding of
the architecture.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 14:18 ` Avi Kivity
@ 2011-06-05 14:58 ` Jan Kiszka
2011-06-05 15:10 ` Avi Kivity
2011-06-05 16:24 ` Christoffer Dall
0 siblings, 2 replies; 58+ messages in thread
From: Jan Kiszka @ 2011-06-05 14:58 UTC (permalink / raw)
To: Avi Kivity
Cc: Christoffer Dall, catalin.marinas, android-virt, s.raho,
a.motakis, c.dall, kvm, a.costa
[-- Attachment #1: Type: text/plain, Size: 1445 bytes --]
On 2011-06-05 16:18, Avi Kivity wrote:
> On 06/05/2011 05:13 PM, Jan Kiszka wrote:
>> On 2011-06-05 14:21, Avi Kivity wrote:
>> > On 06/03/2011 06:53 PM, Jan Kiszka wrote:
>> >> >> @@ -310,6 +310,7 @@ struct kvm_translation {
>> >> >> struct kvm_interrupt {
>> >> >> /* in */
>> >> >> __u32 irq;
>> >> >> + __u8 raise;
>> >> >> };
>> >> >
>> >> > This touches an existing ABI and corrupts the definition of
>> >> > KVM_INTERRUPT IOCTL. There might exist jurisdictions considering
>> this a
>> >> > capital crime. :)
>> >> >
>> >> > You rather have to define a new CPU IRQ injection interface that
>> >> > supports both raising and lowering
>> >
>> > This is KVM_IRQ_LINE:
>> >
>>
>> It's so far associated with in-kernel irqchip input pins, not with
>> raising CPU IRQs.
>
> It's up to the architecture to define what it's connected to.
>
> Note that with KVM_SET_GSI_ROUTING (bad name for ARM...) we can even
> choose if an irq line is connected to a kernel-emulated interrupt
> controller or to the core's irq input.
Makes some sense: Add KVM_IRQ_ROUTING_CPU, and kvm_irq_routing_entry's
union would require some struct kvm_irq_routing_cpu containing the
target identifier.
However, I would recommend to carefully check the generic irq routing
bits before use - if they still contain some x86/ia64 specifics or
unwanted irqchip_in_kernel().
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 14:58 ` Jan Kiszka
@ 2011-06-05 15:10 ` Avi Kivity
2011-06-05 15:14 ` Jan Kiszka
2011-06-05 16:24 ` Christoffer Dall
1 sibling, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 15:10 UTC (permalink / raw)
To: Jan Kiszka
Cc: Christoffer Dall, catalin.marinas, android-virt, s.raho,
a.motakis, c.dall, kvm, a.costa
On 06/05/2011 05:58 PM, Jan Kiszka wrote:
> >
> > Note that with KVM_SET_GSI_ROUTING (bad name for ARM...) we can even
> > choose if an irq line is connected to a kernel-emulated interrupt
> > controller or to the core's irq input.
>
> Makes some sense: Add KVM_IRQ_ROUTING_CPU, and kvm_irq_routing_entry's
> union would require some struct kvm_irq_routing_cpu containing the
> target identifier.
Right. Note it would be the default, so no need to implement
KVM_SET_GSI_ROUTING just yet.
An additional advantage is that this is a vm ioctl, not a vcpu ioctl, so
no need to interrupt the vcpu thread in userspace in order to queue an
interrupt. Of course, it still happens in the kernel, but it's easier
for userspace to implement its device model this way.
> However, I would recommend to carefully check the generic irq routing
> bits before use - if they still contain some x86/ia64 specifics or
> unwanted irqchip_in_kernel().
Most likely yes.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 15:10 ` Avi Kivity
@ 2011-06-05 15:14 ` Jan Kiszka
2011-06-05 15:18 ` Avi Kivity
2011-06-05 16:25 ` Christoffer Dall
0 siblings, 2 replies; 58+ messages in thread
From: Jan Kiszka @ 2011-06-05 15:14 UTC (permalink / raw)
To: Avi Kivity
Cc: Christoffer Dall, catalin.marinas, android-virt, s.raho,
a.motakis, c.dall, kvm, a.costa
[-- Attachment #1: Type: text/plain, Size: 998 bytes --]
On 2011-06-05 17:10, Avi Kivity wrote:
> On 06/05/2011 05:58 PM, Jan Kiszka wrote:
>> >
>> > Note that with KVM_SET_GSI_ROUTING (bad name for ARM...) we can even
>> > choose if an irq line is connected to a kernel-emulated interrupt
>> > controller or to the core's irq input.
>>
>> Makes some sense: Add KVM_IRQ_ROUTING_CPU, and kvm_irq_routing_entry's
>> union would require some struct kvm_irq_routing_cpu containing the
>> target identifier.
>
> Right. Note it would be the default, so no need to implement
> KVM_SET_GSI_ROUTING just yet.
>
> An additional advantage is that this is a vm ioctl, not a vcpu ioctl, so
> no need to interrupt the vcpu thread in userspace in order to queue an
> interrupt. Of course, it still happens in the kernel, but it's easier
> for userspace to implement its device model this way.
So supporting this over existing archs would have some benefits as well,
though a bit smaller if in-kernel irqchip is already implemented.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 4/8] ARM: KVM: Memory virtualization setup
2011-06-05 14:53 ` Avi Kivity
@ 2011-06-05 15:14 ` Avi Kivity
2011-06-05 15:27 ` Christoffer Dall
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 15:14 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On 06/05/2011 05:53 PM, Avi Kivity wrote:
> On 06/05/2011 05:50 PM, Christoffer Dall wrote:
>> On Sun, Jun 5, 2011 at 2:41 PM, Avi Kivity<avi@redhat.com> wrote:
>> > On 06/03/2011 06:03 PM, Christoffer Dall wrote:
>> >>
>> >> Initializes a blank level-1 translation table for the second stage
>> >> translation and handles freeing it as well.
>> >>
>> >> + start = (unsigned long)kvm,
>> >> + end = start + sizeof(struct kvm);
>> >> + ret = create_hyp_mappings(kvm_hyp_pgd, start, end);
>> >
>> > Why not map all GFP_KERNEL memory?
>> >
>> I wanted to only map things I was sure would be there and stay there
>> so no assumptions were made about existing pages which could have been
>> removed, since I don't handle aborts taken in the hypervisor itself.
>> But, if it would be as safe to map all GFP_KERNEL memory and that also
>> maps the necessary code segments, then we could do that. Do you feel
>> it would be simpler/faster/easier?
>
> I think so - you wouldn't have to worry about dereferencing pointers
> within the vcpu structure.
Also, you could use huge pages for the mapping, yes? that should
improve switching performance a bit.
Can you run the host kernel in hypervisor mode? That may reduce
switching time even further.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 15:14 ` Jan Kiszka
@ 2011-06-05 15:18 ` Avi Kivity
2011-06-05 16:25 ` Christoffer Dall
1 sibling, 0 replies; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 15:18 UTC (permalink / raw)
To: Jan Kiszka
Cc: Christoffer Dall, catalin.marinas, android-virt, s.raho,
a.motakis, c.dall, kvm, a.costa
On 06/05/2011 06:14 PM, Jan Kiszka wrote:
> > An additional advantage is that this is a vm ioctl, not a vcpu ioctl, so
> > no need to interrupt the vcpu thread in userspace in order to queue an
> > interrupt. Of course, it still happens in the kernel, but it's easier
> > for userspace to implement its device model this way.
>
> So supporting this over existing archs would have some benefits as well,
> though a bit smaller if in-kernel irqchip is already implemented.
Yes, except for s390, of course.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 4/8] ARM: KVM: Memory virtualization setup
2011-06-05 15:14 ` Avi Kivity
@ 2011-06-05 15:27 ` Christoffer Dall
2011-06-05 16:02 ` Avi Kivity
0 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-05 15:27 UTC (permalink / raw)
To: Avi Kivity; +Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On Sun, Jun 5, 2011 at 5:14 PM, Avi Kivity <avi@redhat.com> wrote:
> On 06/05/2011 05:53 PM, Avi Kivity wrote:
>>
>> On 06/05/2011 05:50 PM, Christoffer Dall wrote:
>>>
>>> On Sun, Jun 5, 2011 at 2:41 PM, Avi Kivity<avi@redhat.com> wrote:
>>> > On 06/03/2011 06:03 PM, Christoffer Dall wrote:
>>> >>
>>> >> Initializes a blank level-1 translation table for the second stage
>>> >> translation and handles freeing it as well.
>>> >>
>>> >> + start = (unsigned long)kvm,
>>> >> + end = start + sizeof(struct kvm);
>>> >> + ret = create_hyp_mappings(kvm_hyp_pgd, start, end);
>>> >
>>> > Why not map all GFP_KERNEL memory?
>>> >
>>> I wanted to only map things I was sure would be there and stay there
>>> so no assumptions were made about existing pages which could have been
>>> removed, since I don't handle aborts taken in the hypervisor itself.
>>> But, if it would be as safe to map all GFP_KERNEL memory and that also
>>> maps the necessary code segments, then we could do that. Do you feel
>>> it would be simpler/faster/easier?
>>
>> I think so - you wouldn't have to worry about dereferencing pointers
>> within the vcpu structure.
>
> Also, you could use huge pages for the mapping, yes? that should improve
> switching performance a bit.
well, the only advantage here would be to save a few entries in the
TLB right? So really it would only be the case if the data and the
code and such accessed during switches lie within the same sections,
which could happen, but could also not happen. I don't see a big
performance gain here, but slightly more complicated code. For
instance, if the VCPU struct is mapped using a page, not a section
mapping, I cannot use section mappings since I would map weird things.
So I would have to support both for allocating and freeing.
I suggest keeping this in place for now and experiment with
performance later on to see if there's a gain. Allocating all of
GFP_KERNEL memory could be good to prevent bugs, but I would like to
clear this with the ARM memory experts that it is in fact a good idea.
>
> Can you run the host kernel in hypervisor mode? That may reduce switching
> time even further.
No, I think the implications would be way too widespread all over the kernel.
>
> --
> error compiling committee.c: too many arguments to function
>
>
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 4/8] ARM: KVM: Memory virtualization setup
2011-06-05 15:27 ` Christoffer Dall
@ 2011-06-05 16:02 ` Avi Kivity
0 siblings, 0 replies; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 16:02 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On 06/05/2011 06:27 PM, Christoffer Dall wrote:
> On Sun, Jun 5, 2011 at 5:14 PM, Avi Kivity<avi@redhat.com> wrote:
> > On 06/05/2011 05:53 PM, Avi Kivity wrote:
> >>
> >> On 06/05/2011 05:50 PM, Christoffer Dall wrote:
> >>>
> >>> On Sun, Jun 5, 2011 at 2:41 PM, Avi Kivity<avi@redhat.com> wrote:
> >>> > On 06/03/2011 06:03 PM, Christoffer Dall wrote:
> >>> >>
> >>> >> Initializes a blank level-1 translation table for the second stage
> >>> >> translation and handles freeing it as well.
> >>> >>
> >>> >> + start = (unsigned long)kvm,
> >>> >> + end = start + sizeof(struct kvm);
> >>> >> + ret = create_hyp_mappings(kvm_hyp_pgd, start, end);
> >>> >
> >>> > Why not map all GFP_KERNEL memory?
> >>> >
> >>> I wanted to only map things I was sure would be there and stay there
> >>> so no assumptions were made about existing pages which could have been
> >>> removed, since I don't handle aborts taken in the hypervisor itself.
> >>> But, if it would be as safe to map all GFP_KERNEL memory and that also
> >>> maps the necessary code segments, then we could do that. Do you feel
> >>> it would be simpler/faster/easier?
> >>
> >> I think so - you wouldn't have to worry about dereferencing pointers
> >> within the vcpu structure.
> >
> > Also, you could use huge pages for the mapping, yes? that should improve
> > switching performance a bit.
>
> well, the only advantage here would be to save a few entries in the
> TLB right? So really it would only be the case if the data and the
> code and such accessed during switches lie within the same sections,
> which could happen, but could also not happen. I don't see a big
> performance gain here, but slightly more complicated code. For
> instance, if the VCPU struct is mapped using a page, not a section
> mapping, I cannot use section mappings since I would map weird things.
> So I would have to support both for allocating and freeing.
>
> I suggest keeping this in place for now and experiment with
> performance later on to see if there's a gain. Allocating all of
> GFP_KERNEL memory could be good to prevent bugs, but I would like to
> clear this with the ARM memory experts that it is in fact a good idea.
Sure. All of my arch related comments are made from ignorance anyway,
feel free to ignore or use them as you like. The only important ones
are those related to the API.
> >
> > Can you run the host kernel in hypervisor mode? That may reduce switching
> > time even further.
>
> No, I think the implications would be way too widespread all over the kernel.
Okay.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 12:36 ` Avi Kivity
@ 2011-06-05 16:03 ` Christoffer Dall
2011-06-05 16:06 ` Avi Kivity
[not found] ` <211B3F42-9B68-41BB-B1FA-348B5500C60A@suse.de>
0 siblings, 2 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-05 16:03 UTC (permalink / raw)
To: Avi Kivity; +Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On Sun, Jun 5, 2011 at 2:36 PM, Avi Kivity <avi@redhat.com> wrote:
> On 06/03/2011 06:03 PM, Christoffer Dall wrote:
>>
>> Targets KVM support for Cortex A-15 processors.
>>
>> Contains no real functionality but all the framework components,
>> make files, header files and some tracing functionality.
>>
>> +
>> +struct kvm_regs {
>> + __u32 regs0_7[8]; /* Unbanked regs. (r0 - r7) */
>> + __u32 fiq_regs8_12[5]; /* Banked fiq regs. (r8 - r12) */
>> + __u32 usr_regs8_12[5]; /* Banked usr registers (r8 - r12) */
>> + __u32 reg13[6]; /* Banked r13, indexed by MODE_ */
>> + __u32 reg14[6]; /* Banked r14, indexed by MODE_ */
>> + __u32 reg15;
>> + __u32 cpsr;
>> + __u32 spsr[5]; /* Banked SPSR, indexed by MODE_ */
>> + struct {
>> + __u32 c2_base0;
>> + __u32 c2_base1;
>> + __u32 c3_dacr;
>> + } cp15;
>> +
>> +};
>> +
>> +struct kvm_sregs {
>> +};
>> +
>> +struct kvm_fpu {
>> +};
>> +
>> +struct kvm_guest_debug_arch {
>> +};
>> +
>> +struct kvm_debug_exit_arch {
>> +};
>
> Presumably, to be filled in later?
>
I simply didn't look at these yet and didn't need them yet. I will
look into this later on.
>> +
>> +/* Get vcpu register for current mode */
>> +#define vcpu_reg(_vcpu, _reg_num) \
>> + (*kvm_vcpu_reg((_vcpu), _reg_num, vcpu_mode(_vcpu)))
>> +
>> +/* Get vcpu register for specific mode */
>> +#define vcpu_reg_m(_vcpu, _reg_num, _mode) \
>> + (*kvm_vcpu_reg(_vcpu, _reg_num, _mode))
>> +
>> +#define vcpu_cpsr(_vcpu) \
>> + (_vcpu->arch.regs.cpsr)
>> +
>> +/* Get vcpu SPSR for current mode */
>> +#define vcpu_spsr(_vcpu) \
>> + kvm_vcpu_spsr(_vcpu, vcpu_mode(_vcpu))
>> +
>> +/* Get vcpu SPSR for specific mode */
>> +#define vcpu_spsr_m(_vcpu, _mode) \
>> + kvm_vcpu_spsr(_vcpu, _mode)
>> +
>> +#define MODE_HAS_SPSR(_vcpu) \
>> + ((vcpu_mode(_vcpu))< MODE_USR)
>> +
>> +#define VCPU_MODE_PRIV(_vcpu) \
>> + (((vcpu_mode(_vcpu)) == MODE_USR) ? 0 : 1)
>
> Please use static inlines. Yes, you'll need more helpers to set registers,
> but it's worth it, especially as some macros evaluate an argument multiple
> times.
ok.
>
>> +if VIRTUALIZATION
>> +
>> +config KVM
>> + bool "Kernel-based Virtual Machine (KVM) support"
>> + select PREEMPT_NOTIFIERS
>> + select ANON_INODES
>> + select KVM_ARM_HOST
>> + select KVM_MMIO
>> + ---help---
>> + Support hosting virtualized guest machines. You will also
>> + need to select one or more of the processor modules below.
>> +
>> + This module provides access to the hardware capabilities through
>> + a character device node named /dev/kvm.
>> +
>> + If unsure, say N.
>
> I see you can't support a modular build, which is a pity.
>
My concern is that I map in code that needs to be in place for running
in Hyp mode. Of course I just need to pin these, so it should be
possible. I'll look into this as well.
>> +
>> +static int k_show(struct seq_file *m, void *v)
>> +{
>> + print_kvm_debug_info(&seq_printf, m);
>> + return 0;
>> +}
>> +
>> +static void *k_start(struct seq_file *m, loff_t *pos)
>> +{
>> + return *pos< 1 ? (void *)1 : NULL;
>> +}
>> +
>> +static void *k_next(struct seq_file *m, void *v, loff_t *pos)
>> +{
>> + ++*pos;
>> + return NULL;
>> +}
>> +
>> +static void k_stop(struct seq_file *m, void *v)
>> +{
>> +}
>> +
>> +static const struct seq_operations kvmproc_op = {
>> + .start = k_start,
>> + .next = k_next,
>> + .stop = k_stop,
>> + .show = k_show
>> +};
>> +
>> +static int kvm_open(struct inode *inode, struct file *file)
>> +{
>> + return seq_open(file,&kvmproc_op);
>> +}
>> +
>> +static const struct file_operations proc_kvm_operations = {
>> + .open = kvm_open,
>> + .read = seq_read,
>> + .llseek = seq_lseek,
>> + .release = seq_release,
>> +};
>> +
>> +static int arm_init(void)
>> +{
>> + int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
>> + if (rc == 0)
>> + proc_create("kvm", 0, NULL,&proc_kvm_operations);
>> + return rc;
>> +}
>
> /proc is frowned upon these days. Is there no better place for this?+
>>
Yeah, this is actually quite legacy and probably shouldn't have been
included. I will give this a thorough overhaul before next patch
series.
>> +/*
>> + * Return a pointer to the register number valid in the specified mode of
>> + * the virtual CPU.
>> + */
>> +u32* kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
>> +{
>> + struct kvm_vcpu_regs *regs;
>> + u8 reg_idx;
>> + BUG_ON(reg_num> 15);
>> +
>> + regs =&vcpu->arch.regs;
>> +
>> + /* The PC is trivial */
>> + if (reg_num == 15)
>> + return&(regs->pc);
>> +
>> + /* Non-banked registers */
>> + if (reg_num< 8)
>> + return&(regs->usr_regs[reg_num]);
>> +
>> + /* Banked registers r13 and r14 */
>> + if (reg_num>= 13) {
>> + reg_idx = reg_num - 13; /* 0=r13 and 1=r14 */
>> + switch (mode) {
>> + case MODE_FIQ:
>> + return&(regs->fiq_regs[reg_idx + 5]);
>> + case MODE_IRQ:
>> + return&(regs->irq_regs[reg_idx]);
>> + case MODE_SVC:
>> + return&(regs->svc_regs[reg_idx]);
>> + case MODE_ABT:
>> + return&(regs->abt_regs[reg_idx]);
>> + case MODE_UND:
>> + return&(regs->und_regs[reg_idx]);
>> + case MODE_USR:
>> + case MODE_SYS:
>> + return&(regs->usr_regs[reg_idx]);
>> + }
>> + }
>> +
>> + /* Banked FIQ registers r8-r12 */
>> + if (reg_num>= 8&& reg_num<= 12) {
>> + if (mode == MODE_FIQ) {
>> + reg_idx = reg_num - 8; /* 0=r8, ..., 4=r12 */
>> + return&(regs->fiq_regs[reg_idx]);
>> + } else
>> + return&(regs->usr_regs[reg_num]);
>> + }
>
> You could have a static 2D array indexed by mode and register number,
> returning an offsetof() into the vcpu structure.
You think it's simpler or faster? I don't quite see the incentive.
It's not going to be called a whole lot given the Virt. Extensions.
>
>> +
>> + BUG();
>> + return NULL;
>> +}
>>
>> diff --git a/arch/arm/kvm/trace.c b/arch/arm/kvm/trace.c
>> new file mode 100644
>> index 0000000..8ea1155
[snip]
>> +
>> +void kvm_arm_count_event(unsigned int event)
>> +{
>> + if (event>= KVM_EVENTC_ITEMS)
>> + return;
>> +
>> + kvm_eventc_log[event].cnt++;
>> +}
>
> We've switched to ftrace for this sort of thing. Simply add a tracepoint
> for each interesting event, and the kernel can provide you with
>
> - a count of events ('perf stat')
> - a log of events ('trace-cmd record/report'), possibly with other kernel
> events interspersed
> - a running histogram ('kvm_stat')
>
> with near-zero impact when disabled.
>
> See include/trace/events/kvm.h, arch/x86/kvm/trace.h.
Will do, thanks.
>
> --
> error compiling committee.c: too many arguments to function
>
>
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 16:03 ` Christoffer Dall
@ 2011-06-05 16:06 ` Avi Kivity
[not found] ` <211B3F42-9B68-41BB-B1FA-348B5500C60A@suse.de>
1 sibling, 0 replies; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 16:06 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On 06/05/2011 07:03 PM, Christoffer Dall wrote:
> >> +
> >> + /* Banked FIQ registers r8-r12 */
> >> + if (reg_num>= 8&& reg_num<= 12) {
> >> + if (mode == MODE_FIQ) {
> >> + reg_idx = reg_num - 8; /* 0=r8, ..., 4=r12 */
> >> + return&(regs->fiq_regs[reg_idx]);
> >> + } else
> >> + return&(regs->usr_regs[reg_num]);
> >> + }
> >
> > You could have a static 2D array indexed by mode and register number,
> > returning an offsetof() into the vcpu structure.
>
> You think it's simpler or faster? I don't quite see the incentive.
> It's not going to be called a whole lot given the Virt. Extensions.
Yes (those are mostly unpredictable branches), and clearer as well. But
I agree it's a rare-enough path that doesn't need optimizing, certainly
not prematurely.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 14:58 ` Jan Kiszka
2011-06-05 15:10 ` Avi Kivity
@ 2011-06-05 16:24 ` Christoffer Dall
2011-06-05 16:31 ` Avi Kivity
1 sibling, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-05 16:24 UTC (permalink / raw)
To: Jan Kiszka
Cc: Avi Kivity, catalin.marinas, android-virt, s.raho, a.motakis, kvm,
a.costa
On Sun, Jun 5, 2011 at 4:58 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2011-06-05 16:18, Avi Kivity wrote:
>> On 06/05/2011 05:13 PM, Jan Kiszka wrote:
>>> On 2011-06-05 14:21, Avi Kivity wrote:
>>> > On 06/03/2011 06:53 PM, Jan Kiszka wrote:
>>> >> >> @@ -310,6 +310,7 @@ struct kvm_translation {
>>> >> >> struct kvm_interrupt {
>>> >> >> /* in */
>>> >> >> __u32 irq;
>>> >> >> + __u8 raise;
>>> >> >> };
>>> >> >
>>> >> > This touches an existing ABI and corrupts the definition of
>>> >> > KVM_INTERRUPT IOCTL. There might exist jurisdictions considering
>>> this a
>>> >> > capital crime. :)
>>> >> >
>>> >> > You rather have to define a new CPU IRQ injection interface that
>>> >> > supports both raising and lowering
>>> >
>>> > This is KVM_IRQ_LINE:
>>> >
>>>
>>> It's so far associated with in-kernel irqchip input pins, not with
>>> raising CPU IRQs.
>>
>> It's up to the architecture to define what it's connected to.
>>
>> Note that with KVM_SET_GSI_ROUTING (bad name for ARM...) we can even
>> choose if an irq line is connected to a kernel-emulated interrupt
>> controller or to the core's irq input.
>
> Makes some sense: Add KVM_IRQ_ROUTING_CPU, and kvm_irq_routing_entry's
> union would require some struct kvm_irq_routing_cpu containing the
> target identifier.
>
> However, I would recommend to carefully check the generic irq routing
> bits before use - if they still contain some x86/ia64 specifics or
> unwanted irqchip_in_kernel().
>
OK, so let me try to get this straight.
We all agree that ruining the ABI is crazy - sorry about even putting
that out there.
On ARM, we are talking about IRQs and FIQs, both of which can be
lowered and raised. In other words, four values.
But instead of this, you want to use KVM_IRQ_LINE and just assume one
CPU to begin with and always assert/deassert the interrupts on that
CPU and later, with SMP support, you want to use KVM_SET_GSI_ROUTING
to control which CPU gets the interrupt - even though GSI is a
non-existing concept for ARM? (Also, I could not seem to find any use
of GSI_ROUTING in current upstream QEMU, but I may be missing
something here?).
I suggest I simply use a flag in the irq field on the kvm_interrupt
struct as suggested by Jan, for now, and if we start emulating GICs
inside the kernel for ARM we can go the extra mile at that point.
IMHO, I think the semantics of actually raising an interrupt line on
the VCPU, as opposed to the VM, when it's a CPU interrupt we are talking
about is equally important.
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 15:14 ` Jan Kiszka
2011-06-05 15:18 ` Avi Kivity
@ 2011-06-05 16:25 ` Christoffer Dall
2011-06-05 16:28 ` Avi Kivity
1 sibling, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-05 16:25 UTC (permalink / raw)
To: Jan Kiszka
Cc: Avi Kivity, catalin.marinas, android-virt, s.raho, a.motakis, kvm,
a.costa
On Sun, Jun 5, 2011 at 5:14 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2011-06-05 17:10, Avi Kivity wrote:
>> On 06/05/2011 05:58 PM, Jan Kiszka wrote:
>>> >
>>> > Note that with KVM_SET_GSI_ROUTING (bad name for ARM...) we can even
>>> > choose if an irq line is connected to a kernel-emulated interrupt
>>> > controller or to the core's irq input.
>>>
>>> Makes some sense: Add KVM_IRQ_ROUTING_CPU, and kvm_irq_routing_entry's
>>> union would require some struct kvm_irq_routing_cpu containing the
>>> target identifier.
>>
>> Right. Note it would be the default, so no need to implement
>> KVM_SET_GSI_ROUTING just yet.
>>
>> An additional advantage is that this is a vm ioctl, not a vcpu ioctl, so
>> no need to interrupt the vcpu thread in userspace in order to queue an
>> interrupt. Of course, it still happens in the kernel, but it's easier
>> for userspace to implement its device model this way.
>
> So supporting this over existing archs would have some benefits as well,
> though a bit smaller if in-kernel irqchip is already implemented.
>
Could you elaborate what you mean here? I'm not really following. Are
you suggesting a new arch-generic interface? (Pardon my ignorance).
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 16:25 ` Christoffer Dall
@ 2011-06-05 16:28 ` Avi Kivity
2011-06-05 16:30 ` [Android-virt] " Alexander Graf
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 16:28 UTC (permalink / raw)
To: Christoffer Dall
Cc: Jan Kiszka, catalin.marinas, android-virt, s.raho, a.motakis, kvm,
a.costa
On 06/05/2011 07:25 PM, Christoffer Dall wrote:
> >>
> >> An additional advantage is that this is a vm ioctl, not a vcpu ioctl, so
> >> no need to interrupt the vcpu thread in userspace in order to queue an
> >> interrupt. Of course, it still happens in the kernel, but it's easier
> >> for userspace to implement its device model this way.
> >
> > So supporting this over existing archs would have some benefits as well,
> > though a bit smaller if in-kernel irqchip is already implemented.
> >
>
> Could you elaborate what you mean here? I'm not really following. Are
> you suggesting a new arch-generic interface? (Pardon my ignorance).
Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 16:28 ` Avi Kivity
@ 2011-06-05 16:30 ` Alexander Graf
2011-06-05 16:33 ` Avi Kivity
0 siblings, 1 reply; 58+ messages in thread
From: Alexander Graf @ 2011-06-05 16:30 UTC (permalink / raw)
To: Avi Kivity
Cc: Christoffer Dall, kvm, a.costa, Jan Kiszka, android-virt, s.raho
On 05.06.2011, at 18:28, Avi Kivity wrote:
> On 06/05/2011 07:25 PM, Christoffer Dall wrote:
>>>>
>>>> An additional advantage is that this is a vm ioctl, not a vcpu ioctl, so
>>>> no need to interrupt the vcpu thread in userspace in order to queue an
>>>> interrupt. Of course, it still happens in the kernel, but it's easier
>>>> for userspace to implement its device model this way.
>>>
>>> So supporting this over existing archs would have some benefits as well,
>>> though a bit smaller if in-kernel irqchip is already implemented.
>>>
>>
>> Could you elaborate what you mean here? I'm not really following. Are
>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>
> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
Alex
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 16:24 ` Christoffer Dall
@ 2011-06-05 16:31 ` Avi Kivity
0 siblings, 0 replies; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 16:31 UTC (permalink / raw)
To: Christoffer Dall
Cc: Jan Kiszka, catalin.marinas, android-virt, s.raho, a.motakis, kvm,
a.costa
On 06/05/2011 07:24 PM, Christoffer Dall wrote:
> On ARM, we are talking about IRQs and FIQs, both of which can be
> lowered and raised. In other words, four values.
Two lines per cpu, each of which can be raised or lowered.
> But instead of this, you want to use KVM_IRQ_LINE and just assume one
> CPU to begin with and always assert/deassert the interrupts on that
> CPU and later, with SMP support, you want to use KVM_SET_GSI_ROUTING
> to control which CPU gets the interrupt - even though GSI is a
> non-existing concept for ARM? (Also, I could not seem to find any use
> of GSI_ROUTING in current upstream QEMU, but I may be missing
> something here?).
Almost. vcpu N's IRQ -> KVM_IRQ_LINE(N*2, level). vcpu N's FIQ ->
KVM_IRQ_LINE(N*2+1, level), + documentation somewhere.
> I suggest I simply use a flag in the irq field on the kvm_interrupt
> struct as suggested by Jan, for now, and if we start emulating GICs
> inside the kernel for ARM we can go the extra mile at that point.
> IMHO, I think the semantics of actually raising an interrupt line on
> the VCPU, as opposed to the VM, when it's a CPU interrupt we are talking
> about is equally important.
When you implement an interrupt controller, you can use
KVM_SET_GSI_ROUTING to change the meaning of the parameter to
KVM_IRQ_LINE to point to the interrupt controllers.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 16:30 ` [Android-virt] " Alexander Graf
@ 2011-06-05 16:33 ` Avi Kivity
2011-06-05 17:19 ` Alexander Graf
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-05 16:33 UTC (permalink / raw)
To: Alexander Graf
Cc: Christoffer Dall, kvm, a.costa, Jan Kiszka, android-virt, s.raho
On 06/05/2011 07:30 PM, Alexander Graf wrote:
> >>
> >> Could you elaborate what you mean here? I'm not really following. Are
> >> you suggesting a new arch-generic interface? (Pardon my ignorance).
> >
> > Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>
> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
Right, this is spilled milk.
Does the ppc qemu implementation raise KVM_INTERRUPT solely from the
vcpu thread?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 16:33 ` Avi Kivity
@ 2011-06-05 17:19 ` Alexander Graf
2011-06-05 17:48 ` Jan Kiszka
2011-06-06 7:41 ` Avi Kivity
0 siblings, 2 replies; 58+ messages in thread
From: Alexander Graf @ 2011-06-05 17:19 UTC (permalink / raw)
To: Avi Kivity
Cc: Christoffer Dall, kvm, a.costa, Jan Kiszka, android-virt, s.raho
On 05.06.2011, at 18:33, Avi Kivity wrote:
> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>> >>
>> >> Could you elaborate what you mean here? I'm not really following. Are
>> >> you suggesting a new arch-generic interface? (Pardon my ignorance).
>> >
>> > Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>
>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>
> Right, this is spilled milk.
>
> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
Alex
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 17:19 ` Alexander Graf
@ 2011-06-05 17:48 ` Jan Kiszka
2011-06-05 17:54 ` Alexander Graf
2011-06-06 7:41 ` Avi Kivity
1 sibling, 1 reply; 58+ messages in thread
From: Jan Kiszka @ 2011-06-05 17:48 UTC (permalink / raw)
To: Alexander Graf
Cc: Avi Kivity, Christoffer Dall, kvm, a.costa, android-virt, s.raho
[-- Attachment #1: Type: text/plain, Size: 1397 bytes --]
On 2011-06-05 19:19, Alexander Graf wrote:
>
> On 05.06.2011, at 18:33, Avi Kivity wrote:
>
>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>>>>>
>>>>> Could you elaborate what you mean here? I'm not really following. Are
>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>>>>
>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>>
>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>>
>> Right, this is spilled milk.
>>
>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>
> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
There are good chances to run into a deadlock when calling a per-vcpu
IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
on kvm_mutex inside the kernel, target vcpu is running endless guest
loop, holding kvm_mutex, all other qemu threads will sooner or later
block on the global lock. That's at least one pattern you can get on x86
(we had a few of such bugs in the past).
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 17:48 ` Jan Kiszka
@ 2011-06-05 17:54 ` Alexander Graf
2011-06-05 17:56 ` Jan Kiszka
2011-06-06 7:42 ` Avi Kivity
0 siblings, 2 replies; 58+ messages in thread
From: Alexander Graf @ 2011-06-05 17:54 UTC (permalink / raw)
To: Jan Kiszka
Cc: Avi Kivity, Christoffer Dall, kvm, a.costa, android-virt, s.raho
On 05.06.2011, at 19:48, Jan Kiszka wrote:
> On 2011-06-05 19:19, Alexander Graf wrote:
>>
>> On 05.06.2011, at 18:33, Avi Kivity wrote:
>>
>>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>>>>>>
>>>>>> Could you elaborate what you mean here? I'm not really following. Are
>>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>>>>>
>>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>>>
>>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>>>
>>> Right, this is spilled milk.
>>>
>>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>>
>> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
>
> There are good chances to run into a deadlock when calling a per-vcpu
> IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
> on kvm_mutex inside the kernel, target vcpu is running endless guest
> loop, holding kvm_mutex, all other qemu threads will sooner or later
> block on the global lock. That's at least one pattern you can get on x86
> (we had a few of such bugs in the past).
Any recommendations? Should we just signal the main thread when we want to inject an interrupt?
Alex
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 17:54 ` Alexander Graf
@ 2011-06-05 17:56 ` Jan Kiszka
2011-06-05 18:00 ` Alexander Graf
2011-06-06 7:42 ` Avi Kivity
1 sibling, 1 reply; 58+ messages in thread
From: Jan Kiszka @ 2011-06-05 17:56 UTC (permalink / raw)
To: Alexander Graf
Cc: Avi Kivity, Christoffer Dall, kvm, a.costa, android-virt, s.raho
[-- Attachment #1: Type: text/plain, Size: 1717 bytes --]
On 2011-06-05 19:54, Alexander Graf wrote:
>
> On 05.06.2011, at 19:48, Jan Kiszka wrote:
>
>> On 2011-06-05 19:19, Alexander Graf wrote:
>>>
>>> On 05.06.2011, at 18:33, Avi Kivity wrote:
>>>
>>>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>>>>>>>
>>>>>>> Could you elaborate what you mean here? I'm not really following. Are
>>>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>>>>>>
>>>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>>>>
>>>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>>>>
>>>> Right, this is spilled milk.
>>>>
>>>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>>>
>>> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
>>
>> There are good chances to run into a deadlock when calling a per-vcpu
>> IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
>> on kvm_mutex inside the kernel, target vcpu is running endless guest
>> loop, holding kvm_mutex, all other qemu threads will sooner or later
>> block on the global lock. That's at least one pattern you can get on x86
>> (we had a few of such bugs in the past).
>
> Any recommendations? Should we just signal the main thread when we want to inject an interrupt?
Yep. That's also what x86 does (when using user space irqchips).
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 17:56 ` Jan Kiszka
@ 2011-06-05 18:00 ` Alexander Graf
2011-06-05 18:04 ` Jan Kiszka
0 siblings, 1 reply; 58+ messages in thread
From: Alexander Graf @ 2011-06-05 18:00 UTC (permalink / raw)
To: Jan Kiszka
Cc: Avi Kivity, Christoffer Dall, kvm, a.costa, android-virt, s.raho
On 05.06.2011, at 19:56, Jan Kiszka wrote:
> On 2011-06-05 19:54, Alexander Graf wrote:
>>
>> On 05.06.2011, at 19:48, Jan Kiszka wrote:
>>
>>> On 2011-06-05 19:19, Alexander Graf wrote:
>>>>
>>>> On 05.06.2011, at 18:33, Avi Kivity wrote:
>>>>
>>>>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>>>>>>>>
>>>>>>>> Could you elaborate what you mean here? I'm not really following. Are
>>>>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>>>>>>>
>>>>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>>>>>
>>>>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>>>>>
>>>>> Right, this is spilled milk.
>>>>>
>>>>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>>>>
>>>> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
>>>
>>> There are good chances to run into a deadlock when calling a per-vcpu
>>> IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
>>> on kvm_mutex inside the kernel, target vcpu is running endless guest
>>> loop, holding kvm_mutex, all other qemu threads will sooner or later
>>> block on the global lock. That's at least one pattern you can get on x86
>>> (we had a few of such bugs in the past).
>>
>> Any recommendations? Should we just signal the main thread when we want to inject an interrupt?
>
> Yep. That's also what x86 does (when using user space irqchips).
Hrm, ok :). I guess the main reason we don't see major issues is that
1) people don't use iothread too often yet - is it even enabled by default?
2) the decrementor interrupt happens in-kernel, so timer interrupts still arrive properly
Alex
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 18:00 ` Alexander Graf
@ 2011-06-05 18:04 ` Jan Kiszka
2011-06-05 18:12 ` Alexander Graf
0 siblings, 1 reply; 58+ messages in thread
From: Jan Kiszka @ 2011-06-05 18:04 UTC (permalink / raw)
To: Alexander Graf
Cc: Avi Kivity, Christoffer Dall, kvm, a.costa, android-virt, s.raho
[-- Attachment #1: Type: text/plain, Size: 2232 bytes --]
On 2011-06-05 20:00, Alexander Graf wrote:
>
> On 05.06.2011, at 19:56, Jan Kiszka wrote:
>
>> On 2011-06-05 19:54, Alexander Graf wrote:
>>>
>>> On 05.06.2011, at 19:48, Jan Kiszka wrote:
>>>
>>>> On 2011-06-05 19:19, Alexander Graf wrote:
>>>>>
>>>>> On 05.06.2011, at 18:33, Avi Kivity wrote:
>>>>>
>>>>>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>>>>>>>>>
>>>>>>>>> Could you elaborate what you mean here? I'm not really following. Are
>>>>>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>>>>>>>>
>>>>>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>>>>>>
>>>>>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>>>>>>
>>>>>> Right, this is spilled milk.
>>>>>>
>>>>>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>>>>>
>>>>> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
>>>>
>>>> There are good chances to run into a deadlock when calling a per-vcpu
>>>> IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
>>>> on kvm_mutex inside the kernel, target vcpu is running endless guest
>>>> loop, holding kvm_mutex, all other qemu threads will sooner or later
>>>> block on the global lock. That's at least one pattern you can get on x86
>>>> (we had a few of such bugs in the past).
>>>
>>> Any recommendations? Should we just signal the main thread when we want to inject an interrupt?
>>
>> Yep. That's also what x86 does (when using user space irqchips).
>
> Hrm, ok :). I guess the main reason we don't see major issues is that
>
> 1) people don't use iothread too often yet - is it even enabled by default?
Nope (unless you use qemu-kvm.git next).
> 2) the decrementor interrupt happens in-kernel, so timer interrupts still arrive properly
Means PPC periodically returns to user space?
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 18:04 ` Jan Kiszka
@ 2011-06-05 18:12 ` Alexander Graf
2011-06-05 18:19 ` Jan Kiszka
0 siblings, 1 reply; 58+ messages in thread
From: Alexander Graf @ 2011-06-05 18:12 UTC (permalink / raw)
To: Jan Kiszka
Cc: Avi Kivity, Christoffer Dall, kvm, a.costa, android-virt, s.raho
On 05.06.2011, at 20:04, Jan Kiszka wrote:
> On 2011-06-05 20:00, Alexander Graf wrote:
>>
>> On 05.06.2011, at 19:56, Jan Kiszka wrote:
>>
>>> On 2011-06-05 19:54, Alexander Graf wrote:
>>>>
>>>> On 05.06.2011, at 19:48, Jan Kiszka wrote:
>>>>
>>>>> On 2011-06-05 19:19, Alexander Graf wrote:
>>>>>>
>>>>>> On 05.06.2011, at 18:33, Avi Kivity wrote:
>>>>>>
>>>>>>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>>>>>>>>>>
>>>>>>>>>> Could you elaborate what you mean here? I'm not really following. Are
>>>>>>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>>>>>>>>>
>>>>>>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>>>>>>>
>>>>>>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>>>>>>>
>>>>>>> Right, this is spilled milk.
>>>>>>>
>>>>>>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>>>>>>
>>>>>> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
>>>>>
>>>>> There are good chances to run into a deadlock when calling a per-vcpu
>>>>> IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
>>>>> on kvm_mutex inside the kernel, target vcpu is running endless guest
>>>>> loop, holding kvm_mutex, all other qemu threads will sooner or later
>>>>> block on the global lock. That's at least one pattern you can get on x86
>>>>> (we had a few of such bugs in the past).
>>>>
>>>> Any recommendations? Should we just signal the main thread when we want to inject an interrupt?
>>>
>>> Yep. That's also what x86 does (when using user space irqchips).
>>
>> Hrm, ok :). I guess the main reason we don't see major issues is that
>>
>> 1) people don't use iothread too often yet - is it even enabled by default?
>
> Nope (unless you use qemu-kvm.git next).
Any plans on finally doing that step? Code that isn't enabled by default is pretty prone to not be tested ;). It's a good way to slowly move code upstream, stabilize it there and then finally have it enabled by default. But I don't think this process should last more than 1/2 year. And IIRC with iothread, we're way past that point.
>> 2) the decrementor interrupt happens in-kernel, so timer interrupts still arrive properly
>
> Means PPC periodically returns to user space?
Hrm. No. It stays in kernel space even more. But at least it continues to live without the need for a timer interrupt from user space, so an endless loop doesn't completely stall the guest. I see your point though.
With VGA we're good thanks to the dirty bitmap update. So we get quite a good number of exits from guest context. -nographic might be problematic though. I don't see how a new key event or a new packet on the network could easily arrive inside the guest there. Sigh.
I'll take a look at it when I get my head around it. Thanks a lot for the insight :).
Alex
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 18:12 ` Alexander Graf
@ 2011-06-05 18:19 ` Jan Kiszka
0 siblings, 0 replies; 58+ messages in thread
From: Jan Kiszka @ 2011-06-05 18:19 UTC (permalink / raw)
To: Alexander Graf
Cc: Avi Kivity, Christoffer Dall, kvm, a.costa, android-virt, s.raho
[-- Attachment #1: Type: text/plain, Size: 3041 bytes --]
On 2011-06-05 20:12, Alexander Graf wrote:
>
> On 05.06.2011, at 20:04, Jan Kiszka wrote:
>
>> On 2011-06-05 20:00, Alexander Graf wrote:
>>>
>>> On 05.06.2011, at 19:56, Jan Kiszka wrote:
>>>
>>>> On 2011-06-05 19:54, Alexander Graf wrote:
>>>>>
>>>>> On 05.06.2011, at 19:48, Jan Kiszka wrote:
>>>>>
>>>>>> On 2011-06-05 19:19, Alexander Graf wrote:
>>>>>>>
>>>>>>> On 05.06.2011, at 18:33, Avi Kivity wrote:
>>>>>>>
>>>>>>>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
>>>>>>>>>>>
>>>>>>>>>>> Could you elaborate what you mean here? I'm not really following. Are
>>>>>>>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
>>>>>>>>>>
>>>>>>>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
>>>>>>>>>
>>>>>>>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
>>>>>>>>
>>>>>>>> Right, this is spilled milk.
>>>>>>>>
>>>>>>>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>>>>>>>
>>>>>>> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
>>>>>>
>>>>>> There are good chances to run into a deadlock when calling a per-vcpu
>>>>>> IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
>>>>>> on kvm_mutex inside the kernel, target vcpu is running endless guest
>>>>>> loop, holding kvm_mutex, all other qemu threads will sooner or later
>>>>>> block on the global lock. That's at least one pattern you can get on x86
>>>>>> (we had a few of such bugs in the past).
>>>>>
>>>>> Any recommendations? Should we just signal the main thread when we want to inject an interrupt?
>>>>
>>>> Yep. That's also what x86 does (when using user space irqchips).
>>>
>>> Hrm, ok :). I guess the main reason we don't see major issues is that
>>>
>>> 1) people don't use iothread too often yet - is it even enabled by default?
>>
>> Nope (unless you use qemu-kvm.git next).
>
> Any plans on finally doing that step? Code that isn't enabled by default is pretty prone to not be tested ;). It's a good way to slowly move code upstream, stabilize it there and then finally have it enabled by default. But I don't think this process should last more than 1/2 year. And IIRC with iothread, we're way past that point.
That's getting a bit off-topic for this thread now:
The good news is that 'next' will become 'master' fairly soon (unless
some regression is found), and then we are using QEMU upstream's
iothread code in qemu-kvm. By default.
For upstream, we are still facing TCG performance regressions in
iothread mode, thus it's still default off. $Someone would have to sort
them out, and then we could flip defaults there as well.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 17:19 ` Alexander Graf
2011-06-05 17:48 ` Jan Kiszka
@ 2011-06-06 7:41 ` Avi Kivity
1 sibling, 0 replies; 58+ messages in thread
From: Avi Kivity @ 2011-06-06 7:41 UTC (permalink / raw)
To: Alexander Graf
Cc: Christoffer Dall, kvm, a.costa, Jan Kiszka, android-virt, s.raho
On 06/05/2011 08:19 PM, Alexander Graf wrote:
> On 05.06.2011, at 18:33, Avi Kivity wrote:
>
> > On 06/05/2011 07:30 PM, Alexander Graf wrote:
> >> >>
> >> >> Could you elaborate what you mean here? I'm not really following. Are
> >> >> you suggesting a new arch-generic interface? (Pardon my ignorance).
> >> >
> >> > Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
> >>
> >> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
> >
> > Right, this is spilled milk.
> >
> > Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
>
> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread.
That's 100% broken, as api.txt states. Besides consistency, this is to
allow an eventual move to a syscall interface where the vcpu is derived
from current rather than an fd parameter.
> The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
Maybe slim, but still a major bug.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-05 17:54 ` Alexander Graf
2011-06-05 17:56 ` Jan Kiszka
@ 2011-06-06 7:42 ` Avi Kivity
1 sibling, 0 replies; 58+ messages in thread
From: Avi Kivity @ 2011-06-06 7:42 UTC (permalink / raw)
To: Alexander Graf
Cc: Jan Kiszka, Christoffer Dall, kvm, a.costa, android-virt, s.raho
On 06/05/2011 08:54 PM, Alexander Graf wrote:
> On 05.06.2011, at 19:48, Jan Kiszka wrote:
>
> > On 2011-06-05 19:19, Alexander Graf wrote:
> >>
> >> On 05.06.2011, at 18:33, Avi Kivity wrote:
> >>
> >>> On 06/05/2011 07:30 PM, Alexander Graf wrote:
> >>>>>>
> >>>>>> Could you elaborate what you mean here? I'm not really following. Are
> >>>>>> you suggesting a new arch-generic interface? (Pardon my ignorance).
> >>>>>
> >>>>> Using KVM_IRQ_LINE everywhere except s390, not just in x86 and ARM.
> >>>>
> >>>> An in-kernel MPIC implementation is coming for PPC, so I don't see any reason to switch from something that works now.
> >>>
> >>> Right, this is spilled milk.
> >>>
> >>> Does the ppc qemu implementation raise KVM_INTERRUPT solely from the vcpu thread?
> >>
> >> Well, without iothread it used to obviously. Now that we have an iothread, it calls ioctl(KVM_INTERRUPT) from a separate thread. The code also doesn't forcefully wake up the vcpu thread, so yes, I think here's a chance for at least delaying interrupt delivery. Chances are pretty slim we don't get out of the vcpu thread at all :).
> >
> > There are good chances to run into a deadlock when calling a per-vcpu
> > IOCTL over a foreign context: calling thread holds qemu_mutex and blocks
> > on kvm_mutex inside the kernel, target vcpu is running endless guest
> > loop, holding kvm_mutex, all other qemu threads will sooner or later
> > block on the global lock. That's at least one pattern you can get on x86
> > (we had a few of such bugs in the past).
>
> Any recommendations? Should we just signal the main thread when we want to inject an interrupt?
>
Signal the vcpu thread, of course. There's on_vcpu (or on_cpu, don't
know how it's called today) for that.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
[not found] ` <211B3F42-9B68-41BB-B1FA-348B5500C60A@suse.de>
@ 2011-06-10 8:40 ` Christoffer Dall
2011-06-10 9:23 ` Catalin Marinas
0 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-10 8:40 UTC (permalink / raw)
To: Alexander Graf, Catalin Marinas, Robin Randhawa
Cc: Avi Kivity, kvm, a.costa, android-virt, s.raho
On Sun, Jun 5, 2011 at 6:27 PM, Alexander Graf <agraf@suse.de> wrote:
>
> On 05.06.2011, at 18:03, Christoffer Dall wrote:
>
> On Sun, Jun 5, 2011 at 2:36 PM, Avi Kivity <avi@redhat.com> wrote:
>
> On 06/03/2011 06:03 PM, Christoffer Dall wrote:
>
>
> [...]
>
> +if VIRTUALIZATION
>
> +
>
> +config KVM
>
> + bool "Kernel-based Virtual Machine (KVM) support"
>
> + select PREEMPT_NOTIFIERS
>
> + select ANON_INODES
>
> + select KVM_ARM_HOST
>
> + select KVM_MMIO
>
> + ---help---
>
> + Support hosting virtualized guest machines. You will also
>
> + need to select one or more of the processor modules below.
>
> +
>
> + This module provides access to the hardware capabilities through
>
> + a character device node named /dev/kvm.
>
> +
>
> + If unsure, say N.
>
> I see you can't support a modular build, which is a pity.
>
>
> My concern is that I map in code that needs to be in place for running
> in Hyp mode. Of course I just need to pin these, so it should be
> possible. I'll look into this as well.
>
> We have similar requirements for PPC, so feel free to check how we did it
> there :). On =M we still include reasonably small parts of the code in the
> kernel as =Y, but keep the big chunks outside as module.
>
OK, so I looked into compiling KVM/ARM as a module and there are a few
obstacles:
I need to export several symbols (identity_mapping_add,
pgd_clear_bad, __irq_svc). __irq_svc is tricky because it's in the
entry-armv.S assembly file, so it's not obvious where to export it
from. Recommendations? (A dedicated KVM export file as in PPC, or simply
adding EXPORT_SYMBOL_GPL(...), or an alternative?)
Also, iirc, modules are loaded using vmalloc, wouldn't that make it
impossible for me to use virt_to_phys, which I need to do on some of
the code pages for Hypervisor initialization? If any of the ARM people
could give some advice here, it would be much appreciated - of course I
can kmalloc a page and relocate the init code, but I really would like
to avoid this.
Thanks!
-Christoffer
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-10 8:40 ` [Android-virt] " Christoffer Dall
@ 2011-06-10 9:23 ` Catalin Marinas
2011-06-10 9:53 ` Alexander Graf
0 siblings, 1 reply; 58+ messages in thread
From: Catalin Marinas @ 2011-06-10 9:23 UTC (permalink / raw)
To: Christoffer Dall
Cc: Alexander Graf, Robin Randhawa, Avi Kivity, kvm@vger.kernel.org,
a.costa@virtualopensystems.com,
android-virt@lists.cs.columbia.edu, s.raho@virtualopensystems.com
On Fri, Jun 10, 2011 at 09:40:34AM +0100, Christoffer Dall wrote:
> OK, so I looked into compiling KVM/ARM as a module and there are a few
> obstacles:
>
> I need to export several symbols (identity_mapping_add,
> pgd_clear_bad, __irq_svc). __irq_svc is tricky because it's in the
> entry-armv.S assembly file, so it's not obvious where to export it
> from. Recommendations? (dedicated KVM export file as in PPC or simply
> adding EXPORT_SYMBOL_GPL(...) or alternative?
There is arch/arm/kernel/armksyms.c where we export extra symbols coming
from .S files. I haven't looked at the code yet, maybe some of the
symbols could be avoided.
> Also, iirc, modules are loaded using vmalloc, wouldn't that make it
> impossible for me to use virt_to_phys, which I need to do on some of
> the code pages for Hypervisor initialization? If any of the ARM people
> could give an advise here, it would be much appreciated - of course I
> can kmalloc a page and relocate the init code, but I really would like
> to avoid this.
You could allocate a page (or a few) and copy the hypervisor code in there.
Just make sure that it is position independent (which is not difficult
with hand-written assembly).
Alternatively, you could set up the Hyp translation tables to also
include the modules area (pretty much duplicating the corresponding
swapper_pg_dir entries, with some attributes changed).
But for now you can probably ignore this issue and get back to it once
everything else is working.
--
Catalin
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-10 9:23 ` Catalin Marinas
@ 2011-06-10 9:53 ` Alexander Graf
2011-06-10 9:58 ` Catalin Marinas
0 siblings, 1 reply; 58+ messages in thread
From: Alexander Graf @ 2011-06-10 9:53 UTC (permalink / raw)
To: Catalin Marinas
Cc: Christoffer Dall, Robin Randhawa, Avi Kivity, kvm@vger.kernel.org,
a.costa@virtualopensystems.com,
android-virt@lists.cs.columbia.edu, s.raho@virtualopensystems.com
Am 10.06.2011 um 11:23 schrieb Catalin Marinas <catalin.marinas@arm.com>:
> On Fri, Jun 10, 2011 at 09:40:34AM +0100, Christoffer Dall wrote:
>> OK, so I looked into compiling KVM/ARM as a module and there are a few
>> obstacles:
>>
>> I need to export several symbols (identity_mapping_add,
>> pgd_clear_bad, __irq_svc). __irq_svc is tricky because it's in the
>> entry-armv.S assembly file, so it's not obvious where to export it
>> from. Recommendations? (dedicated KVM export file as in PPC or simply
>> adding EXPORT_SYMBOL_GPL(...) or alternative?
>
> There is arch/arm/kernel/armksyms.c where we export extra symbols coming
> from .S files. I haven't looked at the code yet, maybe some of the
> symbols could be avoided.
>
>> Also, iirc, modules are loaded using vmalloc, wouldn't that make it
>> impossible for me to use virt_to_phys, which I need to do on some of
>> the code pages for Hypervisor initialization? If any of the ARM people
>> could give an advise here, it would be much appreciated - of course I
>> can kmalloc a page and relocate the init code, but I really would like
>> to avoid this.
>
> You could alloc a page (or a few) an copy the hypervisor code in there.
> Just make sure that it is position independent (which is not difficult
> with hand-written assembly).
What we do on ppc is to select another kernel option =Y when we set kvm=M. Based on that other option, we then add the code that need to run in real mode (linear map) to the main kernel binary.
That way, the guest entry/exit code for example is always in the kernel, while the logic that dispatches what to do on a specific guest exit can stay as module, making development a lot easier.
Alex
>
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-10 9:53 ` Alexander Graf
@ 2011-06-10 9:58 ` Catalin Marinas
2011-06-10 11:56 ` Christoffer Dall
0 siblings, 1 reply; 58+ messages in thread
From: Catalin Marinas @ 2011-06-10 9:58 UTC (permalink / raw)
To: Alexander Graf
Cc: Christoffer Dall, Robin Randhawa, Avi Kivity, kvm@vger.kernel.org,
a.costa@virtualopensystems.com,
android-virt@lists.cs.columbia.edu, s.raho@virtualopensystems.com
On Fri, Jun 10, 2011 at 10:53:06AM +0100, Alexander Graf wrote:
> Am 10.06.2011 um 11:23 schrieb Catalin Marinas <catalin.marinas@arm.com>:
> > On Fri, Jun 10, 2011 at 09:40:34AM +0100, Christoffer Dall wrote:
> >> OK, so I looked into compiling KVM/ARM as a module and there are a few
> >> obstacles:
> >>
> >> I need to export several symbols (identity_mapping_add,
> >> pgd_clear_bad, __irq_svc). __irq_svc is tricky because it's in the
> >> entry-armv.S assembly file, so it's not obvious where to export it
> >> from. Recommendations? (dedicated KVM export file as in PPC or simply
> >> adding EXPORT_SYMBOL_GPL(...) or alternative?
> >
> > There is arch/arm/kernel/armksyms.c where we export extra symbols coming
> > from .S files. I haven't looked at the code yet, maybe some of the
> > symbols could be avoided.
> >
> >> Also, iirc, modules are loaded using vmalloc, wouldn't that make it
> >> impossible for me to use virt_to_phys, which I need to do on some of
> >> the code pages for Hypervisor initialization? If any of the ARM people
> >> could give an advise here, it would be much appreciated - of course I
> >> can kmalloc a page and relocate the init code, but I really would like
> >> to avoid this.
> >
> > You could alloc a page (or a few) an copy the hypervisor code in there.
> > Just make sure that it is position independent (which is not difficult
> > with hand-written assembly).
>
> What we do on ppc is to select another kernel option =Y when we set
> kvm=M. Based on that other option, we then add the code that need to
> run in real mode (linear map) to the main kernel binary.
>
> That way, the guest entry/exit code for example is always in the
> kernel, while the logic that dispatches what to do on a specific guest
> exit can stay as module, making development a lot easier.
Good point. We could do the same on ARM.
Thanks.
--
Catalin
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [Android-virt] [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support
2011-06-10 9:58 ` Catalin Marinas
@ 2011-06-10 11:56 ` Christoffer Dall
0 siblings, 0 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-10 11:56 UTC (permalink / raw)
To: Catalin Marinas, Alexander Graf
Cc: Avi Kivity, kvm@vger.kernel.org, a.costa@virtualopensystems.com,
android-virt@lists.cs.columbia.edu, s.raho@virtualopensystems.com
On Fri, Jun 10, 2011 at 11:58 AM, Catalin Marinas
<catalin.marinas@arm.com> wrote:
> On Fri, Jun 10, 2011 at 10:53:06AM +0100, Alexander Graf wrote:
>> Am 10.06.2011 um 11:23 schrieb Catalin Marinas <catalin.marinas@arm.com>:
>> > On Fri, Jun 10, 2011 at 09:40:34AM +0100, Christoffer Dall wrote:
>> >> OK, so I looked into compiling KVM/ARM as a module and there are a few
>> >> obstacles:
>> >>
>> >> I need to export several symbols (identity_mapping_add,
>> >> pgd_clear_bad, __irq_svc). __irq_svc is tricky because it's in the
>> >> entry-armv.S assembly file, so it's not obvious where to export it
>> >> from. Recommendations? (dedicated KVM export file as in PPC or simply
>> >> adding EXPORT_SYMBOL_GPL(...) or alternative?
>> >
>> > There is arch/arm/kernel/armksyms.c where we export extra symbols coming
>> > from .S files. I haven't looked at the code yet, maybe some of the
>> > symbols could be avoided.
>> >
>> >> Also, iirc, modules are loaded using vmalloc, wouldn't that make it
>> >> impossible for me to use virt_to_phys, which I need to do on some of
>> >> the code pages for Hypervisor initialization? If any of the ARM people
>> >> could give an advise here, it would be much appreciated - of course I
>> >> can kmalloc a page and relocate the init code, but I really would like
>> >> to avoid this.
>> >
>> > You could alloc a page (or a few) an copy the hypervisor code in there.
>> > Just make sure that it is position independent (which is not difficult
>> > with hand-written assembly).
>>
>> What we do on ppc is to select another kernel option =Y when we set
>> kvm=M. Based on that other option, we then add the code that need to
>> run in real mode (linear map) to the main kernel binary.
>>
>> That way, the guest entry/exit code for example is always in the
>> kernel, while the logic that dispatches what to do on a specific guest
>> exit can stay as module, making development a lot easier.
>
> Good point. We could do the same on ARM.
Thanks, I will follow your suggestions for next patch series.
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM
2011-06-05 12:48 ` Avi Kivity
@ 2011-06-11 10:37 ` Christoffer Dall
2011-06-12 8:24 ` Avi Kivity
0 siblings, 1 reply; 58+ messages in thread
From: Christoffer Dall @ 2011-06-11 10:37 UTC (permalink / raw)
To: Avi Kivity; +Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On Sun, Jun 5, 2011 at 2:48 PM, Avi Kivity <avi@redhat.com> wrote:
> On 06/03/2011 06:04 PM, Christoffer Dall wrote:
>>
>> Handles the guest faults in KVM by mapping in corresponding user pages
>> in the 2nd stage page tables.
>>
>>
>>
>> +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>> + gfn_t gfn, struct kvm_memory_slot *memslot)
>> +{
>> + pfn_t pfn;
>> + pgd_t *pgd;
>> + pmd_t *pmd;
>> + pte_t *pte, new_pte;
>> +
>> + pfn = gfn_to_pfn(vcpu->kvm, gfn);
>> +
>> + if (is_error_pfn(pfn)) {
>> + kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
>> + "corresponding host mapping",
>> + gfn, gfn<< PAGE_SHIFT);
>> + return -EFAULT;
>> + }
>> +
>> + /* Create 2nd stage page table mapping - Level 1 */
>> + pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
>> + if (pgd_none(*pgd)) {
>> + pmd = pmd_alloc_one(NULL, fault_ipa);
>> + if (!pmd) {
>> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
>> + return -ENOMEM;
>> + }
>> + pgd_populate(NULL, pgd, pmd);
>> + pmd += pmd_index(fault_ipa);
>> + } else
>> + pmd = pmd_offset(pgd, fault_ipa);
>> +
>> + /* Create 2nd stage page table mapping - Level 2 */
>> + if (pmd_none(*pmd)) {
>> + pte = pte_alloc_one_kernel(NULL, fault_ipa);
>> + if (!pte) {
>> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
>> + return -ENOMEM;
>> + }
>> + pmd_populate_kernel(NULL, pmd, pte);
>> + pte += pte_index(fault_ipa);
>> + } else
>> + pte = pte_offset_kernel(pmd, fault_ipa);
>> +
>> + /* Create 2nd stage page table mapping - Level 3 */
>> + new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
>> + set_pte_ext(pte, new_pte, 0);
>> +
>> + return 0;
>> +}
>> +
>> +#define HSR_ABT_FS (0x3f)
>> +#define HPFAR_MASK (~0xf)
>> int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> {
>> + unsigned long hsr_ec;
>> + unsigned long fault_status;
>> + phys_addr_t fault_ipa;
>> + struct kvm_memory_slot *memslot = NULL;
>> + bool is_iabt;
>> + gfn_t gfn;
>> +
>> + hsr_ec = vcpu->arch.hsr>> HSR_EC_SHIFT;
>> + is_iabt = (hsr_ec == HSR_EC_IABT);
>> +
>> + /* Check that the second stage fault is a translation fault */
>> + fault_status = vcpu->arch.hsr& HSR_ABT_FS;
>> + if ((fault_status& 0x3c) != 0x4) {
>> + kvm_err(-EFAULT, "Unsupported fault status: %x",
>> + fault_status& 0x3c);
>> + return -EFAULT;
>> + }
>> +
>> + fault_ipa = ((phys_addr_t)vcpu->arch.hpfar& HPFAR_MASK)<< 8;
>> +
>> + gfn = fault_ipa>> PAGE_SHIFT;
>> + if (!kvm_is_visible_gfn(vcpu->kvm, gfn))
>> + goto io_mem_abort;
>> +
>> + memslot = gfn_to_memslot(vcpu->kvm, gfn);
>> + if (memslot->user_alloc)
>> + return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
>
> Non-user_alloc should not exist for ARM (and are not supported for x86 these
> days, except for a few implementation internal slots).
ok, I raise an error in when (!memslot->user_alloc) instead now. thanks.
>
>> +
>> +io_mem_abort:
>> + if (is_iabt) {
>> + kvm_err(-EFAULT, "Inst. abort on I/O address");
>> + return -EFAULT;
>> + }
>> +
>> + kvm_msg("I/O address abort...");
>> KVMARM_NOT_IMPLEMENTED();
>> return -EINVAL;
>> }
>
> Okay, this is about a zillion times simpler than x86. Congratulations.
Well, I need to handle the I/O aborts, but it's quite simple. What
makes it much more complicated on x86?
>
> What are your thoughts about mmu notifier support?
For what purpose? There is no swapping on ARM, so the only case that jumps
to my mind is for KSM. And I'm not quite there yet :)
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM
2011-06-11 10:37 ` Christoffer Dall
@ 2011-06-12 8:24 ` Avi Kivity
2011-06-12 8:57 ` Christoffer Dall
0 siblings, 1 reply; 58+ messages in thread
From: Avi Kivity @ 2011-06-12 8:24 UTC (permalink / raw)
To: Christoffer Dall
Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
On 06/11/2011 01:37 PM, Christoffer Dall wrote:
> >
> > Okay, this is about a zillion times simpler than x86. Congratulations.
>
> Well, I need to handle the I/O aborts, but it's quite simple. What
> makes it much more complicated on x86?
- lack of nested paging on earlier processors
- 97 different paging modes
- lots of extra bits bringing in weird functionality
- lots of optimizations
> >
> > What are your thoughts about mmu notifier support?
>
> For what purpose? There is no swapping on ARM, so only case that jumps
> to my mind is for KSM. And I'm not quite there yet :)
Really? I imagine swapping will be needed for server workloads. mmu
notifiers are also useful for transparent hugepages and page
migrations. I imagine these will all follow if ARM servers take off.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 58+ messages in thread
* Re: [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM
2011-06-12 8:24 ` Avi Kivity
@ 2011-06-12 8:57 ` Christoffer Dall
0 siblings, 0 replies; 58+ messages in thread
From: Christoffer Dall @ 2011-06-12 8:57 UTC (permalink / raw)
To: Avi Kivity; +Cc: catalin.marinas, android-virt, s.raho, a.motakis, kvm, a.costa
>> >
>> > What are your thoughts about mmu notifier support?
>>
>> For what purpose? There is no swapping on ARM, so only case that jumps
>> to my mind is for KSM. And I'm not quite there yet :)
>
> Really? I imagine swapping will be needed for server workloads. mmu
> notifiers are also useful for transparent hugepages and page migrations. I
> imagine these will all follow if ARM servers take off.
>
You may be right, but I guess it depends how ARM servers are going to
be used. I agree though, swapping could very well be useful in an ARM
server scenario and at that time mmu notifier integration should be
looked into, but it's not in my critical path yet.
Thanks for the input.
^ permalink raw reply [flat|nested] 58+ messages in thread
end of thread, other threads:[~2011-06-12 8:57 UTC | newest]
Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-03 15:03 [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 2/8] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 3/8] ARM: KVM: Add hypervisor initialization Christoffer Dall
2011-06-03 15:03 ` [PATCH v3 4/8] ARM: KVM: Memory virtualization setup Christoffer Dall
2011-06-05 12:41 ` Avi Kivity
2011-06-05 14:50 ` Christoffer Dall
2011-06-05 14:53 ` Avi Kivity
2011-06-05 15:14 ` Avi Kivity
2011-06-05 15:27 ` Christoffer Dall
2011-06-05 16:02 ` Avi Kivity
2011-06-03 15:03 ` [PATCH v3 5/8] ARM: KVM: World-switch implementation Christoffer Dall
2011-06-03 15:04 ` [PATCH v3 6/8] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
2011-06-03 15:04 ` [PATCH v3 7/8] ARM: KVM: Handle guest faults in KVM Christoffer Dall
2011-06-05 12:48 ` Avi Kivity
2011-06-11 10:37 ` Christoffer Dall
2011-06-12 8:24 ` Avi Kivity
2011-06-12 8:57 ` Christoffer Dall
2011-06-03 15:04 ` [PATCH v3 8/8] ARM: KVM: Handle I/O aborts Christoffer Dall
2011-06-03 15:31 ` [PATCH v3 1/8] ARM: KVM: Initial skeleton to compile KVM support Jan Kiszka
2011-06-03 15:53 ` Jan Kiszka
2011-06-03 16:19 ` Christoffer Dall
2011-06-03 16:31 ` [Android-virt] " Alexander Graf
2011-06-04 14:13 ` Alexander Graf
2011-06-05 12:21 ` Avi Kivity
2011-06-05 14:13 ` Jan Kiszka
2011-06-05 14:18 ` Avi Kivity
2011-06-05 14:58 ` Jan Kiszka
2011-06-05 15:10 ` Avi Kivity
2011-06-05 15:14 ` Jan Kiszka
2011-06-05 15:18 ` Avi Kivity
2011-06-05 16:25 ` Christoffer Dall
2011-06-05 16:28 ` Avi Kivity
2011-06-05 16:30 ` [Android-virt] " Alexander Graf
2011-06-05 16:33 ` Avi Kivity
2011-06-05 17:19 ` Alexander Graf
2011-06-05 17:48 ` Jan Kiszka
2011-06-05 17:54 ` Alexander Graf
2011-06-05 17:56 ` Jan Kiszka
2011-06-05 18:00 ` Alexander Graf
2011-06-05 18:04 ` Jan Kiszka
2011-06-05 18:12 ` Alexander Graf
2011-06-05 18:19 ` Jan Kiszka
2011-06-06 7:42 ` Avi Kivity
2011-06-06 7:41 ` Avi Kivity
2011-06-05 16:24 ` Christoffer Dall
2011-06-05 16:31 ` Avi Kivity
2011-06-05 12:36 ` Avi Kivity
2011-06-05 16:03 ` Christoffer Dall
2011-06-05 16:06 ` Avi Kivity
[not found] ` <211B3F42-9B68-41BB-B1FA-348B5500C60A@suse.de>
2011-06-10 8:40 ` [Android-virt] " Christoffer Dall
2011-06-10 9:23 ` Catalin Marinas
2011-06-10 9:53 ` Alexander Graf
2011-06-10 9:58 ` Catalin Marinas
2011-06-10 11:56 ` Christoffer Dall
2011-06-05 12:52 ` Avi Kivity
2011-06-05 14:00 ` Avi Kivity
2011-06-05 14:13 ` Christoffer Dall
2011-06-05 14:18 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox