* [PATCH v5 01/13] ARM: KVM: Initial skeleton to compile KVM support
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
@ 2011-12-11 10:24 ` Christoffer Dall
2011-12-11 10:24 ` [PATCH v5 02/13] ARM: KVM: Hypervisor identity mapping Christoffer Dall
` (12 subsequent siblings)
13 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:24 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
Targets KVM support for Cortex A-15 processors.
Contains no real functionality but all the framework components,
make files, header files and some tracing functionality.
Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/Kconfig | 2
arch/arm/Makefile | 1
arch/arm/include/asm/kvm.h | 66 +++++++++
arch/arm/include/asm/kvm_asm.h | 28 ++++
arch/arm/include/asm/kvm_emulate.h | 91 ++++++++++++
arch/arm/include/asm/kvm_host.h | 93 ++++++++++++
arch/arm/include/asm/kvm_para.h | 9 +
arch/arm/include/asm/unified.h | 12 ++
arch/arm/kvm/Kconfig | 44 ++++++
arch/arm/kvm/Makefile | 17 ++
arch/arm/kvm/arm.c | 279 ++++++++++++++++++++++++++++++++++++
arch/arm/kvm/debug.h | 48 ++++++
arch/arm/kvm/emulate.c | 121 ++++++++++++++++
arch/arm/kvm/exports.c | 16 ++
arch/arm/kvm/guest.c | 148 +++++++++++++++++++
arch/arm/kvm/init.S | 17 ++
arch/arm/kvm/interrupts.S | 17 ++
arch/arm/kvm/mmu.c | 15 ++
arch/arm/kvm/trace.h | 52 +++++++
arch/arm/mach-vexpress/Kconfig | 1
arch/arm/mm/Kconfig | 8 +
21 files changed, 1085 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/include/asm/kvm.h
create mode 100644 arch/arm/include/asm/kvm_asm.h
create mode 100644 arch/arm/include/asm/kvm_emulate.h
create mode 100644 arch/arm/include/asm/kvm_host.h
create mode 100644 arch/arm/include/asm/kvm_para.h
create mode 100644 arch/arm/kvm/Kconfig
create mode 100644 arch/arm/kvm/Makefile
create mode 100644 arch/arm/kvm/arm.c
create mode 100644 arch/arm/kvm/debug.h
create mode 100644 arch/arm/kvm/emulate.c
create mode 100644 arch/arm/kvm/exports.c
create mode 100644 arch/arm/kvm/guest.c
create mode 100644 arch/arm/kvm/init.S
create mode 100644 arch/arm/kvm/interrupts.S
create mode 100644 arch/arm/kvm/mmu.c
create mode 100644 arch/arm/kvm/trace.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 00e908b..2a65d7b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2248,3 +2248,5 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "arch/arm/kvm/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index dfcf3b0..621fb8d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -255,6 +255,7 @@ core-$(CONFIG_VFP) += arch/arm/vfp/
# If we have a machine-specific directory, then include it in the build.
core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
+core-y += arch/arm/kvm/
core-y += $(machdirs) $(platdirs)
drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/
diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
new file mode 100644
index 0000000..87dc33b
--- /dev/null
+++ b/arch/arm/include/asm/kvm.h
@@ -0,0 +1,66 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#include <asm/types.h>
+
+/*
+ * Modes used for short-hand mode determination in the world-switch code and
+ * in emulation code.
+ *
+ * Note: These indices do NOT correspond to the value of the CPSR mode bits!
+ */
+#define MODE_FIQ 0
+#define MODE_IRQ 1
+#define MODE_SVC 2
+#define MODE_ABT 3
+#define MODE_UND 4
+#define MODE_USR 5
+#define MODE_SYS 6
+
+struct kvm_regs {
+ __u32 regs0_7[8]; /* Unbanked regs. (r0 - r7) */
+ __u32 fiq_regs8_12[5]; /* Banked fiq regs. (r8 - r12) */
+ __u32 usr_regs8_12[5]; /* Banked usr registers (r8 - r12) */
+ __u32 reg13[6]; /* Banked r13, indexed by MODE_ */
+ __u32 reg14[6]; /* Banked r14, indexed by MODE_ */
+ __u32 reg15;
+ __u32 cpsr;
+ __u32 spsr[5]; /* Banked SPSR, indexed by MODE_ */
+ struct {
+ __u32 c1_sys;
+ __u32 c2_base0;
+ __u32 c2_base1;
+ __u32 c3_dacr;
+ } cp15;
+
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
new file mode 100644
index 0000000..c3d4458
--- /dev/null
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -0,0 +1,28 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+#define ARM_EXCEPTION_RESET 0
+#define ARM_EXCEPTION_UNDEFINED 1
+#define ARM_EXCEPTION_SOFTWARE 2
+#define ARM_EXCEPTION_PREF_ABORT 3
+#define ARM_EXCEPTION_DATA_ABORT 4
+#define ARM_EXCEPTION_IRQ 5
+#define ARM_EXCEPTION_FIQ 6
+
+#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
new file mode 100644
index 0000000..91d461a
--- /dev/null
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -0,0 +1,91 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_EMULATE_H__
+#define __ARM_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+
+u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
+
+static inline unsigned char vcpu_mode(struct kvm_vcpu *vcpu)
+{
+ u8 modes_table[16] = {
+ MODE_USR, /* 0x0 */
+ MODE_FIQ, /* 0x1 */
+ MODE_IRQ, /* 0x2 */
+ MODE_SVC, /* 0x3 */
+ 0xf, 0xf, 0xf,
+ MODE_ABT, /* 0x7 */
+ 0xf, 0xf, 0xf,
+ MODE_UND, /* 0xb */
+ 0xf, 0xf, 0xf,
+ MODE_SYS}; /* 0xf */
+
+ BUG_ON(modes_table[vcpu->arch.regs.cpsr & 0xf] == 0xf);
+ return modes_table[vcpu->arch.regs.cpsr & 0xf];
+}
+
+/*
+ * Return the SPSR for the specified mode of the virtual CPU.
+ */
+static inline u32 *kvm_vcpu_spsr(struct kvm_vcpu *vcpu, u32 mode)
+{
+ switch (mode) {
+ case MODE_SVC:
+ return &vcpu->arch.regs.svc_regs[2];
+ case MODE_ABT:
+ return &vcpu->arch.regs.abt_regs[2];
+ case MODE_UND:
+ return &vcpu->arch.regs.und_regs[2];
+ case MODE_IRQ:
+ return &vcpu->arch.regs.irq_regs[2];
+ case MODE_FIQ:
+ return &vcpu->arch.regs.fiq_regs[7];
+ default:
+ BUG();
+ }
+}
+
+/* Get vcpu register for current mode */
+static inline u32 *vcpu_reg(struct kvm_vcpu *vcpu, unsigned long reg_num)
+{
+ return kvm_vcpu_reg(vcpu, reg_num, vcpu_mode(vcpu));
+}
+
+static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu)
+{
+ return &vcpu->arch.regs.cpsr;
+}
+
+/* Get vcpu SPSR for current mode */
+static inline u32 *vcpu_spsr(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_spsr(vcpu, vcpu_mode(vcpu));
+}
+
+static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
+{
+ return (vcpu_mode(vcpu) < MODE_USR);
+}
+
+static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
+{
+ return ((vcpu_mode(vcpu)) == MODE_USR) ? 0 : 1;
+}
+
+#endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
new file mode 100644
index 0000000..b2fcd8a
--- /dev/null
+++ b/arch/arm/include/asm/kvm_host.h
@@ -0,0 +1,93 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_HOST_H__
+#define __ARM_KVM_HOST_H__
+
+#define KVM_MAX_VCPUS 1
+#define KVM_MEMORY_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+/* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x) 0
+#define KVM_NR_PAGE_SIZES 1
+#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
+
+struct kvm_vcpu;
+u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
+
+struct kvm_arch {
+};
+
+#define EXCEPTION_NONE 0
+#define EXCEPTION_RESET 0x80
+#define EXCEPTION_UNDEFINED 0x40
+#define EXCEPTION_SOFTWARE 0x20
+#define EXCEPTION_PREFETCH 0x10
+#define EXCEPTION_DATA 0x08
+#define EXCEPTION_IMPRECISE 0x04
+#define EXCEPTION_IRQ 0x02
+#define EXCEPTION_FIQ 0x01
+
+struct kvm_vcpu_regs {
+ u32 usr_regs[15]; /* R0_usr - R14_usr */
+ u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */
+ u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */
+ u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */
+ u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */
+ u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */
+ u32 pc; /* The program counter (r15) */
+ u32 cpsr; /* The guest CPSR */
+} __packed;
+
+struct kvm_vcpu_arch {
+ struct kvm_vcpu_regs regs;
+
+ /* System control coprocessor (cp15) */
+ struct {
+ u32 c1_SCTLR; /* System Control Register */
+ u32 c1_ACTLR; /* Auxiliary Control Register */
+ u32 c1_CPACR; /* Coprocessor Access Control */
+ u64 c2_TTBR0; /* Translation Table Base Register 0 */
+ u64 c2_TTBR1; /* Translation Table Base Register 1 */
+ u32 c2_TTBCR; /* Translation Table Base Control R. */
+ u32 c3_DACR; /* Domain Access Control Register */
+ } cp15;
+
+ u32 virt_irq; /* HCR exception mask */
+
+ /* Exception Information */
+ u32 hsr; /* Hyp Syndrome Register */
+ u32 hdfar; /* Hyp Data Fault Address Register */
+ u32 hifar; /* Hyp Inst. Fault Address Register */
+ u32 hpfar; /* Hyp IPA Fault Address Register */
+
+ /* IO related fields */
+ u32 mmio_rd;
+
+ /* Misc. fields */
+ u32 wait_for_interrupts;
+};
+
+struct kvm_vm_stat {
+ u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+};
+
+#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_para.h b/arch/arm/include/asm/kvm_para.h
new file mode 100644
index 0000000..7ce5f1c
--- /dev/null
+++ b/arch/arm/include/asm/kvm_para.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_ARM_KVM_PARA_H
+#define _ASM_ARM_KVM_PARA_H
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+
+#endif /* _ASM_ARM_KVM_PARA_H */
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index bc63116..0d41bde 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -54,6 +54,18 @@
#endif /* CONFIG_THUMB2_KERNEL */
+#ifdef CONFIG_KVM_ARM_HOST
+#ifdef __ASSEMBLY__
+.arch_extension sec
+.arch_extension virt
+#else
+__asm__(
+" .arch_extension sec\n"
+" .arch_extension virt\n"
+);
+#endif
+#endif
+
#ifndef CONFIG_ARM_ASM_UNIFIED
/*
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
new file mode 100644
index 0000000..ccabbb3
--- /dev/null
+++ b/arch/arm/kvm/Kconfig
@@ -0,0 +1,44 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select KVM_ARM_HOST
+ select KVM_MMIO
+ ---help---
+ Support hosting virtualized guest machines. You will also
+ need to select one or more of the processor modules below.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_ARM_HOST
+ bool "KVM host support for ARM cpus."
+ depends on KVM
+ depends on MMU
+ depends on CPU_V7 || ARM_VIRT_EXT
+ ---help---
+ Provides host support for ARM processors.
+
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
new file mode 100644
index 0000000..e69a8e1
--- /dev/null
+++ b/arch/arm/kvm/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+AFLAGS_interrupts.o := -I$(obj)
+
+obj-$(CONFIG_KVM_ARM_HOST) += init.o interrupts.o exports.o
+
+kvm-arm-y += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+
+kvm-arm-y += arm.o guest.o mmu.o emulate.o
+
+obj-$(CONFIG_KVM) += kvm-arm.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
new file mode 100644
index 0000000..50d7207
--- /dev/null
+++ b/arch/arm/kvm/arm.c
@@ -0,0 +1,279 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <trace/events/kvm.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#include <asm/unified.h>
+#include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/mman.h>
+
+#include "debug.h"
+
+#define TMP_LOG_LEN 512
+static char __tmp_log_data[TMP_LOG_LEN];
+DEFINE_SPINLOCK(__tmp_log_lock);
+void __kvm_print_msg(char *fmt, ...)
+{
+ va_list ap;
+ unsigned int size;
+
+ spin_lock(&__tmp_log_lock);
+
+ va_start(ap, fmt);
+ size = vsnprintf(__tmp_log_data, TMP_LOG_LEN, fmt, ap);
+ va_end(ap);
+
+ if (size >= TMP_LOG_LEN)
+ printk(KERN_ERR "Message exceeded log length!\n");
+ else
+ printk(KERN_INFO "%s", __tmp_log_data);
+
+ spin_unlock(&__tmp_log_lock);
+}
+
+int kvm_arch_hardware_enable(void *garbage)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+ *(int *)rtn = 0;
+}
+
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
+int kvm_arch_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ int i;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (kvm->vcpus[i]) {
+ kvm_arch_vcpu_free(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
+ }
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+ int r;
+ switch (ext) {
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ r = 1;
+ break;
+ case KVM_CAP_COALESCED_MMIO:
+ r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+ return r;
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ return 0;
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot old,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
+{
+ return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+}
+
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ int err;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ return vcpu;
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(err);
+}
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ KVMARM_NOT_IMPLEMENTED();
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ kvm_arch_vcpu_free(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
+
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ return -EINVAL;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ printk(KERN_ERR "kvm_arch_vm_ioctl: Unsupported ioctl (%d)\n", ioctl);
+ return -EINVAL;
+}
+
+int kvm_arch_init(void *opaque)
+{
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+static int arm_init(void)
+{
+ int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+ return rc;
+}
+
+static void __exit arm_exit(void)
+{
+ kvm_exit();
+}
+
+module_init(arm_init);
module_exit(arm_exit);
diff --git a/arch/arm/kvm/debug.h b/arch/arm/kvm/debug.h
new file mode 100644
index 0000000..7c21fde
--- /dev/null
+++ b/arch/arm/kvm/debug.h
@@ -0,0 +1,48 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ *
+ * This file contains debugging and tracing functions and definitions
+ * for KVM/ARM.
+ */
+#ifndef __ARM_KVM_TRACE_H__
+#define __ARM_KVM_TRACE_H__
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+
+void __kvm_print_msg(char *_fmt, ...);
+
+#define kvm_err(err, fmt, args...) do { \
+ __kvm_print_msg(KERN_ERR "KVM error [%s:%d]: (%d) ", \
+ __func__, __LINE__, err); \
+ __kvm_print_msg(fmt "\n", ##args); \
+} while (0)
+
+#define __kvm_msg(fmt, args...) do { \
+ __kvm_print_msg(KERN_ERR "KVM [%s:%d]: ", __func__, __LINE__); \
+ __kvm_print_msg(fmt, ##args); \
+} while (0)
+
+#define kvm_msg(__fmt, __args...) __kvm_msg(__fmt "\n", ##__args)
+
+
+#define KVMARM_NOT_IMPLEMENTED() \
+{ \
+ printk(KERN_ERR "KVM not implemented [%s:%d] in %s\n", \
+ __FILE__, __LINE__, __func__); \
+}
+
+#endif /* __ARM_KVM_TRACE_H__ */
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
new file mode 100644
index 0000000..6587dde
--- /dev/null
+++ b/arch/arm/kvm/emulate.c
@@ -0,0 +1,121 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <asm/kvm_emulate.h>
+
+#define USR_REG_OFFSET(_reg) \
+ offsetof(struct kvm_vcpu_arch, regs.usr_regs[_reg])
+
+static unsigned long vcpu_reg_offsets[MODE_SYS + 1][16] = {
+ /* FIQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+ offsetof(struct kvm_vcpu_arch, regs.fiq_regs[0]), /* r8 */
+ offsetof(struct kvm_vcpu_arch, regs.fiq_regs[1]), /* r9 */
+ offsetof(struct kvm_vcpu_arch, regs.fiq_regs[2]), /* r10 */
+ offsetof(struct kvm_vcpu_arch, regs.fiq_regs[3]), /* r11 */
+ offsetof(struct kvm_vcpu_arch, regs.fiq_regs[4]), /* r12 */
+ offsetof(struct kvm_vcpu_arch, regs.fiq_regs[5]), /* r13 */
+ offsetof(struct kvm_vcpu_arch, regs.fiq_regs[6]), /* r14 */
+ offsetof(struct kvm_vcpu_arch, regs.pc) /* r15 */
+ },
+
+ /* IRQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ offsetof(struct kvm_vcpu_arch, regs.irq_regs[0]), /* r13 */
+ offsetof(struct kvm_vcpu_arch, regs.irq_regs[1]), /* r14 */
+ offsetof(struct kvm_vcpu_arch, regs.pc) /* r15 */
+ },
+
+ /* SVC Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ offsetof(struct kvm_vcpu_arch, regs.svc_regs[0]), /* r13 */
+ offsetof(struct kvm_vcpu_arch, regs.svc_regs[1]), /* r14 */
+ offsetof(struct kvm_vcpu_arch, regs.pc) /* r15 */
+ },
+
+ /* ABT Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ offsetof(struct kvm_vcpu_arch, regs.abt_regs[0]), /* r13 */
+ offsetof(struct kvm_vcpu_arch, regs.abt_regs[1]), /* r14 */
+ offsetof(struct kvm_vcpu_arch, regs.pc) /* r15 */
+ },
+
+ /* UND Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ offsetof(struct kvm_vcpu_arch, regs.und_regs[0]), /* r13 */
+ offsetof(struct kvm_vcpu_arch, regs.und_regs[1]), /* r14 */
+ offsetof(struct kvm_vcpu_arch, regs.pc) /* r15 */
+ },
+
+ /* USR Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ offsetof(struct kvm_vcpu_arch, regs.usr_regs[13]), /* r13 */
+ offsetof(struct kvm_vcpu_arch, regs.usr_regs[14]), /* r14 */
+ offsetof(struct kvm_vcpu_arch, regs.pc) /* r15 */
+ },
+
+ /* SYS Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ offsetof(struct kvm_vcpu_arch, regs.usr_regs[13]), /* r13 */
+ offsetof(struct kvm_vcpu_arch, regs.usr_regs[14]), /* r14 */
+ offsetof(struct kvm_vcpu_arch, regs.pc) /* r15 */
+ },
+};
+
+/*
+ * Return a pointer to the register number valid in the specified mode of
+ * the virtual CPU.
+ */
+u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
+{
+ BUG_ON(reg_num > 15);
+ BUG_ON(mode > MODE_SYS);
+
+ return (u32 *)((void *)&vcpu->arch + vcpu_reg_offsets[mode][reg_num]);
+}
diff --git a/arch/arm/kvm/exports.c b/arch/arm/kvm/exports.c
new file mode 100644
index 0000000..d8a7fd5
--- /dev/null
+++ b/arch/arm/kvm/exports.c
@@ -0,0 +1,16 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
new file mode 100644
index 0000000..94a5c54
--- /dev/null
+++ b/arch/arm/kvm/guest.c
@@ -0,0 +1,148 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+
+
+#define VM_STAT(x) (offsetof(struct kvm, stat.x), KVM_STAT_VM)
+#define VCPU_STAT(x) (offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU)
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ struct kvm_vcpu_regs *vcpu_regs = &vcpu->arch.regs;
+
+ /*
+ * GPRs and PSRs
+ */
+ memcpy(regs->regs0_7, &(vcpu_regs->usr_regs[0]), sizeof(u32) * 8);
+ memcpy(regs->usr_regs8_12, &(vcpu_regs->usr_regs[8]), sizeof(u32) * 5);
+ memcpy(regs->fiq_regs8_12, &(vcpu_regs->fiq_regs[0]), sizeof(u32) * 5);
+ regs->reg13[MODE_FIQ] = vcpu_regs->fiq_regs[5];
+ regs->reg14[MODE_FIQ] = vcpu_regs->fiq_regs[6];
+ regs->reg13[MODE_IRQ] = vcpu_regs->irq_regs[0];
+ regs->reg14[MODE_IRQ] = vcpu_regs->irq_regs[1];
+ regs->reg13[MODE_SVC] = vcpu_regs->svc_regs[0];
+ regs->reg14[MODE_SVC] = vcpu_regs->svc_regs[1];
+ regs->reg13[MODE_ABT] = vcpu_regs->abt_regs[0];
+ regs->reg14[MODE_ABT] = vcpu_regs->abt_regs[1];
+ regs->reg13[MODE_UND] = vcpu_regs->und_regs[0];
+ regs->reg14[MODE_UND] = vcpu_regs->und_regs[1];
+ regs->reg13[MODE_USR] = vcpu_regs->usr_regs[13];
+ regs->reg14[MODE_USR] = vcpu_regs->usr_regs[14];
+
+ regs->spsr[MODE_FIQ] = vcpu_regs->fiq_regs[7];
+ regs->spsr[MODE_IRQ] = vcpu_regs->irq_regs[2];
+ regs->spsr[MODE_SVC] = vcpu_regs->svc_regs[2];
+ regs->spsr[MODE_ABT] = vcpu_regs->abt_regs[2];
+ regs->spsr[MODE_UND] = vcpu_regs->und_regs[2];
+
+ regs->reg15 = vcpu_regs->pc;
+ regs->cpsr = vcpu_regs->cpsr;
+
+
+ /*
+ * Co-processor registers.
+ */
+ regs->cp15.c1_sys = vcpu->arch.cp15.c1_SCTLR;
+ regs->cp15.c2_base0 = vcpu->arch.cp15.c2_TTBR0;
+ regs->cp15.c2_base1 = vcpu->arch.cp15.c2_TTBR1;
+ regs->cp15.c3_dacr = vcpu->arch.cp15.c3_DACR;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ struct kvm_vcpu_regs *vcpu_regs = &vcpu->arch.regs;
+
+ memcpy(&(vcpu_regs->usr_regs[0]), regs->regs0_7, sizeof(u32) * 8);
+ memcpy(&(vcpu_regs->usr_regs[8]), regs->usr_regs8_12, sizeof(u32) * 5);
+ memcpy(&(vcpu_regs->fiq_regs[0]), regs->fiq_regs8_12, sizeof(u32) * 5);
+
+ vcpu_regs->fiq_regs[5] = regs->reg13[MODE_FIQ];
+ vcpu_regs->fiq_regs[6] = regs->reg14[MODE_FIQ];
+ vcpu_regs->irq_regs[0] = regs->reg13[MODE_IRQ];
+ vcpu_regs->irq_regs[1] = regs->reg14[MODE_IRQ];
+ vcpu_regs->svc_regs[0] = regs->reg13[MODE_SVC];
+ vcpu_regs->svc_regs[1] = regs->reg14[MODE_SVC];
+ vcpu_regs->abt_regs[0] = regs->reg13[MODE_ABT];
+ vcpu_regs->abt_regs[1] = regs->reg14[MODE_ABT];
+ vcpu_regs->und_regs[0] = regs->reg13[MODE_UND];
+ vcpu_regs->und_regs[1] = regs->reg14[MODE_UND];
+ vcpu_regs->usr_regs[13] = regs->reg13[MODE_USR];
+ vcpu_regs->usr_regs[14] = regs->reg14[MODE_USR];
+
+ vcpu_regs->fiq_regs[7] = regs->spsr[MODE_FIQ];
+ vcpu_regs->irq_regs[2] = regs->spsr[MODE_IRQ];
+ vcpu_regs->svc_regs[2] = regs->spsr[MODE_SVC];
+ vcpu_regs->abt_regs[2] = regs->spsr[MODE_ABT];
+ vcpu_regs->und_regs[2] = regs->spsr[MODE_UND];
+
+ /*
+ * Co-processor registers.
+ */
+ vcpu->arch.cp15.c1_SCTLR = regs->cp15.c1_sys;
+
+ vcpu_regs->pc = regs->reg15;
+ vcpu_regs->cpsr = regs->cpsr;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
new file mode 100644
index 0000000..073a494
--- /dev/null
+++ b/arch/arm/kvm/init.S
@@ -0,0 +1,17 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+#include <asm/asm-offsets.h>
+#include <asm/kvm_asm.h>
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
new file mode 100644
index 0000000..073a494
--- /dev/null
+++ b/arch/arm/kvm/interrupts.S
@@ -0,0 +1,17 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+#include <asm/asm-offsets.h>
+#include <asm/kvm_asm.h>
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
new file mode 100644
index 0000000..2cccd48
--- /dev/null
+++ b/arch/arm/kvm/mmu.c
@@ -0,0 +1,15 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
new file mode 100644
index 0000000..f8869c1
--- /dev/null
+++ b/arch/arm/kvm/trace.h
@@ -0,0 +1,52 @@
+/*
+ * Tracepoints for KVM/ARM. TRACE_HEADER_MULTI_READ allows this header to be
+ * re-included by trace/define_trace.h to stamp out the event implementations.
+ */
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for entry/exit to guest
+ */
+/* Fired on world-switch into the guest; records the guest PC. */
+TRACE_EVENT(kvm_entry,
+ TP_PROTO(unsigned long vcpu_pc),
+ TP_ARGS(vcpu_pc),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ ),
+
+ TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+);
+
+/* Fired on return from the guest; records the guest PC at exit. */
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned long vcpu_pc),
+ TP_ARGS(vcpu_pc),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ ),
+
+ TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+);
+
+
+
+#endif /* _TRACE_KVM_H */
+
+/* Non-standard include path/file; must be declared for define_trace.h. */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH arch/arm/kvm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index c294f2b..b9565a7 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -37,6 +37,7 @@ config ARCH_VEXPRESS_CA15X4
bool "Versatile Express Cortex-A15x4 tile"
depends on VEXPRESS_EXTENDED_MEMORY_MAP
select CPU_V7
+ select ARM_VIRT_EXT
select ARM_GIC
select ARM_GIC_VPPI
select ARM_ARCH_TIMER
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index fc9b9f2..e3f5173 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -640,6 +640,14 @@ config ARM_LPAE
If unsure, say N.
+config ARM_VIRT_EXT
+ bool "Support for ARM Virtualization Extensions"
+ depends on ARM_LPAE
+ help
+ Say Y if you have an ARMv7 processor supporting the ARM hardware
+ Virtualization extensions. KVM depends on this feature and will
+ not run without it being selected.
+
config ARCH_PHYS_ADDR_T_64BIT
def_bool ARM_LPAE
^ permalink raw reply related [flat|nested] 105+ messages in thread
* [PATCH v5 02/13] ARM: KVM: Hypervisor identity mapping
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
2011-12-11 10:24 ` [PATCH v5 01/13] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
@ 2011-12-11 10:24 ` Christoffer Dall
2011-12-11 10:24 ` [PATCH v5 03/13] ARM: KVM: Add hypervisor initialization Christoffer Dall
` (11 subsequent siblings)
13 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:24 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
From: Christoffer Dall <cdall@cs.columbia.edu>
Adds support in the identity mapping feature that allows KVM to setup
identity mapping for the Hyp mode with the AP[1] bit set as required by
the specification and also supports freeing created sub pmd's after
finished use.
These two functions:
- hyp_identity_mapping_add(pgd, addr, end);
- hyp_identity_mapping_del(pgd, addr, end);
are essentially calls to the same function as the non-hyp versions but
with a different argument value. KVM calls these functions to setup
and teardown the identity mapping used to initialize the hypervisor.
Note, the hyp-version of the _del function actually frees the pmd's
pointed to by the pgd as opposed to the non-hyp version which just
clears them.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/pgtable-3level-hwdef.h | 1 +
arch/arm/include/asm/pgtable.h | 6 +++
arch/arm/mm/idmap.c | 54 +++++++++++++++++++++++++++
3 files changed, 60 insertions(+), 1 deletions(-)
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index d795282..a2d404e 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -44,6 +44,7 @@
#define PMD_SECT_XN (_AT(pmdval_t, 1) << 54)
#define PMD_SECT_AP_WRITE (_AT(pmdval_t, 0))
#define PMD_SECT_AP_READ (_AT(pmdval_t, 0))
+#define PMD_SECT_AP1 (_AT(pmdval_t, 1) << 6)
#define PMD_SECT_TEX(x) (_AT(pmdval_t, 0))
/*
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index aec18ab..19456f4 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -318,6 +318,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
void identity_mapping_add(pgd_t *, unsigned long, unsigned long);
void identity_mapping_del(pgd_t *, unsigned long, unsigned long);
+#ifdef CONFIG_KVM_ARM_HOST
+void hyp_identity_mapping_add(pgd_t *, unsigned long, unsigned long);
+void hyp_identity_mapping_del(pgd_t *pgd, unsigned long addr,
+ unsigned long end);
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* CONFIG_MMU */
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 267db72..e29903a 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -1,3 +1,4 @@
+#include <linux/module.h>
#include <linux/kernel.h>
#include <asm/cputype.h>
@@ -54,11 +55,18 @@ static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
} while (pud++, addr = next, addr != end);
}
-void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void __identity_mapping_add(pgd_t *pgd, unsigned long addr,
+ unsigned long end, bool hyp_mapping)
{
unsigned long prot, next;
prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
+
+#ifdef CONFIG_ARM_LPAE
+ if (hyp_mapping)
+ prot |= PMD_SECT_AP1;
+#endif
+
if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
prot |= PMD_BIT4;
@@ -69,6 +77,12 @@ void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
} while (pgd++, addr = next, addr != end);
}
+void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ __identity_mapping_add(pgd, addr, end, false);
+}
+
+
#ifdef CONFIG_SMP
static void idmap_del_pmd(pud_t *pud, unsigned long addr, unsigned long end)
{
@@ -103,6 +117,44 @@ void identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end)
}
#endif
+#ifdef CONFIG_KVM_ARM_HOST
+/*
+ * Identity-map [addr, end) for Hyp mode: same as identity_mapping_add() but
+ * with the hyp_mapping flag set so the AP[1] bit is added to the sections.
+ */
+void hyp_identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ __identity_mapping_add(pgd, addr, end, true);
+}
+EXPORT_SYMBOL_GPL(hyp_identity_mapping_add);
+
+/*
+ * Free the pmd table backing @addr under @pgd.
+ * NOTE(review): pmd_offset() returns the entry for @addr, not necessarily the
+ * table base; this relies on pmd_free() accepting such a pointer (or on @addr
+ * being aligned) — confirm against the LPAE pgalloc implementation.
+ */
+static void hyp_idmap_del_pmd(pgd_t *pgd, unsigned long addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ pmd_free(NULL, pmd);
+}
+
+/*
+ * This version actually frees the underlying pmds for all pgds in range and
+ * clears the pgds themselves afterwards, unlike identity_mapping_del() which
+ * only clears the entries.
+ */
+void hyp_identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pgd_t *next_pgd;
+
+ do {
+ next = pgd_addr_end(addr, end);
+ next_pgd = pgd + pgd_index(addr);
+ if (!pgd_none_or_clear_bad(next_pgd)) {
+ hyp_idmap_del_pmd(next_pgd, addr);
+ pgd_clear(next_pgd);
+ }
+ } while (addr = next, addr < end);
+}
+EXPORT_SYMBOL_GPL(hyp_identity_mapping_del);
+#endif
+
/*
* In order to soft-boot, we need to insert a 1:1 mapping in place of
* the user-mode pages. This will then ensure that we have predictable
^ permalink raw reply related [flat|nested] 105+ messages in thread
* [PATCH v5 03/13] ARM: KVM: Add hypervisor initialization
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
2011-12-11 10:24 ` [PATCH v5 01/13] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2011-12-11 10:24 ` [PATCH v5 02/13] ARM: KVM: Hypervisor identity mapping Christoffer Dall
@ 2011-12-11 10:24 ` Christoffer Dall
2011-12-11 10:24 ` [PATCH v5 04/13] ARM: KVM: Memory virtualization setup Christoffer Dall
` (10 subsequent siblings)
13 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:24 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
Sets up the required registers to run code in HYP-mode from the kernel.
No major controversies, but we should consider how to deal with SMP
support for hypervisor stack page.
By setting the HVBAR the kernel can execute code in Hyp-mode with
the MMU disabled. The HVBAR initially points to initialization code,
which initializes other Hyp-mode registers and enables the MMU
for Hyp-mode. Afterwards, the HVBAR is changed to point to KVM
Hyp vectors used to catch guest faults and to switch to Hyp mode
to perform a world-switch into a KVM guest.
Also provides memory mapping code to map required code pages and data
structures accessed in Hyp mode at the same virtual address as the
host kernel virtual addresses, but which conforms to the architectural
requirements for translations in Hyp mode. This interface is added in
arch/arm/kvm/mmu.c and is comprised of:
- create_hyp_mappings(hyp_pgd, start, end);
- free_hyp_pmds(pgd_hyp);
See the implementation for more details.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_arm.h | 103 +++++++++++++++++
arch/arm/include/asm/kvm_asm.h | 23 ++++
arch/arm/include/asm/kvm_host.h | 1
arch/arm/include/asm/kvm_mmu.h | 35 ++++++
arch/arm/include/asm/pgtable-3level-hwdef.h | 4 +
arch/arm/include/asm/pgtable-3level.h | 4 +
arch/arm/include/asm/pgtable.h | 1
arch/arm/kvm/arm.c | 166 +++++++++++++++++++++++++++
arch/arm/kvm/exports.c | 10 ++
arch/arm/kvm/init.S | 98 ++++++++++++++++
arch/arm/kvm/interrupts.S | 30 +++++
arch/arm/kvm/mmu.c | 152 +++++++++++++++++++++++++
mm/memory.c | 1
13 files changed, 628 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/include/asm/kvm_arm.h
create mode 100644 arch/arm/include/asm/kvm_mmu.h
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
new file mode 100644
index 0000000..835abd1
--- /dev/null
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -0,0 +1,103 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __KVM_ARM_H__
+#define __KVM_ARM_H__
+
+#include <asm/types.h>
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_TGE (1 << 27)
+#define HCR_TVM (1 << 26)
+#define HCR_TTLB (1 << 25)
+#define HCR_TPU (1 << 24)
+#define HCR_TPC (1 << 23)
+#define HCR_TSW (1 << 22)
+#define HCR_TAC (1 << 21)
+#define HCR_TIDCP (1 << 20)
+#define HCR_TSC (1 << 19)
+#define HCR_TID3 (1 << 18)
+#define HCR_TID2 (1 << 17)
+#define HCR_TID1 (1 << 16)
+#define HCR_TID0 (1 << 15)
+#define HCR_TWE (1 << 14)
+#define HCR_TWI (1 << 13)
+#define HCR_DC (1 << 12)
+#define HCR_BSU (3 << 10)
+#define HCR_FB (1 << 9)
+#define HCR_VA (1 << 8)
+#define HCR_VI (1 << 7)
+#define HCR_VF (1 << 6)
+#define HCR_AMO (1 << 5)
+#define HCR_IMO (1 << 4)
+#define HCR_FMO (1 << 3)
+#define HCR_PTW (1 << 2)
+#define HCR_SWIO (1 << 1)
+#define HCR_VM 1
+/* Guest HCR: trap SMC/WFE/WFI, enable stage-2 MMU and virtual aborts/IRQ/FIQ
+ * routing (HCR_AMO was previously or'ed twice; once is enough). */
+#define HCR_GUEST_MASK (HCR_TSC | HCR_TWE | HCR_TWI | HCR_VM | \
+ HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO)
+
+/* Hyp System Control Register (HSCTLR) bits */
+#define HSCTLR_TE (1 << 30)
+#define HSCTLR_EE (1 << 25)
+#define HSCTLR_FI (1 << 21)
+#define HSCTLR_WXN (1 << 19)
+#define HSCTLR_I (1 << 12)
+#define HSCTLR_C (1 << 2)
+#define HSCTLR_A (1 << 1)
+#define HSCTLR_M 1
+#define HSCTLR_MASK (HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I | \
+ HSCTLR_WXN | HSCTLR_FI | HSCTLR_EE | HSCTLR_TE)
+
+/* TTBCR and HTCR Registers bits */
+#define TTBCR_EAE (1 << 31)
+#define TTBCR_IMP (1 << 30)
+#define TTBCR_SH1 (3 << 28)
+#define TTBCR_ORGN1 (3 << 26)
+#define TTBCR_IRGN1 (3 << 24)
+#define TTBCR_EPD1 (1 << 23)
+#define TTBCR_A1 (1 << 22)
+#define TTBCR_T1SZ (3 << 16)
+#define TTBCR_SH0 (3 << 12)
+#define TTBCR_ORGN0 (3 << 10)
+#define TTBCR_IRGN0 (3 << 8)
+#define TTBCR_EPD0 (1 << 7)
+#define TTBCR_T0SZ 3
+#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
+
+
+/* Virtualization Translation Control Register (VTCR) bits */
+#define VTCR_SH0 (3 << 12)
+#define VTCR_ORGN0 (3 << 10)
+#define VTCR_IRGN0 (3 << 8)
+#define VTCR_SL0 (3 << 6)
+#define VTCR_S (1 << 4)
+#define VTCR_T0SZ 3
+/* Fixed: the mask previously included VTCR_MASK itself; a self-referential
+ * macro is not re-expanded, leaving an undeclared identifier at use sites. */
+#define VTCR_MASK (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0 | VTCR_SL0 | \
+ VTCR_S | VTCR_T0SZ)
+#define VTCR_HTCR_SH (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0)
+#define VTCR_SL_L2 0 /* Starting-level: 2 */
+#define VTCR_SL_L1 (1 << 6) /* Starting-level: 1 */
+#define VTCR_GUEST_SL VTCR_SL_L1
+#define VTCR_GUEST_T0SZ 0
+#if VTCR_GUEST_SL == 0
+#define VTTBR_X (14 - VTCR_GUEST_T0SZ)
+#else
+#define VTTBR_X (5 - VTCR_GUEST_T0SZ)
+#endif
+
+
+#endif /* __KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index c3d4458..89c318ea 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -24,5 +24,28 @@
#define ARM_EXCEPTION_DATA_ABORT 4
#define ARM_EXCEPTION_IRQ 5
#define ARM_EXCEPTION_FIQ 6
+#define ARM_EXCEPTION_HVC 7
+
+/*
+ * SMC Hypervisor API call numbers
+ */
+#ifdef __ASSEMBLY__
+.equ SMCHYP_HVBAR_W, 0xfffffff0
+#else /* !__ASSEMBLY__ */
+asm(".equ SMCHYP_HVBAR_W, 0xfffffff0");
+#endif /* __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_vector[];
+extern char __kvm_hyp_vector_end[];
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern char __kvm_vcpu_run_end[];
+#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index b2fcd8a..6a10467 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -31,6 +31,7 @@ struct kvm_vcpu;
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
struct kvm_arch {
+ pgd_t *pgd; /* 1-level 2nd stage table */
};
#define EXCEPTION_NONE 0
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
new file mode 100644
index 0000000..13fd8dc
--- /dev/null
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -0,0 +1,35 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef __ARM_KVM_MMU_H__
+#define __ARM_KVM_MMU_H__
+
+/*
+ * The architecture supports 40-bit IPA as input to the 2nd stage translations
+ * and PTRS_PER_PGD2 could therefore be 1024.
+ *
+ * To save a bit of memory and to avoid alignment issues we assume 39-bit IPA
+ * for now, but remember that the level-1 table must be aligned to its size.
+ */
+#define PTRS_PER_PGD2 512
+#define PGD2_ORDER get_order(PTRS_PER_PGD2 * sizeof(pgd_t))
+
+extern pgd_t *kvm_hyp_pgd;
+
+int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to);
+void free_hyp_pmds(pgd_t *hyp_pgd);
+
+#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index a2d404e..18f5cef 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -32,6 +32,9 @@
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_BIT4 (_AT(pmdval_t, 0))
#define PMD_DOMAIN(x) (_AT(pmdval_t, 0))
+#define PMD_APTABLE_SHIFT (61)
+/* Fixed: shift name was PGD_APTABLE_SHIFT, which is defined nowhere. */
+#define PMD_APTABLE (_AT(pgdval_t, 3) << PMD_APTABLE_SHIFT)
+#define PMD_PXNTABLE (_AT(pgdval_t, 1) << 59)
/*
* - section
@@ -41,6 +44,7 @@
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_nG (_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_XN (_AT(pmdval_t, 1) << 54)
#define PMD_SECT_AP_WRITE (_AT(pmdval_t, 0))
#define PMD_SECT_AP_READ (_AT(pmdval_t, 0))
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 759af70..edc3cb9 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -109,6 +109,10 @@
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & 2))
#define pud_present(pud) (pud_val(pud))
+#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_TABLE)
+#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_SECT)
#define pud_clear(pudp) \
do { \
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 19456f4..20025cc 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -88,6 +88,7 @@ extern pgprot_t pgprot_kernel;
#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
+#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 50d7207..e6bdf50 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -31,6 +31,10 @@
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
#include "debug.h"
@@ -56,6 +60,13 @@ void __kvm_print_msg(char *fmt, ...)
spin_unlock(&__tmp_log_lock);
}
+static void *kvm_arm_hyp_stack_page;
+
+/* The VMID used in the VTTBR */
+#define VMID_SIZE (1<<8)
+static DECLARE_BITMAP(kvm_vmids, VMID_SIZE);
+static DEFINE_MUTEX(kvm_vmids_mutex);
+
int kvm_arch_hardware_enable(void *garbage)
{
return 0;
@@ -255,13 +266,168 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EINVAL;
}
+/**
+ * init_hyp_mode - initialize Hyp-mode state on a single CPU
+ *
+ * Allocates the Hyp level-1 page table and a Hyp stack page, identity-maps
+ * the init code, points HVBAR at it via the SMC firmware interface, runs the
+ * init code with an HVC call, then tears down the identity mapping and
+ * switches HVBAR to the kernel-virtual Hyp vectors.
+ */
+static int init_hyp_mode(void)
+{
+ phys_addr_t init_phys_addr, init_end_phys_addr;
+ unsigned long hyp_stack_ptr;
+ int err = 0;
+
+ /*
+ * Allocate Hyp level-1 page table
+ */
+ kvm_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ if (!kvm_hyp_pgd)
+ return -ENOMEM;
+
+ /*
+ * Allocate stack page for Hypervisor-mode
+ */
+ kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
+ if (!kvm_arm_hyp_stack_page) {
+ err = -ENOMEM;
+ goto out_free_pgd;
+ }
+
+ /* Stack grows down: start at the top of the page. */
+ hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
+
+ init_phys_addr = virt_to_phys((void *)&__kvm_hyp_init);
+ init_end_phys_addr = virt_to_phys((void *)&__kvm_hyp_init_end);
+
+ /*
+ * Create identity mapping (init code runs with the Hyp MMU off, so
+ * it must be reachable at its physical address)
+ */
+ hyp_identity_mapping_add(kvm_hyp_pgd,
+ (unsigned long)init_phys_addr,
+ (unsigned long)init_end_phys_addr);
+
+ /*
+ * Set the HVBAR (vector base must be 32-byte aligned; SMCHYP_HVBAR_W
+ * is the secure-firmware call that writes HVBAR, args in r0/r7)
+ */
+ BUG_ON(init_phys_addr & 0x1f);
+ asm volatile (
+ "mov r0, %[vector_ptr]\n\t"
+ "ldr r7, =SMCHYP_HVBAR_W\n\t"
+ "smc #0\n\t" : :
+ [vector_ptr] "r" ((unsigned long)init_phys_addr) :
+ "r0", "r7");
+
+ /*
+ * Call initialization code (see __do_hyp_init: r0 = Hyp pgd phys,
+ * r1 = top of Hyp stack)
+ */
+ asm volatile (
+ "mov r0, %[pgd_ptr]\n\t"
+ "mov r1, %[stack_ptr]\n\t"
+ "hvc #0\n\t" : :
+ [pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
+ [stack_ptr] "r" (hyp_stack_ptr) :
+ "r0", "r1");
+
+ /*
+ * Unmap the identity mapping (no longer needed once the Hyp MMU is on)
+ */
+ hyp_identity_mapping_del(kvm_hyp_pgd,
+ (unsigned long)init_phys_addr,
+ (unsigned long)init_end_phys_addr);
+
+ /*
+ * Set the HVBAR to the virtual kernel address
+ */
+ asm volatile (
+ "mov r0, %[vector_ptr]\n\t"
+ "ldr r7, =SMCHYP_HVBAR_W\n\t"
+ "smc #0\n\t" : :
+ [vector_ptr] "r" (__kvm_hyp_vector) :
+ "r0", "r7");
+
+ return err;
+out_free_pgd:
+ kfree(kvm_hyp_pgd);
+ kvm_hyp_pgd = NULL;
+ return err;
+}
+
+/*
+ * Initializes the memory mappings used in Hyp-mode
+ *
+ * Code executed in Hyp-mode and a stack page per cpu must be mapped into the
+ * hypervisor translation tables.
+ *
+ * Currently there is no SMP support so we map only a single stack page on a
+ * single CPU.
+ */
+static int init_hyp_memory(void)
+{
+ int err = 0;
+ char *stack_page;
+
+ /*
+ * Map Hyp exception vectors
+ */
+ err = create_hyp_mappings(kvm_hyp_pgd,
+ __kvm_hyp_vector, __kvm_hyp_vector_end);
+ if (err) {
+ kvm_err(err, "Cannot map hyp vector");
+ goto out_free_mappings;
+ }
+
+ /*
+ * Map the world-switch code
+ */
+ err = create_hyp_mappings(kvm_hyp_pgd,
+ __kvm_vcpu_run, __kvm_vcpu_run_end);
+ if (err) {
+ kvm_err(err, "Cannot map world-switch code");
+ goto out_free_mappings;
+ }
+
+ /*
+ * Map the Hyp stack page (allocated earlier by init_hyp_mode())
+ */
+ stack_page = kvm_arm_hyp_stack_page;
+ err = create_hyp_mappings(kvm_hyp_pgd,
+ stack_page, stack_page + PAGE_SIZE);
+ if (err) {
+ kvm_err(err, "Cannot map hyp stack");
+ goto out_free_mappings;
+ }
+
+ return err;
+out_free_mappings:
+ free_hyp_pmds(kvm_hyp_pgd);
+ return err;
+}
+
+/**
+ * Initialize Hyp-mode and memory mappings on all CPUs.
+ */
int kvm_arch_init(void *opaque)
{
+ int err;
+
+ err = init_hyp_mode();
+ if (err)
+ goto out_err;
+
+ err = init_hyp_memory();
+ if (err)
+ goto out_err;
+
+ set_bit(0, kvm_vmids);
return 0;
+out_err:
+ return err;
}
void kvm_arch_exit(void)
{
+ if (kvm_hyp_pgd) {
+ free_hyp_pmds(kvm_hyp_pgd);
+ kfree(kvm_hyp_pgd);
+ kvm_hyp_pgd = NULL;
+ }
}
static int arm_init(void)
diff --git a/arch/arm/kvm/exports.c b/arch/arm/kvm/exports.c
index d8a7fd5..0fdd5ff 100644
--- a/arch/arm/kvm/exports.c
+++ b/arch/arm/kvm/exports.c
@@ -14,3 +14,13 @@
*/
#include <linux/module.h>
+#include <asm/kvm_asm.h>
+
+EXPORT_SYMBOL_GPL(__kvm_hyp_init);
+EXPORT_SYMBOL_GPL(__kvm_hyp_init_end);
+
+EXPORT_SYMBOL_GPL(__kvm_hyp_vector);
+EXPORT_SYMBOL_GPL(__kvm_hyp_vector_end);
+
+EXPORT_SYMBOL_GPL(__kvm_vcpu_run);
+EXPORT_SYMBOL_GPL(__kvm_vcpu_run_end);
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 073a494..5f7e922 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -13,5 +13,103 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
+
+#include <linux/linkage.h>
+#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor initialization
+@ - should be called with:
+@ r0 = Hypervisor pgd pointer
+@ r1 = top of Hyp stack (kernel VA)
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ .text
+ .arm
+ .align 12 @ page-align so the code fits one identity-mapped page
+__kvm_hyp_init:
+ .globl __kvm_hyp_init
+
+ @ Hyp-mode exception vector: only the HVC entry (offset 0x14) is used
+ nop
+ nop
+ nop
+ nop
+ nop
+ b __do_hyp_init
+ nop
+ nop
+
+__do_hyp_init:
+ @ Set the sp to end of this page and push data for later use
+ mov sp, pc
+ bic sp, sp, #0x0ff
+ bic sp, sp, #0xf00
+ add sp, sp, #0x1000
+ push {r1, r2, r12}
+
+ @ Set the HTTBR to point to the hypervisor PGD pointer passed to
+ @ function and set the upper bits equal to the kernel PGD.
+ mrrc p15, 1, r1, r2, c2
+ mcrr p15, 4, r0, r2, c2
+
+ @ Set the HTCR and VTCR to the same shareability and cacheability
+ @ settings as the non-secure TTBCR and with T0SZ == 0.
+ mrc p15, 4, r0, c2, c0, 2 @ HTCR
+ ldr r12, =HTCR_MASK
+ bic r0, r0, r12
+ mrc p15, 0, r1, c2, c0, 2 @ TTBCR
+ and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
+ orr r0, r0, r1
+ mcr p15, 4, r0, c2, c0, 2 @ HTCR
+
+ mrc p15, 4, r1, c2, c1, 2 @ VTCR
+ bic r1, r1, #(VTCR_HTCR_SH | VTCR_SL0)
+ bic r0, r0, #(~VTCR_HTCR_SH)
+ orr r1, r0, r1
+ orr r1, r1, #(VTCR_SL_L1 | VTCR_GUEST_T0SZ)
+ mcr p15, 4, r1, c2, c1, 2 @ VTCR
+
+ @ Use the same memory attributes for hyp. accesses as the kernel
+ @ (copy MAIRx to HMAIRx).
+ mrc p15, 0, r0, c10, c2, 0
+ mcr p15, 4, r0, c10, c2, 0
+ mrc p15, 0, r0, c10, c2, 1
+ mcr p15, 4, r0, c10, c2, 1
+
+ @ Set the HSCTLR to:
+ @ - ARM/THUMB exceptions: ARM
+ @ - Endianness: Kernel config
+ @ - Fast Interrupt Features: Kernel config
+ @ - Write permission implies XN: disabled
+ @ - Instruction cache: enabled
+ @ - Data/Unified cache: enabled
+ @ - Memory alignment checks: enabled
+ @ - MMU: enabled (this code must be run from an identity mapping)
+ mrc p15, 4, r0, c1, c0, 0 @ HSCTLR
+ ldr r12, =HSCTLR_MASK
+ bic r0, r0, r12
+ mrc p15, 0, r1, c1, c0, 0 @ SCTLR
+ ldr r12, =(HSCTLR_EE | HSCTLR_FI)
+ and r1, r1, r12
+ ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_I)
+ orr r1, r1, r12
+ orr r0, r0, r1
+ isb
+ mcr p15, 4, r0, c1, c0, 0 @ HSCTLR
+ isb
+
+ @ Set stack pointer and return to the kernel
+ pop {r1, r2, r12}
+ mov sp, r1
+ eret
+
+ .ltorg
+
+ .align 12 @ pad to the end of the page
+
+ __kvm_init_sp:
+ .globl __kvm_hyp_init_end
+__kvm_hyp_init_end:
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 073a494..2edc49b 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -13,5 +13,35 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
+
+#include <linux/linkage.h>
+#include <asm/unified.h>
+#include <asm/page.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor world-switch code
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .text
+ .arm
+
+@ Placeholder world-switch: returns straight to the caller. The THUMB()
+@ macro sets bit 0 of lr so the return stays in Thumb state on Thumb kernels.
+ENTRY(__kvm_vcpu_run)
+THUMB( orr lr, lr, #1)
+ mov pc, lr
+__kvm_vcpu_run_end:
+ .globl __kvm_vcpu_run_end
+
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor exception vector and handlers
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+@ Placeholder vector page; 32-byte aligned as required for HVBAR.
+ .align 5
+__kvm_hyp_vector:
+ .globl __kvm_hyp_vector
+ nop
+__kvm_hyp_vector_end:
+ .globl __kvm_hyp_vector_end
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 2cccd48..a298926 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -13,3 +13,155 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
+
+#include <linux/mman.h>
+#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#include "debug.h"
+
+pgd_t *kvm_hyp_pgd;
+
+/*
+ * Free every level-3 (pte) table referenced from the pmd table at @pmd,
+ * walking all PTRS_PER_PMD entries starting at @addr.
+ */
+static void free_ptes(pmd_t *pmd, unsigned long addr)
+{
+ pte_t *pte;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
+ if (!pmd_none(*pmd) && pmd_table(*pmd)) {
+ pte = pte_offset_kernel(pmd, addr);
+ pte_free_kernel(NULL, pte);
+ }
+ pmd++;
+ }
+}
+
+/**
+ * free_hyp_pmds - free the Hyp-mode level-2 tables and child level-3 tables
+ * @hyp_pgd: The Hyp-mode page table pointer
+ *
+ * Assumes this is a page table used strictly in Hyp-mode and therefore contains
+ * only mappings in the kernel memory area, which is above PAGE_OFFSET.
+ */
+void free_hyp_pmds(pgd_t *hyp_pgd)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr, next, end;
+
+ /* Walk only the kernel VA range; user range is never mapped here. */
+ addr = PAGE_OFFSET;
+ end = ~0;
+ do {
+ next = pgd_addr_end(addr, end);
+ pgd = hyp_pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+
+ BUG_ON(pud_bad(*pud));
+
+ if (pud_none(*pud))
+ continue;
+
+ pmd = pmd_offset(pud, addr);
+ free_ptes(pmd, addr);
+ pmd_free(NULL, pmd);
+ } while (addr = next, addr != end);
+}
+
+/*
+ * Fill level-3 entries for [addr, end) under @pmd, mapping each kernel
+ * virtual page to its own physical page with PAGE_HYP permissions.
+ * Only valid for lowmem addresses (virt_addr_valid must hold).
+ */
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte;
+ struct page *page;
+
+ addr &= PAGE_MASK;
+ do {
+ pte = pte_offset_kernel(pmd, addr);
+ BUG_ON(!virt_addr_valid(addr));
+ page = virt_to_page(addr);
+
+ set_pte_ext(pte, mk_pte(page, PAGE_HYP), 0);
+ } while (addr += PAGE_SIZE, addr < end);
+}
+
+/*
+ * Populate level-2 entries for [addr, end) under @pud, allocating level-3
+ * tables on demand, then delegate to create_hyp_pte_mappings().
+ * Returns 0 on success or -ENOMEM if a pte table cannot be allocated.
+ */
+static int create_hyp_pmd_mappings(pud_t *pud, unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long next;
+
+ do {
+ next = pmd_addr_end(addr, end);
+ pmd = pmd_offset(pud, addr);
+
+ /* Hyp tables never use section mappings here. */
+ BUG_ON(pmd_sect(*pmd));
+
+ if (pmd_none(*pmd)) {
+ pte = pte_alloc_one_kernel(NULL, addr);
+ if (!pte) {
+ kvm_err(-ENOMEM, "Cannot allocate Hyp pte");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ }
+
+ create_hyp_pte_mappings(pmd, addr, next);
+ } while (addr = next, addr < end);
+
+ return 0;
+}
+
+/**
+ * create_hyp_mappings - map a kernel virtual address range in Hyp mode
+ * @hyp_pgd: The allocated hypervisor level-1 table
+ * @from: The virtual kernel start address of the range
+ * @to: The virtual kernel end address of the range (exclusive)
+ *
+ * The same virtual address as the kernel virtual address is also used in
+ * Hyp-mode mapping to the same underlying physical pages.
+ *
+ * Returns 0 on success, -EINVAL for addresses below PAGE_OFFSET, or
+ * -ENOMEM when intermediate tables cannot be allocated. On failure the
+ * caller is responsible for cleanup (e.g. free_hyp_pmds()).
+ */
+int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
+{
+ unsigned long start = (unsigned long)from;
+ unsigned long end = (unsigned long)to;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr, next;
+ int err = 0;
+
+ BUG_ON(start > end);
+ if (start < PAGE_OFFSET)
+ return -EINVAL;
+
+ addr = start;
+ do {
+ next = pgd_addr_end(addr, end);
+ pgd = hyp_pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+
+ /* Allocate a level-2 table on first touch of this entry. */
+ if (pud_none_or_clear_bad(pud)) {
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err(-ENOMEM, "Cannot allocate Hyp pmd");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ }
+
+ err = create_hyp_pmd_mappings(pud, addr, next);
+ if (err)
+ return err;
+ } while (addr = next, addr < end);
+
+ return err;
+}
+
+/* Stage-2 fault handling is not implemented yet; filled in by a later patch. */
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
diff --git a/mm/memory.c b/mm/memory.c
index 829d437..e976971 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -402,6 +402,7 @@ void pgd_clear_bad(pgd_t *pgd)
pgd_ERROR(*pgd);
pgd_clear(pgd);
}
+EXPORT_SYMBOL_GPL(pgd_clear_bad);
void pud_clear_bad(pud_t *pud)
{
^ permalink raw reply related [flat|nested] 105+ messages in thread
* [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (2 preceding siblings ...)
2011-12-11 10:24 ` [PATCH v5 03/13] ARM: KVM: Add hypervisor inititalization Christoffer Dall
@ 2011-12-11 10:24 ` Christoffer Dall
2011-12-12 14:40 ` Avi Kivity
2011-12-11 10:24 ` [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
` (9 subsequent siblings)
13 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:24 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
This commit introduces the framework for guest memory management
through the use of 2nd stage translation. Each VM has a pointer
to a level-1 table (the pgd field in struct kvm_arch) which is
used for the 2nd stage translations. Entries are added when handling
guest faults (later patch) and the table itself can be allocated and
freed through the following functions implemented in
arch/arm/kvm/arm_mmu.c:
- kvm_alloc_stage2_pgd(struct kvm *kvm);
- kvm_free_stage2_pgd(struct kvm *kvm);
Further, each entry in TLBs and caches are tagged with a VMID
identifier in addition to ASIDs. The VMIDs are managed using
a bitmap and assigned when creating the VM in kvm_arch_init_vm()
where the 2nd stage pgd is also allocated. The table is freed in
kvm_arch_destroy_vm(). Both functions are called from the main
KVM code.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_host.h | 4 ++
arch/arm/include/asm/kvm_mmu.h | 5 +++
arch/arm/kvm/arm.c | 59 +++++++++++++++++++++++++++++++--
arch/arm/kvm/mmu.c | 69 +++++++++++++++++++++++++++++++++++++++
4 files changed, 132 insertions(+), 5 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6a10467..06d1263 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -31,7 +31,9 @@ struct kvm_vcpu;
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
struct kvm_arch {
- pgd_t *pgd; /* 1-level 2nd stage table */
+ u32 vmid; /* The VMID used for the virt. memory system */
+ pgd_t *pgd; /* 1-level 2nd stage table */
+ u64 vttbr; /* VTTBR value associated with above pgd and vmid */
};
#define EXCEPTION_NONE 0
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 13fd8dc..9d7440c 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -32,4 +32,9 @@ extern pgd_t *kvm_hyp_pgd;
int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to);
void free_hyp_pmds(pgd_t *hyp_pgd);
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e6bdf50..89ba18d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -94,15 +94,62 @@ void kvm_arch_sync_events(struct kvm *kvm)
{
}
+/**
+ * kvm_arch_init_vm - initializes a VM data structure
+ * @kvm: pointer to the KVM struct
+ */
int kvm_arch_init_vm(struct kvm *kvm)
{
- return 0;
+ int ret = 0;
+ phys_addr_t pgd_phys;
+ unsigned long vmid;
+
+ mutex_lock(&kvm_vmids_mutex);
+ vmid = find_first_zero_bit(kvm_vmids, VMID_SIZE);
+ if (vmid >= VMID_SIZE) {
+ mutex_unlock(&kvm_vmids_mutex);
+ return -EBUSY;
+ }
+ __set_bit(vmid, kvm_vmids);
+ kvm->arch.vmid = vmid;
+ mutex_unlock(&kvm_vmids_mutex);
+
+ ret = kvm_alloc_stage2_pgd(kvm);
+ if (ret)
+ goto out_fail_alloc;
+
+ pgd_phys = virt_to_phys(kvm->arch.pgd);
+ kvm->arch.vttbr = pgd_phys & ((1LLU << 40) - 1) & ~((2 << VTTBR_X) - 1);
+ kvm->arch.vttbr |= ((u64)vmid << 48);
+
+ ret = create_hyp_mappings(kvm_hyp_pgd, kvm, kvm + 1);
+ if (ret)
+ goto out_free_stage2_pgd;
+
+ return ret;
+out_free_stage2_pgd:
+ kvm_free_stage2_pgd(kvm);
+out_fail_alloc:
+ clear_bit(vmid, kvm_vmids);
+ return ret;
}
+/**
+ * kvm_arch_destroy_vm - destroy the VM data structure
+ * @kvm: pointer to the KVM struct
+ */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
int i;
+ kvm_free_stage2_pgd(kvm);
+
+ if (kvm->arch.vmid != 0) {
+ mutex_lock(&kvm_vmids_mutex);
+ clear_bit(kvm->arch.vmid, kvm_vmids);
+ mutex_unlock(&kvm_vmids_mutex);
+ }
+
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -178,6 +225,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ err = create_hyp_mappings(kvm_hyp_pgd, vcpu, vcpu + 1);
+ if (err)
+ goto free_vcpu;
+
return vcpu;
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -187,7 +238,7 @@ out:
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
- KVMARM_NOT_IMPLEMENTED();
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -293,8 +344,8 @@ static int init_hyp_mode(void)
hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
- init_phys_addr = virt_to_phys((void *)&__kvm_hyp_init);
- init_end_phys_addr = virt_to_phys((void *)&__kvm_hyp_init_end);
+ init_phys_addr = virt_to_phys(__kvm_hyp_init);
+ init_end_phys_addr = virt_to_phys(__kvm_hyp_init_end);
/*
* Create identity mapping
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index a298926..f7a7b17 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -160,6 +160,75 @@ int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
return err;
}
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm: The KVM struct pointer for the VM.
+ *
+ * Allocates the 1st level table only of size defined by PGD2_ORDER (can
+ * support either full 40-bit input addresses or limited to 32-bit input
+ * addresses). Clears the allocated pages.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+ pgd_t *pgd;
+
+ if (kvm->arch.pgd != NULL) {
+ kvm_err(-EINVAL, "kvm_arch already initialized?\n");
+ return -EINVAL;
+ }
+
+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);
+ if (!pgd)
+ return -ENOMEM;
+
+ memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
+ kvm->arch.pgd = pgd;
+
+ return 0;
+}
+
+/**
+ * kvm_free_stage2_pgd - free all stage-2 tables
+ * @kvm: The KVM struct pointer for the VM.
+ *
+ * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * underlying level-2 and level-3 tables before freeing the actual level-1 table
+ * and setting the struct pointer to NULL.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long long i, addr;
+
+ if (kvm->arch.pgd == NULL)
+ return;
+
+ /*
+ * We do this slightly different than other places, since we need more
+ * than 32 bits and for instance pgd_addr_end converts to unsigned long.
+ */
+ addr = 0;
+ for (i = 0; i < PTRS_PER_PGD2; i++) {
+ addr = i * (unsigned long long)PGDIR_SIZE;
+ pgd = kvm->arch.pgd + i;
+ pud = pud_offset(pgd, addr);
+
+ if (pud_none(*pud))
+ continue;
+
+ BUG_ON(pud_bad(*pud));
+
+ pmd = pmd_offset(pud, addr);
+ free_ptes(pmd, addr);
+ pmd_free(NULL, pmd);
+ }
+
+ free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER);
+ kvm->arch.pgd = NULL;
+}
+
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
KVMARM_NOT_IMPLEMENTED();
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-11 10:24 ` [PATCH v5 04/13] ARM: KVM: Memory virtualization setup Christoffer Dall
@ 2011-12-12 14:40 ` Avi Kivity
2011-12-12 15:09 ` [Android-virt] " Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 14:40 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:24 PM, Christoffer Dall wrote:
> This commit introduces the framework for guest memory management
> through the use of 2nd stage translation. Each VM has a pointer
> to a level-1 tabled (the pgd field in struct kvm_arch) which is
> used for the 2nd stage translations. Entries are added when handling
> guest faults (later patch) and the table itself can be allocated and
> freed through the following functions implemented in
> arch/arm/kvm/arm_mmu.c:
> - kvm_alloc_stage2_pgd(struct kvm *kvm);
> - kvm_free_stage2_pgd(struct kvm *kvm);
>
> Further, each entry in TLBs and caches are tagged with a VMID
> identifier in addition to ASIDs. The VMIDs are managed using
> a bitmap and assigned when creating the VM in kvm_arch_init_vm()
> where the 2nd stage pgd is also allocated. The table is freed in
> kvm_arch_destroy_vm(). Both functions are called from the main
> KVM code.
>
>
> struct kvm_arch {
> - pgd_t *pgd; /* 1-level 2nd stage table */
> + u32 vmid; /* The VMID used for the virt. memory system */
> + pgd_t *pgd; /* 1-level 2nd stage table */
> + u64 vttbr; /* VTTBR value associated with above pgd and vmid */
> };
>
I can't say I have a solid grasp here, but my feeling is that vmid needs
to be per-vcpu. Otherwise vcpu 1 can migrate to a cpu that previously
ran vcpu 0, and reuse its tlb since they have the same vmid.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-12 14:40 ` Avi Kivity
@ 2011-12-12 15:09 ` Christoffer Dall
2011-12-12 15:15 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 15:09 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marc.Zyngier, android-virt, tech
On Mon, Dec 12, 2011 at 9:40 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:24 PM, Christoffer Dall wrote:
>> This commit introduces the framework for guest memory management
>> through the use of 2nd stage translation. Each VM has a pointer
>> to a level-1 tabled (the pgd field in struct kvm_arch) which is
>> used for the 2nd stage translations. Entries are added when handling
>> guest faults (later patch) and the table itself can be allocated and
>> freed through the following functions implemented in
>> arch/arm/kvm/arm_mmu.c:
>> - kvm_alloc_stage2_pgd(struct kvm *kvm);
>> - kvm_free_stage2_pgd(struct kvm *kvm);
>>
>> Further, each entry in TLBs and caches are tagged with a VMID
>> identifier in addition to ASIDs. The VMIDs are managed using
>> a bitmap and assigned when creating the VM in kvm_arch_init_vm()
>> where the 2nd stage pgd is also allocated. The table is freed in
>> kvm_arch_destroy_vm(). Both functions are called from the main
>> KVM code.
>>
>>
>> struct kvm_arch {
>> - pgd_t *pgd; /* 1-level 2nd stage table */
>> + u32 vmid; /* The VMID used for the virt. memory system */
>> + pgd_t *pgd; /* 1-level 2nd stage table */
>> + u64 vttbr; /* VTTBR value associated with above pgd and vmid */
>> };
>>
>
> I can't say I have a solid grasp here, but my feeling is that vmid needs
> to be per-vcpu. Otherwise vcpu 1 can migrate to a cpu that previously
> ran vcpu 0, and reuse its tlb since they have the same vmid.
>
According to the ARM guys, the same rules apply as for ASIDs which
results in 1 vmid per VM. The vmid is checked along with a
guest-specific asid, so they would only share TLB entries when running
the same guest process, which I assume is fine then. Actually
desirable. But, granted, my SMP knowledge is limited so far, so if
Marc or Catalin feels like chipping in here...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-12 15:09 ` [Android-virt] " Christoffer Dall
@ 2011-12-12 15:15 ` Avi Kivity
2011-12-12 15:25 ` Peter Maydell
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 15:15 UTC (permalink / raw)
To: Christoffer Dall; +Cc: kvm, Marc.Zyngier, android-virt, tech
On 12/12/2011 05:09 PM, Christoffer Dall wrote:
> On Mon, Dec 12, 2011 at 9:40 AM, Avi Kivity <avi@redhat.com> wrote:
> > On 12/11/2011 12:24 PM, Christoffer Dall wrote:
> >> This commit introduces the framework for guest memory management
> >> through the use of 2nd stage translation. Each VM has a pointer
> >> to a level-1 tabled (the pgd field in struct kvm_arch) which is
> >> used for the 2nd stage translations. Entries are added when handling
> >> guest faults (later patch) and the table itself can be allocated and
> >> freed through the following functions implemented in
> >> arch/arm/kvm/arm_mmu.c:
> >> - kvm_alloc_stage2_pgd(struct kvm *kvm);
> >> - kvm_free_stage2_pgd(struct kvm *kvm);
> >>
> >> Further, each entry in TLBs and caches are tagged with a VMID
> >> identifier in addition to ASIDs. The VMIDs are managed using
> >> a bitmap and assigned when creating the VM in kvm_arch_init_vm()
> >> where the 2nd stage pgd is also allocated. The table is freed in
> >> kvm_arch_destroy_vm(). Both functions are called from the main
> >> KVM code.
> >>
> >>
> >> struct kvm_arch {
> >> - pgd_t *pgd; /* 1-level 2nd stage table */
> >> + u32 vmid; /* The VMID used for the virt. memory system */
> >> + pgd_t *pgd; /* 1-level 2nd stage table */
> >> + u64 vttbr; /* VTTBR value associated with above pgd and vmid */
> >> };
> >>
> >
> > I can't say I have a solid grasp here, but my feeling is that vmid needs
> > to be per-vcpu. Otherwise vcpu 1 can migrate to a cpu that previously
> > ran vcpu 0, and reuse its tlb since they have the same vmid.
> >
> According to the ARM guys, the same rules apply as for ASIDs which
> results into 1 vmid per VM. The vmid is checked along with a
> guest-specific asid, so they would only share TLB entries when running
> the same guest process, which I assume is fine then. Actually
> desirable. But, granted, my SMP knowledge is limited so far, so if
> Marc or Catalin feels like chipping in here...
We need to differentiate in how Linux-as-a-guest acts and how the cpu is
supposed to work. A guest operating system can theoretically assign the
ASID x to process A running on vcpu 0, and the same ASID x to process B
running on vcpu 1, and be sure that TLB entries don't leak from A to B
since the TLB is cpu local (is that in fact correct?). Does the ARM
arch allow this, or does it forbid running different contexts on
different vcpus with the same ASID?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-12 15:15 ` Avi Kivity
@ 2011-12-12 15:25 ` Peter Maydell
2011-12-12 15:49 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2011-12-12 15:25 UTC (permalink / raw)
To: Avi Kivity; +Cc: Christoffer Dall, Marc.Zyngier, tech, android-virt, kvm
On 12 December 2011 15:15, Avi Kivity <avi@redhat.com> wrote:
> We need to differentiate in how Linux-as-a-guest acts and how the cpu is
> supposed to work. A guest operating system can theoretically assign the
> ASID x to process A running on vcpu 0, and the same ASID x to process B
> running on vcpu 1
That would be a guest bug. From the ARM ARM:
"For a symmetric multiprocessor cluster where a single operating system
is running on the set of processing elements, ARMv7 requires all ASID
values to be assigned uniquely within any single Inner Shareable domain.
In other words, each ASID value must have the same meaning to all
processing elements in the system."
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-12 15:25 ` Peter Maydell
@ 2011-12-12 15:49 ` Avi Kivity
2011-12-12 17:40 ` Christoffer Dall
2011-12-13 17:10 ` Antonios Motakis
0 siblings, 2 replies; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 15:49 UTC (permalink / raw)
To: Peter Maydell; +Cc: Christoffer Dall, Marc.Zyngier, tech, android-virt, kvm
On 12/12/2011 05:25 PM, Peter Maydell wrote:
> On 12 December 2011 15:15, Avi Kivity <avi@redhat.com> wrote:
> > We need to differentiate in how Linux-as-a-guest acts and how the cpu is
> > supposed to work. A guest operating system can theoretically assign the
> > ASID x to process A running on vcpu 0, and the same ASID x to process B
> > running on vcpu 1
>
> That would be a guest bug. From the ARM ARM:
> "For a symmetric multiprocessor cluster where a single operating system
> is running on the set of processing elements, ARMv7 requires all ASID
> values to be assigned uniquely within any single Inner Shareable domain.
> In other words, each ASID value must have the same meaning to all
> processing elements in the system."
Thanks. So per-vm vmids should work.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-12 15:49 ` Avi Kivity
@ 2011-12-12 17:40 ` Christoffer Dall
2011-12-13 17:10 ` Antonios Motakis
1 sibling, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 17:40 UTC (permalink / raw)
To: Avi Kivity; +Cc: Peter Maydell, Marc.Zyngier, tech, android-virt, kvm
On Mon, Dec 12, 2011 at 10:49 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/12/2011 05:25 PM, Peter Maydell wrote:
>> On 12 December 2011 15:15, Avi Kivity <avi@redhat.com> wrote:
>> > We need to differentiate in how Linux-as-a-guest acts and how the cpu is
>> > supposed to work. A guest operating system can theoretically assign the
>> > ASID x to process A running on vcpu 0, and the same ASID x to process B
>> > running on vcpu 1
>>
>> That would be a guest bug. From the ARM ARM:
>> "For a symmetric multiprocessor cluster where a single operating system
>> is running on the set of processing elements, ARMv7 requires all ASID
>> values to be assigned uniquely within any single Inner Shareable domain.
>> In other words, each ASID value must have the same meaning to all
>> processing elements in the system."
>
> Thanks. So per-vm vmids should work.
>
yep.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-12 15:49 ` Avi Kivity
2011-12-12 17:40 ` Christoffer Dall
@ 2011-12-13 17:10 ` Antonios Motakis
2011-12-13 17:13 ` Christoffer Dall
1 sibling, 1 reply; 105+ messages in thread
From: Antonios Motakis @ 2011-12-13 17:10 UTC (permalink / raw)
To: Avi Kivity
Cc: Peter Maydell, Christoffer Dall, Marc.Zyngier, tech, android-virt,
kvm
On 12/12/2011 04:49 PM, Avi Kivity wrote:
> On 12/12/2011 05:25 PM, Peter Maydell wrote:
>> On 12 December 2011 15:15, Avi Kivity<avi@redhat.com> wrote:
>>> We need to differentiate in how Linux-as-a-guest acts and how the cpu is
>>> supposed to work. A guest operating system can theoretically assign the
>>> ASID x to process A running on vcpu 0, and the same ASID x to process B
>>> running on vcpu 1
>> That would be a guest bug. From the ARM ARM:
>> "For a symmetric multiprocessor cluster where a single operating system
>> is running on the set of processing elements, ARMv7 requires all ASID
>> values to be assigned uniquely within any single Inner Shareable domain.
>> In other words, each ASID value must have the same meaning to all
>> processing elements in the system."
> Thanks. So per-vm vmids should work.
>
We were playing with a VMID recycling patch based on the assumption of
per-cpu VMIDs being possible, which would have the advantage of
recycling VMIDs without much complicated locking (inspired by the KVM
SVM implementation). However we killed it with fire and hot plasma when
it became clear it violated the ARM spec...
On the other hand, maybe we could do something with per vcpu VMIDs, but
with proper synchronization across physical CPUs in order to be
compatible with the spec, but at the same time potentially allow a buggy
guest to run? Since in practice a lot of CPUs will not share TLB (and
instruction cache) structures, maybe it's possible that there is
software out there that violates the spec, without having problems on
the real hw.
Anyway VMID reuse will be available soon, and the difference between a
per vm and per vcpu implementation is a couple of trivial lines of code.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 04/13] ARM: KVM: Memory virtualization setup
2011-12-13 17:10 ` Antonios Motakis
@ 2011-12-13 17:13 ` Christoffer Dall
0 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-13 17:13 UTC (permalink / raw)
To: Antonios Motakis; +Cc: Avi Kivity, kvm, Marc.Zyngier, android-virt, tech
On Tue, Dec 13, 2011 at 12:10 PM, Antonios Motakis
<a.motakis@virtualopensystems.com> wrote:
> On 12/12/2011 04:49 PM, Avi Kivity wrote:
>> On 12/12/2011 05:25 PM, Peter Maydell wrote:
>>> On 12 December 2011 15:15, Avi Kivity<avi@redhat.com> wrote:
>>>> We need to differentiate in how Linux-as-a-guest acts and how the cpu is
>>>> supposed to work. A guest operating system can theoretically assign the
>>>> ASID x to process A running on vcpu 0, and the same ASID x to process B
>>>> running on vcpu 1
>>> That would be a guest bug. From the ARM ARM:
>>> "For a symmetric multiprocessor cluster where a single operating system
>>> is running on the set of processing elements, ARMv7 requires all ASID
>>> values to be assigned uniquely within any single Inner Shareable domain.
>>> In other words, each ASID value must have the same meaning to all
>>> processing elements in the system."
>> Thanks. So per-vm vmids should work.
>>
> We where playing with a VMID recycling patch based on the assumption of
> per-cpu VMIDs being possible, which would have the advantage of
> recycling VMIDs without much complicated locking (inspired by the KVM
> SVM implementation). However we killed it with fire and hot plasma when
> it became clear it violated the ARM spec...
>
> On the other hand, maybe we could do something with per vcpu VMIDs, but
> with proper synchronization accross physical CPUs in order to be
> compatible with the spec, but at the same time potentially allow a buggy
> guest to run? Since in practice a lot of CPUs will not share TLB (and
> instruction cache) structures, maybe it's possible that there is
> software out there that violates the spec, without having problems on
> the real hw.
>
> Anyway VMID reuse will be available soon, and the difference between a
> per vm and per vcpu implementation is a couple of trivial lines of code.
>
yes, this is going to be a simple per-vm implementation that flushes
TLBs on roll-over for the next patch series, let's leave it at that
for now!
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (3 preceding siblings ...)
2011-12-11 10:24 ` [PATCH v5 04/13] ARM: KVM: Memory virtualization setup Christoffer Dall
@ 2011-12-11 10:24 ` Christoffer Dall
2011-12-11 15:18 ` Jan Kiszka
2011-12-12 13:28 ` Avi Kivity
2011-12-11 10:24 ` [PATCH v5 06/13] ARM: KVM: World-switch implementation Christoffer Dall
` (8 subsequent siblings)
13 siblings, 2 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:24 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
Userspace can inject IRQs and FIQs through the KVM_IRQ_LINE VM ioctl.
This ioctl is used since the semantics are in fact two lines that can be
either raised or lowered on the VCPU - the IRQ and FIQ lines.
KVM needs to know which VCPU it must operate on and whether the FIQ or
IRQ line is raised/lowered. Hence both pieces of information is packed
in the kvm_irq_level->irq field. The irq field value will be:
IRQ: vcpu_index * 2
FIQ: (vcpu_index * 2) + 1
This is documented in Documentation/kvm/api.txt.
The effect of the ioctl is simply to raise/lower the
corresponding virt_irq field on the VCPU struct, which will cause the
world-switch code to raise/lower virtual interrupts when running the
guest on next switch. The wait_for_interrupt flag is also cleared for
raised IRQs causing an idle VCPU to become active again.
Note: The custom trace_kvm_irq_line is used despite a generic definition of
trace_kvm_set_irq, since the trace-Kvm_set_irq depends on the x86-specific
define of __HAVE_IOAPIC. Either the trace event should be created
regardless of this define or it should depend on another ifdef clause,
common for both x86 and ARM. However, since the arguments don't really
match those used in ARM, I am yet to be convinced why this is necessary.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
Documentation/virtual/kvm/api.txt | 10 ++++++-
arch/arm/include/asm/kvm.h | 8 ++++++
arch/arm/include/asm/kvm_arm.h | 1 +
arch/arm/kvm/arm.c | 53 ++++++++++++++++++++++++++++++++++++-
arch/arm/kvm/trace.h | 21 +++++++++++++++
include/linux/kvm.h | 1 +
6 files changed, 91 insertions(+), 3 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7945b0b..4abaa67 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -572,7 +572,7 @@ only go to the IOAPIC. On ia64, a IOSAPIC is created.
4.25 KVM_IRQ_LINE
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86, ia64, arm
Type: vm ioctl
Parameters: struct kvm_irq_level
Returns: 0 on success, -1 on error
@@ -582,6 +582,14 @@ Requires that an interrupt controller model has been previously created with
KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level
to be set to 1 and then back to 0.
+KVM_CREATE_IRQCHIP (except for ARM). Note that edge-triggered interrupts
+require the level to be set to 1 and then back to 0.
+
+ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The value of the
+irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
+FIQs. Level is used to raise/lower the line. See arch/arm/include/asm/kvm.h for
+convenience macros.
+
struct kvm_irq_level {
union {
__u32 irq; /* GSI */
diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
index 87dc33b..8935062 100644
--- a/arch/arm/include/asm/kvm.h
+++ b/arch/arm/include/asm/kvm.h
@@ -20,6 +20,14 @@
#include <asm/types.h>
/*
+ * KVM_IRQ_LINE macros to set/read IRQ/FIQ for specific VCPU index.
+ */
+enum KVM_ARM_IRQ_LINE_TYPE {
+ KVM_ARM_IRQ_LINE = 0,
+ KVM_ARM_FIQ_LINE = 1,
+};
+
+/*
* Modes used for short-hand mode determinition in the world-switch code and
* in emulation code.
*
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 835abd1..e378a37 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -49,6 +49,7 @@
#define HCR_VM 1
#define HCR_GUEST_MASK (HCR_TSC | HCR_TWE | HCR_TWI | HCR_VM | HCR_AMO | \
HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
/* Hyp System Control Register (HSCTLR) bits */
#define HSCTLR_TE (1 << 30)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 89ba18d..fc0bd6b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -299,6 +299,43 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
return -EINVAL;
}
+static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
+ struct kvm_irq_level *irq_level)
+{
+ u32 mask;
+ unsigned int vcpu_idx;
+ struct kvm_vcpu *vcpu;
+
+ vcpu_idx = irq_level->irq / 2;
+ if (vcpu_idx >= KVM_MAX_VCPUS)
+ return -EINVAL;
+
+ vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ if (!vcpu)
+ return -EINVAL;
+
+ switch (irq_level->irq % 2) {
+ case KVM_ARM_IRQ_LINE:
+ mask = HCR_VI;
+ break;
+ case KVM_ARM_FIQ_LINE:
+ mask = HCR_VF;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
+
+ if (irq_level->level) {
+ vcpu->arch.virt_irq |= mask;
+ vcpu->arch.wait_for_interrupts = 0;
+ } else
+ vcpu->arch.virt_irq &= ~mask;
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -313,8 +350,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
- printk(KERN_ERR "kvm_arch_vm_ioctl: Unsupported ioctl (%d)\n", ioctl);
- return -EINVAL;
+ struct kvm *kvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ switch (ioctl) {
+ case KVM_IRQ_LINE: {
+ struct kvm_irq_level irq_event;
+
+ if (copy_from_user(&irq_event, argp, sizeof irq_event))
+ return -EFAULT;
+ return kvm_arch_vm_ioctl_irq_line(kvm, &irq_event);
+ }
+ default:
+ return -EINVAL;
+ }
}
/**
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index f8869c1..ac64e3a 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -40,6 +40,27 @@ TRACE_EVENT(kvm_exit,
);
+TRACE_EVENT(kvm_irq_line,
+ TP_PROTO(unsigned int type, unsigned int level, unsigned int vcpu_idx),
+ TP_ARGS(type, level, vcpu_idx),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, type )
+ __field( unsigned int, level )
+ __field( unsigned int, vcpu_idx )
+ ),
+
+ TP_fast_assign(
+ __entry->type = type;
+ __entry->level = level;
+ __entry->vcpu_idx = vcpu_idx;
+ ),
+
+ TP_printk("KVM_IRQ_LINE: type: %s, level: %u, vcpu: %u",
+ (__entry->type == KVM_ARM_IRQ_LINE) ? "IRQ" : "FIQ",
+ __entry->level, __entry->vcpu_idx)
+);
+
#endif /* _TRACE_KVM_H */
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c3892fc..679abbb 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -111,6 +111,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+ * For ARM: IRQ: irq = (2*vcpu_index). FIQ: irq = (2*vcpu_indx + 1).
*/
union {
__u32 irq;
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 10:24 ` [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
@ 2011-12-11 15:18 ` Jan Kiszka
2011-12-11 16:03 ` Peter Maydell
2011-12-11 19:16 ` Christoffer Dall
2011-12-12 13:28 ` Avi Kivity
1 sibling, 2 replies; 105+ messages in thread
From: Jan Kiszka @ 2011-12-11 15:18 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi,
peter.maydell
[-- Attachment #1: Type: text/plain, Size: 7860 bytes --]
Just found two, maybe three nits while browsing by:
On 2011-12-11 11:24, Christoffer Dall wrote:
> Userspace can inject IRQs and FIQs through the KVM_IRQ_LINE VM ioctl.
> This ioctl is used since the semantics are in fact two lines that can be
> either raised or lowered on the VCPU - the IRQ and FIQ lines.
>
> KVM needs to know which VCPU it must operate on and whether the FIQ or
> IRQ line is raised/lowered. Hence both pieces of information is packed
> in the kvm_irq_level->irq field. The irq fild value will be:
> IRQ: vcpu_index * 2
> FIQ: (vcpu_index * 2) + 1
>
> This is documented in Documentation/kvm/api.txt.
>
> The effect of the ioctl is simply to raise/lower the
> corresponding virt_irq field on the VCPU struct, which will cause the
> world-switch code to raise/lower virtual interrupts when running the
> guest on next switch. The wait_for_interrupt flag is also cleared for
> raised IRQs causing an idle VCPU to become active again.
>
> Note: The custom trace_kvm_irq_line is used despite a generic definition of
> trace_kvm_set_irq, since the trace-Kvm_set_irq depends on the x86-specific
> define of __HAVE_IOAPIC. Either the trace event should be created
> regardless of this define or it should depend on another ifdef clause,
> common for both x86 and ARM. However, since the arguments don't really
> match those used in ARM, I am yet to be convinced why this is necessary.
>
> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
> ---
> Documentation/virtual/kvm/api.txt | 10 ++++++-
> arch/arm/include/asm/kvm.h | 8 ++++++
> arch/arm/include/asm/kvm_arm.h | 1 +
> arch/arm/kvm/arm.c | 53 ++++++++++++++++++++++++++++++++++++-
> arch/arm/kvm/trace.h | 21 +++++++++++++++
> include/linux/kvm.h | 1 +
> 6 files changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 7945b0b..4abaa67 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -572,7 +572,7 @@ only go to the IOAPIC. On ia64, a IOSAPIC is created.
> 4.25 KVM_IRQ_LINE
>
> Capability: KVM_CAP_IRQCHIP
> -Architectures: x86, ia64
> +Architectures: x86, ia64, arm
> Type: vm ioctl
> Parameters: struct kvm_irq_level
> Returns: 0 on success, -1 on error
> @@ -582,6 +582,14 @@ Requires that an interrupt controller model has been previously created with
> KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level
> to be set to 1 and then back to 0.
>
> +KVM_CREATE_IRQCHIP (except for ARM). Note that edge-triggered interrupts
> +require the level to be set to 1 and then back to 0.
You probably wanted to replace the original lines with these two, no?
> +
> +ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The value of the
> +irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
> +FIQs. Level is used to raise/lower the line. See arch/arm/include/asm/kvm.h for
> +convenience macros.
> +
> struct kvm_irq_level {
> union {
> __u32 irq; /* GSI */
> diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
> index 87dc33b..8935062 100644
> --- a/arch/arm/include/asm/kvm.h
> +++ b/arch/arm/include/asm/kvm.h
> @@ -20,6 +20,14 @@
> #include <asm/types.h>
>
> /*
> + * KVM_IRQ_LINE macros to set/read IRQ/FIQ for specific VCPU index.
> + */
> +enum KVM_ARM_IRQ_LINE_TYPE {
> + KVM_ARM_IRQ_LINE = 0,
> + KVM_ARM_FIQ_LINE = 1,
> +};
> +
> +/*
> * Modes used for short-hand mode determination in the world-switch code and
> * in emulation code.
> *
> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
> index 835abd1..e378a37 100644
> --- a/arch/arm/include/asm/kvm_arm.h
> +++ b/arch/arm/include/asm/kvm_arm.h
> @@ -49,6 +49,7 @@
> #define HCR_VM 1
> #define HCR_GUEST_MASK (HCR_TSC | HCR_TWE | HCR_TWI | HCR_VM | HCR_AMO | \
> HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO)
> +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
>
> /* Hyp System Control Register (HSCTLR) bits */
> #define HSCTLR_TE (1 << 30)
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 89ba18d..fc0bd6b 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -299,6 +299,43 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> return -EINVAL;
> }
>
> +static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
> + struct kvm_irq_level *irq_level)
> +{
> + u32 mask;
> + unsigned int vcpu_idx;
> + struct kvm_vcpu *vcpu;
> +
> + vcpu_idx = irq_level->irq / 2;
> + if (vcpu_idx >= KVM_MAX_VCPUS)
> + return -EINVAL;
> +
> + vcpu = kvm_get_vcpu(kvm, vcpu_idx);
> + if (!vcpu)
> + return -EINVAL;
> +
> + switch (irq_level->irq % 2) {
> + case KVM_ARM_IRQ_LINE:
> + mask = HCR_VI;
> + break;
> + case KVM_ARM_FIQ_LINE:
> + mask = HCR_VF;
> + break;
> + default:
> + return -EINVAL;
Due to % 2, default is unreachable. Remove the masking?
> + }
> +
> + trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
> +
> + if (irq_level->level) {
> + vcpu->arch.virt_irq |= mask;
> + vcpu->arch.wait_for_interrupts = 0;
> + } else
> + vcpu->arch.virt_irq &= ~mask;
No need to protect the bitops on virt_irq? Or what lock does this?
> +
> + return 0;
> +}
> +
> long kvm_arch_vcpu_ioctl(struct file *filp,
> unsigned int ioctl, unsigned long arg)
> {
> @@ -313,8 +350,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
> long kvm_arch_vm_ioctl(struct file *filp,
> unsigned int ioctl, unsigned long arg)
> {
> - printk(KERN_ERR "kvm_arch_vm_ioctl: Unsupported ioctl (%d)\n", ioctl);
> - return -EINVAL;
> + struct kvm *kvm = filp->private_data;
> + void __user *argp = (void __user *)arg;
> +
> + switch (ioctl) {
> + case KVM_IRQ_LINE: {
> + struct kvm_irq_level irq_event;
> +
> + if (copy_from_user(&irq_event, argp, sizeof irq_event))
> + return -EFAULT;
> + return kvm_arch_vm_ioctl_irq_line(kvm, &irq_event);
> + }
> + default:
> + return -EINVAL;
> + }
> }
>
> /**
> diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
> index f8869c1..ac64e3a 100644
> --- a/arch/arm/kvm/trace.h
> +++ b/arch/arm/kvm/trace.h
> @@ -40,6 +40,27 @@ TRACE_EVENT(kvm_exit,
> );
>
>
> +TRACE_EVENT(kvm_irq_line,
> + TP_PROTO(unsigned int type, unsigned int level, unsigned int vcpu_idx),
> + TP_ARGS(type, level, vcpu_idx),
> +
> + TP_STRUCT__entry(
> + __field( unsigned int, type )
> + __field( unsigned int, level )
> + __field( unsigned int, vcpu_idx )
> + ),
> +
> + TP_fast_assign(
> + __entry->type = type;
> + __entry->level = level;
> + __entry->vcpu_idx = vcpu_idx;
> + ),
> +
> + TP_printk("KVM_IRQ_LINE: type: %s, level: %u, vcpu: %u",
> + (__entry->type == KVM_ARM_IRQ_LINE) ? "IRQ" : "FIQ",
> + __entry->level, __entry->vcpu_idx)
> +);
> +
>
> #endif /* _TRACE_KVM_H */
>
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index c3892fc..679abbb 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -111,6 +111,7 @@ struct kvm_irq_level {
> * ACPI gsi notion of irq.
> * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
> * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
> + * For ARM: IRQ: irq = (2*vcpu_index). FIQ: irq = (2*vcpu_index + 1).
> */
> union {
> __u32 irq;
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 15:18 ` Jan Kiszka
@ 2011-12-11 16:03 ` Peter Maydell
2011-12-11 19:30 ` Christoffer Dall
2011-12-11 19:16 ` Christoffer Dall
1 sibling, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2011-12-11 16:03 UTC (permalink / raw)
To: Jan Kiszka
Cc: Christoffer Dall, android-virt, kvm, Marc.Zyngier,
catalin.marinas, tech, avi
On 11 December 2011 15:18, Jan Kiszka <jan.kiszka@web.de> wrote:
> Just found two, maybe three nits while browsing by:
>
> On 2011-12-11 11:24, Christoffer Dall wrote:
>> +ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The value of the
>> +irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
>> +FIQs.
This seems to me a slightly obscure way of defining the two fields
in this word (ie bits [31..1] cpu number, bit [0] irq-vs-fiq).
>> +static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>> + struct kvm_irq_level *irq_level)
>> +{
>> + u32 mask;
>> + unsigned int vcpu_idx;
>> + struct kvm_vcpu *vcpu;
>> +
>> + vcpu_idx = irq_level->irq / 2;
>> + if (vcpu_idx >= KVM_MAX_VCPUS)
>> + return -EINVAL;
>> +
>> + vcpu = kvm_get_vcpu(kvm, vcpu_idx);
>> + if (!vcpu)
>> + return -EINVAL;
>> +
>> + switch (irq_level->irq % 2) {
>> + case KVM_ARM_IRQ_LINE:
>> + mask = HCR_VI;
>> + break;
>> + case KVM_ARM_FIQ_LINE:
>> + mask = HCR_VF;
>> + break;
>> + default:
>> + return -EINVAL;
>
> Due to % 2, default is unreachable. Remove the masking?
Removing the mask would be wrong since the irq field here
is encoding both cpu number and irq-vs-fiq. The default is
just an unreachable condition. (Why are we using % here
rather than the obvious bit operation, incidentally?)
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 16:03 ` Peter Maydell
@ 2011-12-11 19:30 ` Christoffer Dall
2011-12-11 19:48 ` Peter Maydell
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 19:30 UTC (permalink / raw)
To: Peter Maydell
Cc: Jan Kiszka, android-virt, kvm, Marc.Zyngier, catalin.marinas,
tech, avi
On Sun, Dec 11, 2011 at 11:03 AM, Peter Maydell
<peter.maydell@linaro.org> wrote:
> On 11 December 2011 15:18, Jan Kiszka <jan.kiszka@web.de> wrote:
>> Just found two, maybe three nits while browsing by:
>>
>> On 2011-12-11 11:24, Christoffer Dall wrote:
>>> +ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The value of the
>>> +irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
>>> +FIQs.
>
> This seems to me a slightly obscure way of defining the two fields
> in this word (ie bits [31..1] cpu number, bit [0] irq-vs-fiq).
>
Isn't that just personal preference? The other scheme was suggested by
Avi, and nobody else complained then, so I'd be inclined to just leave
it as is.
>>> +static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>>> + struct kvm_irq_level *irq_level)
>>> +{
>>> + u32 mask;
>>> + unsigned int vcpu_idx;
>>> + struct kvm_vcpu *vcpu;
>>> +
>>> + vcpu_idx = irq_level->irq / 2;
>>> + if (vcpu_idx >= KVM_MAX_VCPUS)
>>> + return -EINVAL;
>>> +
>>> + vcpu = kvm_get_vcpu(kvm, vcpu_idx);
>>> + if (!vcpu)
>>> + return -EINVAL;
>>> +
>>> + switch (irq_level->irq % 2) {
>>> + case KVM_ARM_IRQ_LINE:
>>> + mask = HCR_VI;
>>> + break;
>>> + case KVM_ARM_FIQ_LINE:
>>> + mask = HCR_VF;
>>> + break;
>>> + default:
>>> + return -EINVAL;
>>
>> Due to % 2, default is unreachable. Remove the masking?
>
> Removing the mask would be wrong since the irq field here
> is encoding both cpu number and irq-vs-fiq. The default is
> just an unreachable condition. (Why are we using % here
> rather than the obvious bit operation, incidentally?)
>
right, I will remove the default case.
I highly doubt that the difference in using a bitop will be measurably
more efficient, but if you feel strongly about it, I can change it to
a shift and bitwise and, which I assume is what you mean by the
obvious bit operation? I think my CS background speaks for using %,
but whatever.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 19:30 ` Christoffer Dall
@ 2011-12-11 19:48 ` Peter Maydell
2011-12-11 20:07 ` [Android-virt] " Christoffer Dall
2011-12-12 6:35 ` Alexander Graf
0 siblings, 2 replies; 105+ messages in thread
From: Peter Maydell @ 2011-12-11 19:48 UTC (permalink / raw)
To: Christoffer Dall
Cc: Jan Kiszka, android-virt, kvm, Marc.Zyngier, catalin.marinas,
tech, avi
On 11 December 2011 19:30, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> On Sun, Dec 11, 2011 at 11:03 AM, Peter Maydell
> <peter.maydell@linaro.org> wrote:
>> Removing the mask would be wrong since the irq field here
>> is encoding both cpu number and irq-vs-fiq. The default is
>> just an unreachable condition. (Why are we using % here
>> rather than the obvious bit operation, incidentally?)
>>
> right, I will remove the default case.
>
> I highly doubt that the difference in using a bitop will be measurably
> more efficient, but if you feel strongly about it, I can change it to
> a shift and bitwise and, which I assume is what you mean by the
> obvious bit operation? I think my CS background speaks for using %,
> but whatever.
Certainly the compiler ought to be able to figure out the
two are the same thing; I just think "irq & 1" is more readable
than "irq % 2" (because it's being clear that it's treating the
variable as a pile of bits rather than an integer). This is
bikeshedding rather, though, and style issues in kernel code
are a matter for the kernel folk. So you can ignore me :-)
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 19:48 ` Peter Maydell
@ 2011-12-11 20:07 ` Christoffer Dall
2011-12-11 20:25 ` Peter Maydell
2011-12-12 11:06 ` Marc Zyngier
2011-12-12 6:35 ` Alexander Graf
1 sibling, 2 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 20:07 UTC (permalink / raw)
To: Peter Maydell
Cc: Christoffer Dall, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
Jan Kiszka, avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On Dec 11, 2011, at 2:48 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 11 December 2011 19:30, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> On Sun, Dec 11, 2011 at 11:03 AM, Peter Maydell
>> <peter.maydell@linaro.org> wrote:
>>> Removing the mask would be wrong since the irq field here
>>> is encoding both cpu number and irq-vs-fiq. The default is
>>> just an unreachable condition. (Why are we using % here
>>> rather than the obvious bit operation, incidentally?)
>>>
>> right, I will remove the default case.
>>
>> I highly doubt that the difference in using a bitop will be measurably
>> more efficient, but if you feel strongly about it, I can change it to
>> a shift and bitwise and, which I assume is what you mean by the
>> obvious bit operation? I think my CS background speaks for using %,
>> but whatever.
>
> Certainly the compiler ought to be able to figure out the
> two are the same thing; I just think "irq & 1" is more readable
> than "irq % 2" (because it's being clear that it's treating the
> variable as a pile of bits rather than an integer). This is
> bikeshedding rather, though, and style issues in kernel code
> are a matter for the kernel folk. So you can ignore me :-)
>
Well, if it was just "irq & 1", then I hear you, but it would be "(irq
>> cpu_idx) & 1" which I don't think is more clear.
But yes let's see what the kernel folks say.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 20:07 ` [Android-virt] " Christoffer Dall
@ 2011-12-11 20:25 ` Peter Maydell
2011-12-11 21:36 ` Christoffer Dall
2011-12-12 11:06 ` Marc Zyngier
1 sibling, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2011-12-11 20:25 UTC (permalink / raw)
To: Christoffer Dall
Cc: Christoffer Dall, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
Jan Kiszka, avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 11 December 2011 20:07, Christoffer Dall
<christofferdall@christofferdall.dk> wrote:
> Well, if it was just "irq & 1", then I hear you, but it would be "(irq
> >> cpu_idx) & 1" which I don't think is more clear.
Er, what? The fields are [31..1] CPU index and [0] irqtype,
right? So what you have now is:
vcpu_idx = irq_level->irq / 2;
irqtype = irq_level->irq % 2;
and the bitshifting equivalent is:
vcpu_idx = irq_level->irq >> 1;
irqtype = irq_level->irq & 1;
surely?
Shifting by the cpuindex is definitely wrong.
(Incidentally I fixed a bug in your QEMU-side code which wasn't
feeding this field to the kernel in the way it expects:
http://git.linaro.org/gitweb?p=qemu/qemu-linaro.git;a=commitdiff;h=2502ba067e795e48d346f9816fad45177ca64bca
Sorry, I should have posted that to the list. I'll do that now.)
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 20:25 ` Peter Maydell
@ 2011-12-11 21:36 ` Christoffer Dall
2011-12-11 22:12 ` Peter Maydell
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 21:36 UTC (permalink / raw)
To: Peter Maydell
Cc: Christoffer Dall, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
Jan Kiszka, avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On Sun, Dec 11, 2011 at 3:25 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 11 December 2011 20:07, Christoffer Dall
> <christofferdall@christofferdall.dk> wrote:
>> Well, if it was just "irq & 1", then I hear you, but it would be "(irq
>> >> cpu_idx) & 1" which I don't think is more clear.
>
> Er, what? The fields are [31..1] CPU index and [0] irqtype,
> right? So what you have now is:
> vcpu_idx = irq_level->irq / 2;
> irqtype = irq_level->irq % 2;
> and the bitshifting equivalent is:
> vcpu_idx = irq_level->irq >> 1;
> irqtype = irq_level->irq & 1;
> surely?
>
> Shifting by the cpuindex is definitely wrong.
actually, that's not how the irq_level field is defined. If you look
in Documentation/virtual/kvm/api.txt:
"ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The
value of the
irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
FIQs. Level is used to raise/lower the line. See arch/arm/include/asm/kvm.h for
convenience macros."
also, in the kernel code the cpu_index is achieved by a simple integer
division by 2.
as I said, this was the proposal from the last round of reviews after
a lengthy discussion, so I stuck with that.
we should definitely fix either side, and the only sane argument is
that this is an irq_line field, so an index resembling an actual line
seems more semantically in line with the field purpose rather than a
bit encoding, but I am open to arguments and not married to the
current implementation.
> (Incidentally I fixed a bug in your QEMU-side code which wasn't
> feeding this field to the kernel in the way it expects:
>
> http://git.linaro.org/gitweb?p=qemu/qemu-linaro.git;a=commitdiff;h=2502ba067e795e48d346f9816fad45177ca64bca
>
> Sorry, I should have posted that to the list. I'll do that now.)
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 21:36 ` Christoffer Dall
@ 2011-12-11 22:12 ` Peter Maydell
2011-12-11 22:35 ` Peter Maydell
0 siblings, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2011-12-11 22:12 UTC (permalink / raw)
To: Christoffer Dall
Cc: Christoffer Dall, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
Jan Kiszka, avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 11 December 2011 21:36, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> On Sun, Dec 11, 2011 at 3:25 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 11 December 2011 20:07, Christoffer Dall
>> <christofferdall@christofferdall.dk> wrote:
>>> Well, if it was just "irq & 1", then I hear you, but it would be "(irq
>>> >> cpu_idx) & 1" which I don't think is more clear.
>>
>> Er, what? The fields are [31..1] CPU index and [0] irqtype,
>> right? So what you have now is:
>> vcpu_idx = irq_level->irq / 2;
>> irqtype = irq_level->irq % 2;
>> and the bitshifting equivalent is:
>> vcpu_idx = irq_level->irq >> 1;
>> irqtype = irq_level->irq & 1;
>> surely?
>>
>> Shifting by the cpuindex is definitely wrong.
>
> actually, that's not how the irq_level field is defined.
It's not clear to me which part of my comment this is aimed at. Shifting
by the cpuindex doesn't give the right answer whether you define
irq_level by bitfields or with the current phrasing you quote below.
> If you look
> in Documentation/virtual/kvm/api.txt:
>
> "ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The
> value of the
> irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
> FIQs. Level is used to raise/lower the line. See arch/arm/include/asm/kvm.h for
> convenience macros."
That's exactly the same thing, though, right? It's just a matter
of how you choose to phrase it (in either text or in code; the values
come out identical). When I was sorting out the QEMU side, I started
out by looking at the kernel source code, deduced that we were encoding
CPU number and irq-vs-fiq as described above (and documenting it in a
slightly confusing way as a multiplication) and then wrote the qemu
code in what seemed to me the clearest way.
(Actually what would be clearest would be if the ioctl took the
(interrupt-target, interrupt-line-for-that-target, value-of-line)
tuple as three separate values rather than encoding two of them into
a single integer, but I assume there's a reason we can't have that.)
> we should definitely fix either side, and the only sane argument is
> that this is an irq_line field, so an index resembling an actual line
> seems more semantically in line with the field purpose rather than a
> bit encoding, but I am open to arguments and not married to the
> current implementation.
To be clear, I'm not attempting to suggest a change in the semantics
of this field. (The qemu patch fixes the qemu side to adhere to what
the kernel requires.)
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 22:12 ` Peter Maydell
@ 2011-12-11 22:35 ` Peter Maydell
2011-12-11 22:53 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2011-12-11 22:35 UTC (permalink / raw)
To: Christoffer Dall
Cc: Christoffer Dall, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
Jan Kiszka, avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 11 December 2011 22:12, Peter Maydell <peter.maydell@linaro.org> wrote:
> (Actually what would be clearest would be if the ioctl took the
> (interrupt-target, interrupt-line-for-that-target, value-of-line)
> tuple as three separate values rather than encoding two of them into
> a single integer, but I assume there's a reason we can't have that.)
Have you thought about how this encoding scheme would be extended
when we move to using the VGIC and an in-kernel interrupt controller
implementation, incidentally? I haven't really looked into that at
all, but I assume that then QEMU is going to start having to tell
the kernel it wants to deliver interrupt 35 to the GIC, and so on...
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 22:35 ` Peter Maydell
@ 2011-12-11 22:53 ` Christoffer Dall
2011-12-11 23:01 ` Jan Kiszka
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 22:53 UTC (permalink / raw)
To: Peter Maydell
Cc: kvm@vger.kernel.org, Marc.Zyngier@arm.com, Jan Kiszka,
avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On Sun, Dec 11, 2011 at 5:35 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 11 December 2011 22:12, Peter Maydell <peter.maydell@linaro.org> wrote:
>> (Actually what would be clearest would be if the ioctl took the
>> (interrupt-target, interrupt-line-for-that-target, value-of-line)
>> tuple as three separate values rather than encoding two of them into
>> a single integer, but I assume there's a reason we can't have that.)
>
> Have you thought about how this encoding scheme would be extended
> when we move to using the VGIC and an in-kernel interrupt controller
> implementation, incidentally? I haven't really looked into that at
> all, but I assume that then QEMU is going to start having to tell
> the kernel it wants to deliver interrupt 35 to the GIC, and so on...
>
>
no, I haven't looked into that at all. My plan was to decipher the
common irq, ioapic stuff for x86 and see how much we can re-use and if
there will be some nice way to either use what's there or change some
bits to accommodate both existing archs and ARM. But the short answer
is, no not really, I was focusing so far on getting a stable
implementation upstream.
yes, we are going to have to have some interface with QEMU for this
and if we need new features from what's already there that should
probably be discussed in the same round as the mechanism for handing
of CP15 stuff to QEMU that we touched upon earlier.
-Christoffer
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 22:53 ` Christoffer Dall
@ 2011-12-11 23:01 ` Jan Kiszka
2011-12-12 16:31 ` Peter Maydell
0 siblings, 1 reply; 105+ messages in thread
From: Jan Kiszka @ 2011-12-11 23:01 UTC (permalink / raw)
To: Christoffer Dall
Cc: Peter Maydell, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
[-- Attachment #1: Type: text/plain, Size: 1963 bytes --]
On 2011-12-11 23:53, Christoffer Dall wrote:
> On Sun, Dec 11, 2011 at 5:35 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 11 December 2011 22:12, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> (Actually what would be clearest would be if the ioctl took the
>>> (interrupt-target, interrupt-line-for-that-target, value-of-line)
>>> tuple as three separate values rather than encoding two of them into
>>> a single integer, but I assume there's a reason we can't have that.)
>>
>> Have you thought about how this encoding scheme would be extended
>> when we move to using the VGIC and an in-kernel interrupt controller
>> implementation, incidentally? I haven't really looked into that at
>> all, but I assume that then QEMU is going to start having to tell
>> the kernel it wants to deliver interrupt 35 to the GIC, and so on...
>>
>>
> no, I haven't looked into that at all. My plan was to decipher the
> common irq, ioapic stuff for x86 and see how much we can re-use and if
> there will be some nice way to either use what's there or change some
> bits to accommodate both existing archs and ARM. But the short answer
> is, no not really, I was focusing so far on getting a stable
> implementation upstream.
>
> yes, we are going to have to have some interface with QEMU for this
> and if we need new features from what's already there that should
> probably be discussed in the same round as the mechanism for handing
> of CP15 stuff to QEMU that we touched upon earlier.
Enabling in-kernel irqchips usually means "switching worlds". So the
semantics of these particular IRQ inject interface details may change
without breaking anything.
However, things might look different if there will be a need to inject
also the CPU IRQs directly, not only the irqchip inputs. In that case,
it may make some sense to reserve more space for interrupt types than
just one bit and use a common encoding scheme.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 23:01 ` Jan Kiszka
@ 2011-12-12 16:31 ` Peter Maydell
2011-12-12 17:40 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2011-12-12 16:31 UTC (permalink / raw)
To: Jan Kiszka
Cc: Christoffer Dall, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 11 December 2011 23:01, Jan Kiszka <jan.kiszka@web.de> wrote:
> Enabling in-kernel irqchips usually means "switching worlds". So the
> semantics of these particular IRQ inject interface details may change
> without breaking anything.
>
> However, things might look different if there will be a need to inject
> also the CPU IRQs directly, not only the irqchip inputs. In that case,
> it may make some sense to reserve more space for interrupt types than
> just one bit and use a common encoding scheme.
I think with an in-kernel GIC model you'd only need to be able to set
one of the (256 including internal-to-the-CPU inputs) GIC input lines;
the GIC itself then connects directly to the vcpu IRQ and FIQ.
So we could just have different semantics for the ioctl in the 'kernel
GIC model enabled' config, as you suggest.
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 16:31 ` Peter Maydell
@ 2011-12-12 17:40 ` Avi Kivity
2011-12-29 1:29 ` Christoffer Dall
2012-02-09 1:15 ` Peter Maydell
0 siblings, 2 replies; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 17:40 UTC (permalink / raw)
To: Peter Maydell
Cc: Jan Kiszka, Christoffer Dall, kvm@vger.kernel.org,
Marc.Zyngier@arm.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 12/12/2011 06:31 PM, Peter Maydell wrote:
> On 11 December 2011 23:01, Jan Kiszka <jan.kiszka@web.de> wrote:
> > Enabling in-kernel irqchips usually means "switching worlds". So the
> > semantics of these particular IRQ inject interface details may change
> > without breaking anything.
> >
> > However, things might look different if there will be a need to inject
> > also the CPU IRQs directly, not only the irqchip inputs. In that case,
> > it may make some sense to reserve more space for interrupt types than
> > just one bit and use a common encoding scheme.
>
> I think with an in-kernel GIC model you'd only need to be able to set
> one of the (256 including internal-to-the-CPU inputs) GIC input lines;
> the GIC itself then connects directly to the vcpu IRQ and FIQ.
>
> So we could just have different semantics for the ioctl in the 'kernel
> GIC model enabled' config, as you suggest.
btw, since we use the KVM_IRQ_LINE ioctl, it may make sense to require
KVM_CREATE_IRQCHIP. To create a kernel GIC model, just call
KVM_CREATE_IRQCHIP with a different parameter. This removes an "except
for ARM" from the documentation.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 17:40 ` Avi Kivity
@ 2011-12-29 1:29 ` Christoffer Dall
2012-02-09 1:15 ` Peter Maydell
1 sibling, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-29 1:29 UTC (permalink / raw)
To: Avi Kivity
Cc: Peter Maydell, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
Jan Kiszka, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On Dec 12, 2011, at 12:40 PM, Avi Kivity wrote:
> On 12/12/2011 06:31 PM, Peter Maydell wrote:
>> On 11 December 2011 23:01, Jan Kiszka <jan.kiszka@web.de> wrote:
>>> Enabling in-kernel irqchips usually means "switching worlds". So the
>>> semantics of these particular IRQ inject interface details may change
>>> without breaking anything.
>>>
>>> However, things might look different if there will be a need to inject
>>> also the CPU IRQs directly, not only the irqchip inputs. In that case,
>>> it may make some sense to reserve more space for interrupt types than
>>> just one bit and use a common encoding scheme.
>>
>> I think with an in-kernel GIC model you'd only need to be able to set
>> one of the (256 including internal-to-the-CPU inputs) GIC input lines;
>> the GIC itself then connects directly to the vcpu IRQ and FIQ.
>>
>> So we could just have different semantics for the ioctl in the 'kernel
>> GIC model enabled' config, as you suggest.
>
> btw, since we use the KVM_IRQ_LINE ioctl, it may make sense to require
> KVM_CREATE_IRQCHIP. To create a kernel GIC model, just call
> KVM_CREATE_IRQCHIP with a different parameter. This removes an "except
> for ARM" from the documentation.
I added this, but it feels a bit contrived. Please take a look when I post the next patch series if this is what you have in mind.
Thanks.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 17:40 ` Avi Kivity
2011-12-29 1:29 ` Christoffer Dall
@ 2012-02-09 1:15 ` Peter Maydell
1 sibling, 0 replies; 105+ messages in thread
From: Peter Maydell @ 2012-02-09 1:15 UTC (permalink / raw)
To: Avi Kivity
Cc: Jan Kiszka, Christoffer Dall, kvm@vger.kernel.org,
Marc.Zyngier@arm.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 12 December 2011 17:40, Avi Kivity <avi@redhat.com> wrote:
> On 12/12/2011 06:31 PM, Peter Maydell wrote:
>> I think with an in-kernel GIC model you'd only need to be able to set
>> one of the (256 including internal-to-the-CPU inputs) GIC input lines;
>> the GIC itself then connects directly to the vcpu IRQ and FIQ.
>>
>> So we could just have different semantics for the ioctl in the 'kernel
>> GIC model enabled' config, as you suggest.
>
> btw, since we use the KVM_IRQ_LINE ioctl, it may make sense to require
> KVM_CREATE_IRQCHIP. To create a kernel GIC model, just call
> KVM_CREATE_IRQCHIP with a different parameter. This removes an "except
> for ARM" from the documentation.
Just to yank this thread back from the dead, Christoffer pointed
out that at the moment QEMU only calls KVM_CREATE_IRQCHIP if the
user asked for one on the command line. This makes sense to me,
since KVM_CREATE_IRQCHIP is saying "create me an in-kernel
interrupt controller", and so for ARM it ought to mean "create an
in-kernel GIC model"... So I think it would be better just to fix
the documentation to note that on some architectures it doesn't
make sense to call KVM_IRQ_LINE unless you've previously asked
for an in kernel irq chip via KVM_CREATE_IRQCHIP.
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 20:07 ` [Android-virt] " Christoffer Dall
2011-12-11 20:25 ` Peter Maydell
@ 2011-12-12 11:06 ` Marc Zyngier
2011-12-12 12:54 ` Christoffer Dall
1 sibling, 1 reply; 105+ messages in thread
From: Marc Zyngier @ 2011-12-12 11:06 UTC (permalink / raw)
To: Christoffer Dall
Cc: Peter Maydell, Christoffer Dall, kvm@vger.kernel.org, Jan Kiszka,
avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 11/12/11 20:07, Christoffer Dall wrote:
> On Dec 11, 2011, at 2:48 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
>
>> On 11 December 2011 19:30, Christoffer Dall
>> <c.dall@virtualopensystems.com> wrote:
>>> On Sun, Dec 11, 2011 at 11:03 AM, Peter Maydell
>>> <peter.maydell@linaro.org> wrote:
>>>> Removing the mask would be wrong since the irq field here
>>>> is encoding both cpu number and irq-vs-fiq. The default is
>>>> just an unreachable condition. (Why are we using % here
>>>> rather than the obvious bit operation, incidentally?)
>>>>
>>> right, I will remove the default case.
>>>
>>> I highly doubt that the difference in using a bitop will be measurably
>>> more efficient, but if you feel strongly about it, I can change it to
>>> a shift and bitwise and, which I assume is what you mean by the
>>> obvious bit operation? I think my CS background speaks for using %,
>>> but whatever.
>>
>> Certainly the compiler ought to be able to figure out the
>> two are the same thing; I just think "irq & 1" is more readable
>> than "irq % 2" (because it's being clear that it's treating the
>> variable as a pile of bits rather than an integer). This is
>> bikeshedding rather, though, and style issues in kernel code
>> are a matter for the kernel folk. So you can ignore me :-)
>>
> Well, if it was just "irq & 1", then I hear you, but it would be "(irq
>>> cpu_idx) & 1" which I don't think is more clear.
>
> But yes let's see what the kernel folks say.
The general consensus is to use bit operations rather than arithmetic.
The compiler will usually convert the "% 2" pattern into a shift, but I
tend to agree with Peter on the readability of the thing. When encoding
multiple information in a word, bit operations should be used, as they
make it obvious which part of the word contains the bit you're
interested in.
But I've probably been corrupted by working with HW guys for a bit too
long... ;-)
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 11:06 ` Marc Zyngier
@ 2011-12-12 12:54 ` Christoffer Dall
0 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 12:54 UTC (permalink / raw)
To: Marc Zyngier
Cc: Christoffer Dall, Peter Maydell, kvm@vger.kernel.org, Jan Kiszka,
avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On Mon, Dec 12, 2011 at 6:06 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 11/12/11 20:07, Christoffer Dall wrote:
>> On Dec 11, 2011, at 2:48 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
>>
>>> On 11 December 2011 19:30, Christoffer Dall
>>> <c.dall@virtualopensystems.com> wrote:
>>>> On Sun, Dec 11, 2011 at 11:03 AM, Peter Maydell
>>>> <peter.maydell@linaro.org> wrote:
>>>>> Removing the mask would be wrong since the irq field here
>>>>> is encoding both cpu number and irq-vs-fiq. The default is
>>>>> just an unreachable condition. (Why are we using % here
>>>>> rather than the obvious bit operation, incidentally?)
>>>>>
>>>> right, I will remove the default case.
>>>>
>>>> I highly doubt that the difference in using a bitop will be measurably
>>>> more efficient, but if you feel strongly about it, I can change it to
>>>> a shift and bitwise and, which I assume is what you mean by the
>>>> obvious bit operation? I think my CS background speaks for using %,
>>>> but whatever.
>>>
>>> Certainly the compiler ought to be able to figure out the
>>> two are the same thing; I just think "irq & 1" is more readable
>>> than "irq % 2" (because it's being clear that it's treating the
>>> variable as a pile of bits rather than an integer). This is
>>> bikeshedding rather, though, and style issues in kernel code
>>> are a matter for the kernel folk. So you can ignore me :-)
>>>
>> Well, if it was just "irq & 1", then I hear you, but it would be "(irq
>>>> cpu_idx) & 1" which I don't think is more clear.
>>
>> But yes let's see what the kernel folks say.
>
> The general consensus is to use bit operations rather than arithmetic.
> The compiler will usually convert the "% 2" pattern into a shift, but I
> tend to agree with Peter on the readability of the thing. When encoding
> multiple information in a word, bit operations should be used, as they
> make it obvious which part of the word contains the bit you're
> interested in.
>
> But I've probably been corrupted by working with HW guys for a bit too
> long... ;-)
>
>
ok, ok, I'll change it to a bit op. Can't wait for the dazzling
performance improvement ;)
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 19:48 ` Peter Maydell
2011-12-11 20:07 ` [Android-virt] " Christoffer Dall
@ 2011-12-12 6:35 ` Alexander Graf
1 sibling, 0 replies; 105+ messages in thread
From: Alexander Graf @ 2011-12-12 6:35 UTC (permalink / raw)
To: Peter Maydell
Cc: Christoffer Dall, kvm@vger.kernel.org, Marc.Zyngier@arm.com,
Jan Kiszka, avi@redhat.com, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com
On 11.12.2011, at 20:48, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 11 December 2011 19:30, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> On Sun, Dec 11, 2011 at 11:03 AM, Peter Maydell
>> <peter.maydell@linaro.org> wrote:
>>> Removing the mask would be wrong since the irq field here
>>> is encoding both cpu number and irq-vs-fiq. The default is
>>> just an unreachable condition. (Why are we using % here
>>> rather than the obvious bit operation, incidentally?)
>>>
>> right, I will remove the default case.
>>
>> I highly doubt that the difference in using a bitop will be measurably
>> more efficient, but if you feel strongly about it, I can change it to
>> a shift and bitwise and, which I assume is what you mean by the
>> obvious bit operation? I think my CS background speaks for using %,
>> but whatever.
>
> Certainly the compiler ought to be able to figure out the
> two are the same thing; I just think "irq & 1" is more readable
> than "irq % 2" (because it's being clear that it's treating the
> variable as a pile of bits rather than an integer). This is
> bikeshedding rather, though, and style issues in kernel code
> are a matter for the kernel folk. So you can ignore me :-)
Yes, the general rule of thumb is to use bit operations where you can. And in this case it certainly makes sense :).
Plus, bit operations are an order of magnitude faster than div/mod usually.
Alex
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 15:18 ` Jan Kiszka
2011-12-11 16:03 ` Peter Maydell
@ 2011-12-11 19:16 ` Christoffer Dall
1 sibling, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 19:16 UTC (permalink / raw)
To: Jan Kiszka
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi,
peter.maydell
On Sun, Dec 11, 2011 at 10:18 AM, Jan Kiszka <jan.kiszka@web.de> wrote:
> Just found two, maybe three nits while browsing by:
>
> On 2011-12-11 11:24, Christoffer Dall wrote:
>> Userspace can inject IRQs and FIQs through the KVM_IRQ_LINE VM ioctl.
>> This ioctl is used since the sematics are in fact two lines that can be
>> either raised or lowered on the VCPU - the IRQ and FIQ lines.
>>
>> KVM needs to know which VCPU it must operate on and whether the FIQ or
>> IRQ line is raised/lowered. Hence both pieces of information is packed
>> in the kvm_irq_level->irq field. The irq fild value will be:
>> IRQ: vcpu_index * 2
>> FIQ: (vcpu_index * 2) + 1
>>
>> This is documented in Documentation/kvm/api.txt.
>>
>> The effect of the ioctl is simply to simply raise/lower the
>> corresponding virt_irq field on the VCPU struct, which will cause the
>> world-switch code to raise/lower virtual interrupts when running the
>> guest on next switch. The wait_for_interrupt flag is also cleared for
>> raised IRQs causing an idle VCPU to become active again.
>>
>> Note: The custom trace_kvm_irq_line is used despite a generic definition of
>> trace_kvm_set_irq, since the trace-Kvm_set_irq depends on the x86-specific
>> define of __HAVE_IOAPIC. Either the trace event should be created
>> regardless of this define or it should depend on another ifdef clause,
>> common for both x86 and ARM. However, since the arguments don't really
>> match those used in ARM, I am yet to be convinced why this is necessary.
>>
>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
>> ---
>> Documentation/virtual/kvm/api.txt | 10 ++++++-
>> arch/arm/include/asm/kvm.h | 8 ++++++
>> arch/arm/include/asm/kvm_arm.h | 1 +
>> arch/arm/kvm/arm.c | 53 ++++++++++++++++++++++++++++++++++++-
>> arch/arm/kvm/trace.h | 21 +++++++++++++++
>> include/linux/kvm.h | 1 +
>> 6 files changed, 91 insertions(+), 3 deletions(-)
>>
>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>> index 7945b0b..4abaa67 100644
>> --- a/Documentation/virtual/kvm/api.txt
>> +++ b/Documentation/virtual/kvm/api.txt
>> @@ -572,7 +572,7 @@ only go to the IOAPIC. On ia64, a IOSAPIC is created.
>> 4.25 KVM_IRQ_LINE
>>
>> Capability: KVM_CAP_IRQCHIP
>> -Architectures: x86, ia64
>> +Architectures: x86, ia64, arm
>> Type: vm ioctl
>> Parameters: struct kvm_irq_level
>> Returns: 0 on success, -1 on error
>> @@ -582,6 +582,14 @@ Requires that an interrupt controller model has been previously created with
>> KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level
>> to be set to 1 and then back to 0.
>>
>> +KVM_CREATE_IRQCHIP (except for ARM). Note that edge-triggered interrupts
>> +require the level to be set to 1 and then back to 0.
>
> You probably wanted to replace the original lines with these two, no?
>
ah yes, some stgit re-ordering artifact.
>> +
>> +ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The value of the
>> +irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
>> +FIQs. Level is used to raise/lower the line. See arch/arm/include/asm/kvm.h for
>> +convenience macros.
>> +
>> struct kvm_irq_level {
>> union {
>> __u32 irq; /* GSI */
>> diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
>> index 87dc33b..8935062 100644
>> --- a/arch/arm/include/asm/kvm.h
>> +++ b/arch/arm/include/asm/kvm.h
>> @@ -20,6 +20,14 @@
>> #include <asm/types.h>
>>
>> /*
>> + * KVM_IRQ_LINE macros to set/read IRQ/FIQ for specific VCPU index.
>> + */
>> +enum KVM_ARM_IRQ_LINE_TYPE {
>> + KVM_ARM_IRQ_LINE = 0,
>> + KVM_ARM_FIQ_LINE = 1,
>> +};
>> +
>> +/*
>> * Modes used for short-hand mode determinition in the world-switch code and
>> * in emulation code.
>> *
>> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
>> index 835abd1..e378a37 100644
>> --- a/arch/arm/include/asm/kvm_arm.h
>> +++ b/arch/arm/include/asm/kvm_arm.h
>> @@ -49,6 +49,7 @@
>> #define HCR_VM 1
>> #define HCR_GUEST_MASK (HCR_TSC | HCR_TWE | HCR_TWI | HCR_VM | HCR_AMO | \
>> HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO)
>> +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
>>
>> /* Hyp System Control Register (HSCTLR) bits */
>> #define HSCTLR_TE (1 << 30)
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index 89ba18d..fc0bd6b 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -299,6 +299,43 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> return -EINVAL;
>> }
>>
>> +static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>> + struct kvm_irq_level *irq_level)
>> +{
>> + u32 mask;
>> + unsigned int vcpu_idx;
>> + struct kvm_vcpu *vcpu;
>> +
>> + vcpu_idx = irq_level->irq / 2;
>> + if (vcpu_idx >= KVM_MAX_VCPUS)
>> + return -EINVAL;
>> +
>> + vcpu = kvm_get_vcpu(kvm, vcpu_idx);
>> + if (!vcpu)
>> + return -EINVAL;
>> +
>> + switch (irq_level->irq % 2) {
>> + case KVM_ARM_IRQ_LINE:
>> + mask = HCR_VI;
>> + break;
>> + case KVM_ARM_FIQ_LINE:
>> + mask = HCR_VF;
>> + break;
>> + default:
>> + return -EINVAL;
>
> Due to % 2, default is unreachable. Remove the masking?
>
nah, the masking should be there, it's just the default case that should go away.
>> + }
>> +
>> + trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
>> +
>> + if (irq_level->level) {
>> + vcpu->arch.virt_irq |= mask;
>> + vcpu->arch.wait_for_interrupts = 0;
>> + } else
>> + vcpu->arch.virt_irq &= ~mask;
>
> No need to protect the bitops on virt_irq? Or what lock does this?
>
that's fixed in the SMP host patch (but I guess with I/O thread that
doesn't really apply only to SMP, so I could move that logging down
the patch stream. In any case, yes, there should be locking, and there
is).
>> +
>> + return 0;
>> +}
>> +
>> long kvm_arch_vcpu_ioctl(struct file *filp,
>> unsigned int ioctl, unsigned long arg)
>> {
>> @@ -313,8 +350,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>> long kvm_arch_vm_ioctl(struct file *filp,
>> unsigned int ioctl, unsigned long arg)
>> {
>> - printk(KERN_ERR "kvm_arch_vm_ioctl: Unsupported ioctl (%d)\n", ioctl);
>> - return -EINVAL;
>> + struct kvm *kvm = filp->private_data;
>> + void __user *argp = (void __user *)arg;
>> +
>> + switch (ioctl) {
>> + case KVM_IRQ_LINE: {
>> + struct kvm_irq_level irq_event;
>> +
>> + if (copy_from_user(&irq_event, argp, sizeof irq_event))
>> + return -EFAULT;
>> + return kvm_arch_vm_ioctl_irq_line(kvm, &irq_event);
>> + }
>> + default:
>> + return -EINVAL;
>> + }
>> }
>>
>> /**
>> diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
>> index f8869c1..ac64e3a 100644
>> --- a/arch/arm/kvm/trace.h
>> +++ b/arch/arm/kvm/trace.h
>> @@ -40,6 +40,27 @@ TRACE_EVENT(kvm_exit,
>> );
>>
>>
>> +TRACE_EVENT(kvm_irq_line,
>> + TP_PROTO(unsigned int type, unsigned int level, unsigned int vcpu_idx),
>> + TP_ARGS(type, level, vcpu_idx),
>> +
>> + TP_STRUCT__entry(
>> + __field( unsigned int, type )
>> + __field( unsigned int, level )
>> + __field( unsigned int, vcpu_idx )
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->type = type;
>> + __entry->level = level;
>> + __entry->vcpu_idx = vcpu_idx;
>> + ),
>> +
>> + TP_printk("KVM_IRQ_LINE: type: %s, level: %u, vcpu: %u",
>> + (__entry->type == KVM_ARM_IRQ_LINE) ? "IRQ" : "FIQ",
>> + __entry->level, __entry->vcpu_idx)
>> +);
>> +
>>
>> #endif /* _TRACE_KVM_H */
>>
>> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
>> index c3892fc..679abbb 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -111,6 +111,7 @@ struct kvm_irq_level {
>> * ACPI gsi notion of irq.
>> * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
>> * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
>> + * For ARM: IRQ: irq = (2*vcpu_index). FIQ: irq = (2*vcpu_indx + 1).
>> */
>> union {
>> __u32 irq;
>>
Thanks,
-Christoffer
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-11 10:24 ` [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
2011-12-11 15:18 ` Jan Kiszka
@ 2011-12-12 13:28 ` Avi Kivity
2011-12-12 14:38 ` [Android-virt] " Christoffer Dall
1 sibling, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 13:28 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:24 PM, Christoffer Dall wrote:
> Userspace can inject IRQs and FIQs through the KVM_IRQ_LINE VM ioctl.
> This ioctl is used since the sematics are in fact two lines that can be
> either raised or lowered on the VCPU - the IRQ and FIQ lines.
>
> KVM needs to know which VCPU it must operate on and whether the FIQ or
> IRQ line is raised/lowered. Hence both pieces of information is packed
> in the kvm_irq_level->irq field. The irq fild value will be:
> IRQ: vcpu_index * 2
> FIQ: (vcpu_index * 2) + 1
>
> This is documented in Documentation/kvm/api.txt.
>
> The effect of the ioctl is simply to simply raise/lower the
> corresponding virt_irq field on the VCPU struct, which will cause the
> world-switch code to raise/lower virtual interrupts when running the
> guest on next switch. The wait_for_interrupt flag is also cleared for
> raised IRQs causing an idle VCPU to become active again.
>
> Note: The custom trace_kvm_irq_line is used despite a generic definition of
> trace_kvm_set_irq, since the trace-Kvm_set_irq depends on the x86-specific
> define of __HAVE_IOAPIC. Either the trace event should be created
> regardless of this define or it should depend on another ifdef clause,
> common for both x86 and ARM. However, since the arguments don't really
> match those used in ARM, I am yet to be convinced why this is necessary.
Why don't they match? The assignment of lines to actual pins differs,
but essentially it's the same thing (otherwise we'd use a different ioctl).
>
> Capability: KVM_CAP_IRQCHIP
> -Architectures: x86, ia64
> +Architectures: x86, ia64, arm
> Type: vm ioctl
> Parameters: struct kvm_irq_level
> Returns: 0 on success, -1 on error
> @@ -582,6 +582,14 @@ Requires that an interrupt controller model has been previously created with
> KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level
> to be set to 1 and then back to 0.
>
> +KVM_CREATE_IRQCHIP (except for ARM). Note that edge-triggered interrupts
> +require the level to be set to 1 and then back to 0.
Need to replace the previous line beginning with KVM_CREATE_IRQCHIP, not
duplicate it.
> +
> +ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The value of the
> +irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
> +FIQs. Level is used to raise/lower the line. See arch/arm/include/asm/kvm.h for
> +convenience macros.
Userspace only includes <linux/kvm.h>; also please name the macros here.
>
> +static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
> + struct kvm_irq_level *irq_level)
> +{
> + u32 mask;
> + unsigned int vcpu_idx;
> + struct kvm_vcpu *vcpu;
> +
> + vcpu_idx = irq_level->irq / 2;
> + if (vcpu_idx >= KVM_MAX_VCPUS)
> + return -EINVAL;
> +
> + vcpu = kvm_get_vcpu(kvm, vcpu_idx);
> + if (!vcpu)
> + return -EINVAL;
> +
> + switch (irq_level->irq % 2) {
> + case KVM_ARM_IRQ_LINE:
> + mask = HCR_VI;
> + break;
> + case KVM_ARM_FIQ_LINE:
> + mask = HCR_VF;
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
> +
> + if (irq_level->level) {
> + vcpu->arch.virt_irq |= mask;
racy - this is a vm ioctl, and so can (and usually is) invoked from
outside the vcpu thread.
> + vcpu->arch.wait_for_interrupts = 0;
Need an actual wakeup here (see x86's kvm_vcpu_kick() - should really be
common code; it takes care of both the 'vcpu sleeping and needs a
wakeup' and 'vcpu is in guest mode and needs to go to the host to
evaluate interrupt state').
> + } else
> + vcpu->arch.virt_irq &= ~mask;
> +
> + return 0;
> +}
> +
>
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 13:28 ` Avi Kivity
@ 2011-12-12 14:38 ` Christoffer Dall
2011-12-12 14:50 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 14:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marc.Zyngier, android-virt, tech
On Mon, Dec 12, 2011 at 8:28 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:24 PM, Christoffer Dall wrote:
>> Userspace can inject IRQs and FIQs through the KVM_IRQ_LINE VM ioctl.
>> This ioctl is used since the sematics are in fact two lines that can be
>> either raised or lowered on the VCPU - the IRQ and FIQ lines.
>>
>> KVM needs to know which VCPU it must operate on and whether the FIQ or
>> IRQ line is raised/lowered. Hence both pieces of information is packed
>> in the kvm_irq_level->irq field. The irq fild value will be:
>> IRQ: vcpu_index * 2
>> FIQ: (vcpu_index * 2) + 1
>>
>> This is documented in Documentation/kvm/api.txt.
>>
>> The effect of the ioctl is simply to simply raise/lower the
>> corresponding virt_irq field on the VCPU struct, which will cause the
>> world-switch code to raise/lower virtual interrupts when running the
>> guest on next switch. The wait_for_interrupt flag is also cleared for
>> raised IRQs causing an idle VCPU to become active again.
>>
>> Note: The custom trace_kvm_irq_line is used despite a generic definition of
>> trace_kvm_set_irq, since the trace-Kvm_set_irq depends on the x86-specific
>> define of __HAVE_IOAPIC. Either the trace event should be created
>> regardless of this define or it should depend on another ifdef clause,
>> common for both x86 and ARM. However, since the arguments don't really
>> match those used in ARM, I am yet to be convinced why this is necessary.
>
> Why don't they match? The assignment of lines to actual pins differs,
> but essentially it's the same thing (otherwise we'd use a different ioctl).
>
because there is no notion of gsi and irq_source_id on ARM. What's the
harm of this additional tracepoint?
If I should re-use the existing one, should I simply move it outside
of __KVM_HAVE_IOAPIC?
>>
>> Capability: KVM_CAP_IRQCHIP
>> -Architectures: x86, ia64
>> +Architectures: x86, ia64, arm
>> Type: vm ioctl
>> Parameters: struct kvm_irq_level
>> Returns: 0 on success, -1 on error
>> @@ -582,6 +582,14 @@ Requires that an interrupt controller model has been previously created with
>> KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level
>> to be set to 1 and then back to 0.
>>
>> +KVM_CREATE_IRQCHIP (except for ARM). Note that edge-triggered interrupts
>> +require the level to be set to 1 and then back to 0.
>
> Need to replace the previous line beginning with KVM_CREATE_IRQCHIP, not
> duplicate it.
>
right, has been noted and will be fixed. It's a merge error.
>> +
>> +ARM uses two types of interrupt lines per CPU, ie. IRQ and FIQ. The value of the
>> +irq field should be (VCPU_INDEX * 2) for IRQs and ((VCPU_INDEX * 2) + 1) for
>> +FIQs. Level is used to raise/lower the line. See arch/arm/include/asm/kvm.h for
>> +convenience macros.
>
> Userspace only includes <linux/kvm.h>; also please name the macros here.
>
I simply dropped this idea and changed the explanation to (vcpu_index
<< 1) and ((vcpu_index << 1) | 1).
>>
>> +static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>> + struct kvm_irq_level *irq_level)
>> +{
>> + u32 mask;
>> + unsigned int vcpu_idx;
>> + struct kvm_vcpu *vcpu;
>> +
>> + vcpu_idx = irq_level->irq / 2;
>> + if (vcpu_idx >= KVM_MAX_VCPUS)
>> + return -EINVAL;
>> +
>> + vcpu = kvm_get_vcpu(kvm, vcpu_idx);
>> + if (!vcpu)
>> + return -EINVAL;
>> +
>> + switch (irq_level->irq % 2) {
>> + case KVM_ARM_IRQ_LINE:
>> + mask = HCR_VI;
>> + break;
>> + case KVM_ARM_FIQ_LINE:
>> + mask = HCR_VF;
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
>> +
>> + if (irq_level->level) {
>> + vcpu->arch.virt_irq |= mask;
>
> racy - this is a vm ioctl, and so can (and usually is) invoked from
> outside the vcpu thread.
>
this is taken care of in SMP host patch, but will be moved down the
patches for next revision.
>> + vcpu->arch.wait_for_interrupts = 0;
>
> Need an actual wakeup here (see x86's kvm_vcpu_kick() - should really be
> common code; it takes care of both the 'vcpu sleeping and needs a
> wakeup' and 'vcpu is in guest mode and needs to go to the host to
> evaluate interrupt state').
>
the wakeup - same as above. Good point that we need to signal the
other CPU. Will come, maybe not next revision, but the one after that.
>> + } else
>> + vcpu->arch.virt_irq &= ~mask;
>> +
>> + return 0;
>> +}
>> +
>>
Thanks,
Christoffer
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 14:38 ` [Android-virt] " Christoffer Dall
@ 2011-12-12 14:50 ` Avi Kivity
2011-12-12 15:11 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 14:50 UTC (permalink / raw)
To: Christoffer Dall; +Cc: kvm, Marc.Zyngier, android-virt, tech
On 12/12/2011 04:38 PM, Christoffer Dall wrote:
> >
> > Why don't they match? The assignment of lines to actual pins differs,
> > but essentially it's the same thing (otherwise we'd use a different ioctl).
> >
>
> because there is no notion of gsi and irq_source_id on ARM.
gsi = number of irq line, just a bad name, but you do have it on ARM.
irq_source_id really shouldn't have been in kvm_set_irq(), it's an
implementation detail rather than an architectural feature; just ignore it.
> What's the
> harm of this additional tracepoint?
If we get tools that use them, they have an additional difference to
consider. It's a weak argument but it's there.
> If I should re-use the existing one, should I simply move it outside
> of __KVM_HAVE_IOAPIC?
Protect it with __KVM_HAVE_IRQ_LINE so we don't leak unused tracepoints
for other archs.
> >> + trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
> >> +
> >> + if (irq_level->level) {
> >> + vcpu->arch.virt_irq |= mask;
> >
> > racy - this is a vm ioctl, and so can (and usually is) invoked from
> > outside the vcpu thread.
> >
>
> this is taken care of in SMP host patch, but will be moved down the
> patches for next revision.
Yes please. It's hard to review this way. Fold all the smp stuff into
the patches which introduce the functionality.
>
> >> + vcpu->arch.wait_for_interrupts = 0;
> >
> > Need an actual wakeup here (see x86's kvm_vcpu_kick() - should really be
> > common code; it takes care of both the 'vcpu sleeping and needs a
> > wakeup' and 'vcpu is in guest mode and needs to go to the host to
> > evaluate interrupt state').
> >
>
> the wakeup - same as above. Good point that we need to signal the
> other CPU. Will come, maybe not next revision, but the one after that.
Ok. I think you can reuse x86 concepts and even code. I'll accept
patches to make code arch independent ahead of the merge if it helps.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 14:50 ` Avi Kivity
@ 2011-12-12 15:11 ` Christoffer Dall
2011-12-12 15:16 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 15:11 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marc.Zyngier, tech, android-virt, kvm
On Mon, Dec 12, 2011 at 9:50 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/12/2011 04:38 PM, Christoffer Dall wrote:
>> >
>> > Why don't they match? The assignment of lines to actual pins differs,
>> > but essentially it's the same thing (otherwise we'd use a different ioctl).
>> >
>>
>> because there is no notion of gsi and irq_source_id on ARM.
>
>
> gsi = number of irq line, just a bad name, but you do have it on ARM.
>
> irq_source_id really shouldn't have been in kvm_set_irq(), it's an
> implementation detail rather than an architectural feature; just ignore it.
>
>> What's the
>> harm of this additional tracepoint?
>
> If we get tools that use them, they have an additional difference to
> consider. It's a weak argument but it's there.
>
ok, I am all for re-using as much as possible.
>> If I should re-use the existing one, should I simply move it outside
>> of __KVM_HAVE_IOAPIC?
>
> Protect it with __KVM_HAVE_IRQ_LINE so we don't leak unused tracepoints
> for other archs.
>
ok. I used to be scared of touching your arch independent stuff, but
maybe I should ease up there...
>> >> + trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
>> >> +
>> >> + if (irq_level->level) {
>> >> + vcpu->arch.virt_irq |= mask;
>> >
>> > racy - this is a vm ioctl, and so can (and usually is) invoked from
>> > outside the vcpu thread.
>> >
>>
>> this is taken care of in SMP host patch, but will be moved down the
>> patches for next revision.
>
> Yes please. It's hard to review this way. Fold all the smp stuff into
> the patches which introduce the functionality.
>
sorry about that, I see this pretty clearly after the fact.
>>
>> >> + vcpu->arch.wait_for_interrupts = 0;
>> >
>> > Need an actual wakeup here (see x86's kvm_vcpu_kick() - should really be
>> > common code; it takes care of both the 'vcpu sleeping and needs a
>> > wakeup' and 'vcpu is in guest mode and needs to go to the host to
>> > evaluate interrupt state').
>> >
>>
>> the wakeup - same as above. Good point that we need to signal the
>> other CPU. Will come, maybe not next revision, but the one after that.
>
> Ok. I think you can reuse x86 concepts and even code. I'll accept
> patches to make code arch independent ahead of the merge if it helps.
>
ok, I'll look into it.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace
2011-12-12 15:11 ` Christoffer Dall
@ 2011-12-12 15:16 ` Avi Kivity
0 siblings, 0 replies; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 15:16 UTC (permalink / raw)
To: Christoffer Dall; +Cc: Marc.Zyngier, tech, android-virt, kvm
On 12/12/2011 05:11 PM, Christoffer Dall wrote:
> >> If I should re-use the existing one, should I simply move it outside
> >> of __KVM_HAVE_IOAPIC?
> >
> > Protect it with __KVM_HAVE_IRQ_LINE so we don't leak unused tracepoints
> > for other archs.
> >
>
> ok. I used to be scared of touching your arch independent stuff, but
> maybe I should ease up there...
Yup. We should also make more x86 code arch independent where possible.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 06/13] ARM: KVM: World-switch implementation
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (4 preceding siblings ...)
2011-12-11 10:24 ` [PATCH v5 05/13] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
@ 2011-12-11 10:24 ` Christoffer Dall
2011-12-11 10:25 ` [PATCH v5 07/13] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
` (7 subsequent siblings)
13 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:24 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
Provides complete world-switch implementation to switch to other guests
running in non-secure modes. Includes Hyp exception handlers that
capture necessary exception information and store the information on
the VCPU and KVM structures.
Switching to Hyp mode is done through a simple HVC instruction. The
exception vector code will check that the HVC comes from VMID==0 and if
so will store the necessary state on the Hyp stack, which will look like
this (see hyp_hvc):
...
Hyp_Sp + 4: lr_usr
Hyp_Sp : spsr (Host-SVC cpsr)
When returning from Hyp mode to SVC mode, another HVC instruction is
executed from Hyp mode, which is taken in the Hyp_Svc handler. The Hyp
stack pointer should be where it was left from the above initial call,
since the values on the stack will be used to restore state (see
hyp_svc).
Otherwise, the world-switch is pretty straightforward. All state that
can be modified by the guest is first backed up on the Hyp stack and the
VCPU values are loaded onto the hardware. State which is not loaded, but
theoretically modifiable by the guest, is protected through the
virtualization features to generate a trap and cause software emulation.
Upon guest returns, all state is restored from hardware onto the VCPU
struct and the original state is restored from the Hyp-stack onto the
hardware.
One controversy may be the back-door call to __irq_svc (the host
kernel's own physical IRQ handler) which is called when a physical IRQ
exception is taken in Hyp mode while running in the guest.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm.h | 1
arch/arm/include/asm/kvm_arm.h | 26 ++
arch/arm/include/asm/kvm_host.h | 8 +
arch/arm/kernel/armksyms.c | 7 +
arch/arm/kernel/asm-offsets.c | 33 +++
arch/arm/kernel/entry-armv.S | 1
arch/arm/kvm/arm.c | 45 ++++
arch/arm/kvm/guest.c | 2
arch/arm/kvm/interrupts.S | 443 +++++++++++++++++++++++++++++++++++++++
9 files changed, 562 insertions(+), 4 deletions(-)
diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
index 8935062..ff88ca0 100644
--- a/arch/arm/include/asm/kvm.h
+++ b/arch/arm/include/asm/kvm.h
@@ -51,6 +51,7 @@ struct kvm_regs {
__u32 cpsr;
__u32 spsr[5]; /* Banked SPSR, indexed by MODE_ */
struct {
+ __u32 c0_midr;
__u32 c1_sys;
__u32 c2_base0;
__u32 c2_base1;
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e378a37..1769187 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -100,5 +100,31 @@
#define VTTBR_X (5 - VTCR_GUEST_T0SZ)
#endif
+/* Hyp Syndrome Register (HSR) bits */
+#define HSR_EC_SHIFT (26)
+#define HSR_EC (0x3fU << HSR_EC_SHIFT)
+#define HSR_IL (1U << 25)
+#define HSR_ISS (HSR_IL - 1)
+#define HSR_ISV_SHIFT (24)
+#define HSR_ISV (1U << HSR_ISV_SHIFT)
+
+#define HSR_EC_UNKNOWN (0x00)
+#define HSR_EC_WFI (0x01)
+#define HSR_EC_CP15_32 (0x03)
+#define HSR_EC_CP15_64 (0x04)
+#define HSR_EC_CP14_MR (0x05)
+#define HSR_EC_CP14_LS (0x06)
+#define HSR_EC_CP_0_13 (0x07)
+#define HSR_EC_CP10_ID (0x08)
+#define HSR_EC_JAZELLE (0x09)
+#define HSR_EC_BXJ (0x0A)
+#define HSR_EC_CP14_64 (0x0C)
+#define HSR_EC_SVC_HYP (0x11)
+#define HSR_EC_HVC (0x12)
+#define HSR_EC_SMC (0x13)
+#define HSR_EC_IABT (0x20)
+#define HSR_EC_IABT_HYP (0x21)
+#define HSR_EC_DABT (0x24)
+#define HSR_EC_DABT_HYP (0x25)
#endif /* __KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 06d1263..59fcd15 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -62,6 +62,7 @@ struct kvm_vcpu_arch {
/* System control coprocessor (cp15) */
struct {
+ u32 c0_MIDR; /* Main ID Register */
u32 c1_SCTLR; /* System Control Register */
u32 c1_ACTLR; /* Auxilliary Control Register */
u32 c1_CPACR; /* Coprocessor Access Control */
@@ -69,6 +70,12 @@ struct kvm_vcpu_arch {
u64 c2_TTBR1; /* Translation Table Base Register 1 */
u32 c2_TTBCR; /* Translation Table Base Control R. */
u32 c3_DACR; /* Domain Access Control Register */
+ u32 c10_PRRR; /* Primary Region Remap Register */
+ u32 c10_NMRR; /* Normal Memory Remap Register */
+ u32 c13_CID; /* Context ID Register */
+ u32 c13_TID_URW; /* Thread ID, User R/W */
+ u32 c13_TID_URO; /* Thread ID, User R/O */
+ u32 c13_TID_PRIV; /* Thread ID, Priveleged */
} cp15;
u32 virt_irq; /* HCR exception mask */
@@ -78,6 +85,7 @@ struct kvm_vcpu_arch {
u32 hdfar; /* Hyp Data Fault Address Register */
u32 hifar; /* Hyp Inst. Fault Address Register */
u32 hpfar; /* Hyp IPA Fault Address Register */
+ u64 pc_ipa; /* IPA for the current PC (VA to PA result) */
/* IO related fields */
u32 mmio_rd;
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 5b0bce6..2a14b6e 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -49,6 +49,13 @@ extern void __aeabi_ulcmp(void);
extern void fpundefinstr(void);
+#ifdef CONFIG_KVM_ARM_HOST
+/* This is needed for KVM */
+extern void __irq_svc(void);
+
+EXPORT_SYMBOL_GPL(__irq_svc);
+#endif
+
/* platform dependent support */
EXPORT_SYMBOL(__udelay);
EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 1429d89..c126cfb 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -16,6 +16,7 @@
#include <asm/cacheflush.h>
#include <asm/glue-df.h>
#include <asm/glue-pf.h>
+#include <linux/kvm_host.h>
#include <asm/mach/arch.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
@@ -144,5 +145,37 @@ int main(void)
DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
+#ifdef CONFIG_KVM_ARM_HOST
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.cp15.c0_MIDR));
+ DEFINE(VCPU_SCTLR, offsetof(struct kvm_vcpu, arch.cp15.c1_SCTLR));
+ DEFINE(VCPU_CPACR, offsetof(struct kvm_vcpu, arch.cp15.c1_CPACR));
+ DEFINE(VCPU_TTBR0, offsetof(struct kvm_vcpu, arch.cp15.c2_TTBR0));
+ DEFINE(VCPU_TTBR1, offsetof(struct kvm_vcpu, arch.cp15.c2_TTBR1));
+ DEFINE(VCPU_TTBCR, offsetof(struct kvm_vcpu, arch.cp15.c2_TTBCR));
+ DEFINE(VCPU_DACR, offsetof(struct kvm_vcpu, arch.cp15.c3_DACR));
+ DEFINE(VCPU_PRRR, offsetof(struct kvm_vcpu, arch.cp15.c10_PRRR));
+ DEFINE(VCPU_NMRR, offsetof(struct kvm_vcpu, arch.cp15.c10_NMRR));
+ DEFINE(VCPU_CID, offsetof(struct kvm_vcpu, arch.cp15.c13_CID));
+ DEFINE(VCPU_TID_URW, offsetof(struct kvm_vcpu, arch.cp15.c13_TID_URW));
+ DEFINE(VCPU_TID_URO, offsetof(struct kvm_vcpu, arch.cp15.c13_TID_URO));
+ DEFINE(VCPU_TID_PRIV, offsetof(struct kvm_vcpu, arch.cp15.c13_TID_PRIV));
+ DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
+ DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
+ DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
+ DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs));
+ DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs));
+ DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs));
+ DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.pc));
+ DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.cpsr));
+ DEFINE(VCPU_VIRT_IRQ, offsetof(struct kvm_vcpu, arch.virt_irq));
+ DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr));
+ DEFINE(VCPU_HDFAR, offsetof(struct kvm_vcpu, arch.hdfar));
+ DEFINE(VCPU_HIFAR, offsetof(struct kvm_vcpu, arch.hifar));
+ DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar));
+ DEFINE(VCPU_PC_IPA, offsetof(struct kvm_vcpu, arch.pc_ipa));
+ DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
+#endif
return 0;
}
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 3a456c6..aaa557b 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -206,6 +206,7 @@ __dabt_svc:
ENDPROC(__dabt_svc)
.align 5
+ .globl __irq_svc
__irq_svc:
svc_entry
irq_handler
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index fc0bd6b..f5dbbbb 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -253,7 +253,20 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- KVMARM_NOT_IMPLEMENTED();
+ unsigned long cpsr;
+ unsigned long sctlr;
+
+ /* Init execution CPSR */
+ asm volatile ("mrs %[cpsr], cpsr" :
+ [cpsr] "=r" (cpsr));
+ vcpu->arch.regs.cpsr = SVC_MODE | PSR_I_BIT | PSR_F_BIT | PSR_A_BIT |
+ (cpsr & PSR_E_BIT);
+
+ /* Init SCTLR with MMU disabled */
+ asm volatile ("mrc p15, 0, %[sctlr], c1, c0, 0" :
+ [sctlr] "=r" (sctlr));
+ vcpu->arch.cp15.c1_SCTLR = sctlr & ~1U;
+
return 0;
}
@@ -293,10 +306,36 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
return 0;
}
+/**
+ * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run structure pointer used for userspace state exchange
+ *
+ * This function is called through the VCPU_RUN ioctl called from user space. It
+ * will execute VM code in a loop until the time slice for the process is used
+ * or some emulation is needed from user space in which case the function will
+ * return with return value 0 and with the kvm_run structure filled in with the
+ * required data for the requested emulation.
+ */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- KVMARM_NOT_IMPLEMENTED();
- return -EINVAL;
+ int ret;
+
+ for (;;) {
+ trace_kvm_entry(vcpu->arch.regs.pc);
+
+ local_irq_disable();
+ kvm_guest_enter();
+
+ ret = __kvm_vcpu_run(vcpu);
+
+ kvm_guest_exit();
+ local_irq_enable();
+
+ trace_kvm_exit(vcpu->arch.regs.pc);
+ }
+
+ return ret;
}
static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 94a5c54..3a23bee 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -73,6 +73,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
/*
* Co-processor registers.
*/
+ regs->cp15.c0_midr = vcpu->arch.cp15.c0_MIDR;
regs->cp15.c1_sys = vcpu->arch.cp15.c1_SCTLR;
regs->cp15.c2_base0 = vcpu->arch.cp15.c2_TTBR0;
regs->cp15.c2_base1 = vcpu->arch.cp15.c2_TTBR1;
@@ -111,6 +112,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
/*
* Co-processor registers.
*/
+ vcpu->arch.cp15.c0_MIDR = regs->cp15.c0_midr;
vcpu->arch.cp15.c1_SCTLR = regs->cp15.c1_sys;
vcpu_regs->pc = regs->reg15;
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 2edc49b..d516bf4 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -21,6 +21,12 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
+#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
+#define VCPU_USR_SP (VCPU_USR_REG(13))
+#define VCPU_FIQ_REG(_reg_nr) (VCPU_FIQ_REGS + (_reg_nr * 4))
+#define VCPU_FIQ_SPSR (VCPU_FIQ_REG(7))
+
+
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ Hypervisor world-switch code
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -28,9 +34,317 @@
.text
.arm
+/* These are simply for the macros to work - value don't have meaning */
+.equ usr, 0
+.equ svc, 1
+.equ abt, 2
+.equ und, 3
+.equ irq, 4
+.equ fiq, 5
+
+.macro store_mode_state base_reg, mode
+ .if \mode == usr
+ mrs r2, SP_usr
+ mov r3, lr
+ stmdb \base_reg!, {r2, r3}
+ .elseif \mode != fiq
+ mrs r2, SP_\mode
+ mrs r3, LR_\mode
+ mrs r4, SPSR_\mode
+ stmdb \base_reg!, {r2, r3, r4}
+ .else
+ mrs r2, r8_fiq
+ mrs r3, r9_fiq
+ mrs r4, r10_fiq
+ mrs r5, r11_fiq
+ mrs r6, r12_fiq
+ mrs r7, SP_fiq
+ mrs r8, LR_fiq
+ mrs r9, SPSR_fiq
+ stmdb \base_reg!, {r2-r9}
+ .endif
+.endm
+
+.macro load_mode_state base_reg, mode
+ .if \mode == usr
+ ldmia \base_reg!, {r2, r3}
+ msr SP_usr, r2
+ mov lr, r3
+ .elseif \mode != fiq
+ ldmia \base_reg!, {r2, r3, r4}
+ msr SP_\mode, r2
+ msr LR_\mode, r3
+ msr SPSR_\mode, r4
+ .else
+ ldmia \base_reg!, {r2-r9}
+ msr r8_fiq, r2
+ msr r9_fiq, r3
+ msr r10_fiq, r4
+ msr r11_fiq, r5
+ msr r12_fiq, r6
+ msr SP_fiq, r7
+ msr LR_fiq, r8
+ msr SPSR_fiq, r9
+ .endif
+.endm
+
+/* Reads cp15 registers from hardware and stores then in memory
+ * @vcpu: If 0, registers are written in-order to the stack,
+ * otherwise to the VCPU struct pointed to by vcpup
+ * @vcpup: Register pointing to VCPU struct
+ */
+.macro read_cp15_state vcpu=0, vcpup
+ mrc p15, 0, r2, c1, c0, 0 @ SCTLR
+ mrc p15, 0, r3, c1, c0, 2 @ CPACR
+ mrc p15, 0, r4, c2, c0, 2 @ TTBCR
+ mrc p15, 0, r5, c3, c0, 0 @ DACR
+ mrrc p15, 0, r6, r7, c2 @ TTBR 0
+ mrrc p15, 1, r8, r9, c2 @ TTBR 1
+ mrc p15, 0, r10, c10, c2, 0 @ PRRR
+ mrc p15, 0, r11, c10, c2, 1 @ NMRR
+
+ .if \vcpu == 0
+ push {r2-r11} @ Push CP15 registers
+ .else
+ str r2, [\vcpup, #VCPU_SCTLR]
+ str r3, [\vcpup, #VCPU_CPACR]
+ str r4, [\vcpup, #VCPU_TTBCR]
+ str r5, [\vcpup, #VCPU_DACR]
+ add \vcpup, \vcpup, #VCPU_TTBR0
+ strd r6, r7, [\vcpup]
+ add \vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+ strd r8, r9, [\vcpup]
+ sub \vcpup, \vcpup, #(VCPU_TTBR1)
+ str r10, [\vcpup, #VCPU_PRRR]
+ str r11, [\vcpup, #VCPU_NMRR]
+ .endif
+
+ mrc p15, 0, r2, c13, c0, 1 @ CID
+ mrc p15, 0, r3, c13, c0, 2 @ TID_URW
+ mrc p15, 0, r4, c13, c0, 3 @ TID_URO
+ mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV
+ .if \vcpu == 0
+ push {r2-r5} @ Push CP15 registers
+ .else
+ str r2, [\vcpup, #VCPU_CID]
+ str r3, [\vcpup, #VCPU_TID_URW]
+ str r4, [\vcpup, #VCPU_TID_URO]
+ str r5, [\vcpup, #VCPU_TID_PRIV]
+ .endif
+.endm
+
+/* Reads cp15 registers from memory and writes them to hardware
+ * @vcpu: If 0, registers are read in-order from the stack,
+ * otherwise from the VCPU struct pointed to by vcpup
+ * @vcpup: Register pointing to VCPU struct
+ */
+.macro write_cp15_state vcpu=0, vcpup
+ .if \vcpu == 0
+ pop {r2-r5}
+ .else
+ ldr r2, [\vcpup, #VCPU_CID]
+ ldr r3, [\vcpup, #VCPU_TID_URW]
+ ldr r4, [\vcpup, #VCPU_TID_URO]
+ ldr r5, [\vcpup, #VCPU_TID_PRIV]
+ .endif
+
+ mcr p15, 0, r2, c13, c0, 1 @ CID
+ mcr p15, 0, r3, c13, c0, 2 @ TID_URW
+ mcr p15, 0, r4, c13, c0, 3 @ TID_URO
+ mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV
+
+ .if \vcpu == 0
+ pop {r2-r11}
+ .else
+ ldr r2, [\vcpup, #VCPU_SCTLR]
+ ldr r3, [\vcpup, #VCPU_CPACR]
+ ldr r4, [\vcpup, #VCPU_TTBCR]
+ ldr r5, [\vcpup, #VCPU_DACR]
+ add \vcpup, \vcpup, #VCPU_TTBR0
+ ldrd r6, r7, [\vcpup]
+ add \vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+ ldrd r8, r9, [\vcpup]
+ sub \vcpup, \vcpup, #(VCPU_TTBR1)
+ ldr r10, [\vcpup, #VCPU_PRRR]
+ ldr r11, [\vcpup, #VCPU_NMRR]
+ .endif
+
+ mcr p15, 0, r2, c1, c0, 0 @ SCTLR
+ mcr p15, 0, r3, c1, c0, 2 @ CPACR
+ mcr p15, 0, r4, c2, c0, 2 @ TTBCR
+ mcr p15, 0, r5, c3, c0, 0 @ DACR
+ mcrr p15, 0, r6, r7, c2 @ TTBR 0
+ mcrr p15, 1, r8, r9, c2 @ TTBR 1
+ mcr p15, 0, r10, c10, c2, 0 @ PRRR
+ mcr p15, 0, r11, c10, c2, 1 @ NMRR
+.endm
+
+/* Configures the HSTR (Hyp System Trap Register) on entry/return
+ * (hardware reset value is 0) */
+.macro set_hstr entry
+ mrc p15, 4, r2, c1, c1, 3
+ ldr r3, =0x9e00
+ .if \entry == 1
+ orr r2, r2, r3 @ Trap CR{9,10,11,12,15}
+ .else
+ bic r2, r2, r3 @ Don't trap any CRx accesses
+ .endif
+ mcr p15, 4, r2, c1, c1, 3
+.endm
+
+/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi/wfe, trap smc */
+.macro configure_hyp_role entry, vcpu_ptr
+ mrc p15, 4, r2, c1, c1, 0 @ HCR
+ bic r2, r2, #HCR_VIRT_EXCP_MASK
+ ldr r3, =HCR_GUEST_MASK
+ .if \entry == 1
+ orr r2, r2, r3
+ ldr r3, [\vcpu_ptr, #VCPU_VIRT_IRQ]
+ orr r2, r2, r3
+ .else
+ bic r2, r2, r3
+ .endif
+ mcr p15, 4, r2, c1, c1, 0
+.endm
+
+@ This must be called from Hyp mode!
+@ Arguments:
+@ r0: pointer to vcpu struct
ENTRY(__kvm_vcpu_run)
+ hvc #0 @ Change to Hyp-mode
+
+ @ Now we're in Hyp-mode and lr_usr, spsr_hyp are on the stack
+ mrs r2, sp_usr
+ push {r2} @ Push r13_usr
+ push {r4-r12} @ Push r4-r12
+
+ store_mode_state sp, svc
+ store_mode_state sp, abt
+ store_mode_state sp, und
+ store_mode_state sp, irq
+ store_mode_state sp, fiq
+
+ @ Store hardware CP15 state and load guest state
+ read_cp15_state
+ write_cp15_state 1, r0
+
+ push {r0} @ Push the VCPU pointer
+
+ @ Set up guest memory translation
+ ldr r1, [r0, #VCPU_KVM] @ r1 points to kvm struct
+ ldrd r2, r3, [r1, #KVM_VTTBR]
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+
+ @ Configure Hyp-role
+ configure_hyp_role 1, r0
+
+ @ Trap coprocessor CRx for all x except 2 and 14
+ set_hstr 1
+
+ @ Write standard A-9 CPU id in MIDR
+ ldr r1, [r0, #VCPU_MIDR]
+ mcr p15, 4, r1, c0, c0, 0
+
+ @ Load guest registers
+ add r0, r0, #(VCPU_USR_SP)
+ load_mode_state r0, usr
+ load_mode_state r0, svc
+ load_mode_state r0, abt
+ load_mode_state r0, und
+ load_mode_state r0, irq
+ load_mode_state r0, fiq
+
+ @ Load return state (r0 now points to vcpu->arch.regs.pc)
+ ldmia r0, {r2, r3}
+ msr ELR_hyp, r2
+ msr spsr, r3
+
+ @ Load remaining registers and do the switch
+ sub r0, r0, #(VCPU_PC - VCPU_USR_REGS)
+ ldmia r0, {r0-r12}
+ eret
+
+__kvm_vcpu_return:
+ @ Store return state
+ mrs r2, ELR_hyp
+ mrs r3, spsr
+ str r2, [r1, #VCPU_PC]
+ str r3, [r1, #VCPU_CPSR]
+
+ @ Store guest registers
+ add r1, r1, #(VCPU_FIQ_SPSR + 4)
+ store_mode_state r1, fiq
+ store_mode_state r1, irq
+ store_mode_state r1, und
+ store_mode_state r1, abt
+ store_mode_state r1, svc
+ store_mode_state r1, usr
+ sub r1, r1, #(VCPU_USR_REG(13))
+
+ @ Don't trap coprocessor accesses for host kernel
+ set_hstr 0
+
+ @ Reset Hyp-role
+ configure_hyp_role 0, r1
+
+ @ Let guest read hardware MIDR
+ mrc p15, 0, r2, c0, c0, 0
+ mcr p15, 4, r2, c0, c0, 0
+
+ @ Set VMID == 0
+ mov r2, #0
+ mov r3, #0
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+
+ @ Store guest CP15 state and restore host state
+ read_cp15_state 1, r1
+ write_cp15_state
+
+ load_mode_state sp, fiq
+ load_mode_state sp, irq
+ load_mode_state sp, und
+ load_mode_state sp, abt
+ load_mode_state sp, svc
+
+ pop {r4-r12} @ Pop r4-r12
+ pop {r2} @ Pop r13_usr
+ msr sp_usr, r2
+
+ hvc #0
+
+ cmp r0, #ARM_EXCEPTION_IRQ
+ bne return_to_ioctl
+
+ /*
+ * It's time to launch the kernel IRQ handler for IRQ exceptions. This
+ * requires some manipulation though.
+ *
+ * - The easiest entry point to the host handler is __irq_svc.
+ * - The __irq_svc expects to be called from SVC mode, which has been
+ * switched to from vector_stub code in entry-armv.S. The __irq_svc
+ * calls svc_entry which uses values stored in memory and pointed to
+ * by r0 to return from handler. We allocate this memory on the
+ * stack, which will contain these values:
+ * 0x8: cpsr
+ * 0x4: return_address
+ * 0x0: r0
+ */
+ adr r1, irq_kernel_resume @ Where to resume
+ mrs r2, cpsr @ CPSR when we return
+ push {r0 - r2}
+ mov r0, sp
+ b __irq_svc
+
+irq_kernel_resume:
+ pop {r0}
+ add sp, sp, #8
+
+return_to_ioctl:
THUMB( orr lr, lr, #1)
mov pc, lr
+
+ .ltorg
+
__kvm_vcpu_run_end:
.globl __kvm_vcpu_run_end
@@ -39,9 +353,136 @@ __kvm_vcpu_run_end:
@ Hypervisor exception vector and handlers
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ .text
+ .arm
+
.align 5
__kvm_hyp_vector:
.globl __kvm_hyp_vector
- nop
+
+ @ Hyp-mode exception vector
+ b hyp_reset
+ b hyp_undef
+ b hyp_svc
+ b hyp_pabt
+ b hyp_dabt
+ b hyp_hvc
+ b hyp_irq
+ b hyp_fiq
+
+ .align
+hyp_reset:
+ sub pc, pc, #8
+
+ .align
+hyp_undef:
+ sub pc, pc, #8
+
+ .align
+hyp_svc:
+ @ Can only get here if HVC or SVC is called from Hyp, mode which means
+ @ we want to change mode back to SVC mode.
+ @ NB: Stack pointer should be where hyp_hvc handler left it!
+ ldr lr, [sp, #4]
+ msr spsr, lr
+ ldr lr, [sp]
+ add sp, sp, #8
+ eret
+
+ .align
+hyp_pabt:
+ sub pc, pc, #8
+
+ .align
+hyp_dabt:
+ sub pc, pc, #8
+
+ .align
+hyp_hvc:
+ @ Getting here is either becuase of a trap from a guest or from calling
+ @ HVC from the host kernel, which means "switch to Hyp mode".
+ push {r0, r1, r2}
+
+ @ Check syndrome register
+ mrc p15, 4, r0, c5, c2, 0 @ HSR
+ lsr r1, r0, #HSR_EC_SHIFT
+ cmp r1, #HSR_EC_HVC
+ bne guest_trap @ Not HVC instr.
+
+ @ Let's check if the HVC came from VMID 0 and allow simple
+ @ switch to Hyp mode
+ mrrc p15, 6, r1, r2, c2
+ lsr r2, r2, #16
+ and r2, r2, #0xff
+ cmp r2, #0
+ bne guest_trap @ Guest called HVC
+
+ pop {r0, r1, r2}
+
+ @ Store lr_usr,spsr (svc cpsr) on stack
+ sub sp, sp, #8
+ str lr, [sp]
+ mrs lr, spsr
+ str lr, [sp, #4]
+
+ @ Return to caller in Hyp mode
+ mrs lr, ELR_hyp
+ mov pc, lr
+
+guest_trap:
+ ldr r1, [sp, #12] @ Load VCPU pointer
+ str r0, [r1, #VCPU_HSR]
+ add r1, r1, #VCPU_USR_REG(3)
+ stmia r1, {r3-r12}
+ sub r1, r1, #(VCPU_USR_REG(3) - VCPU_USR_REG(0))
+ pop {r3, r4, r5}
+ add sp, sp, #4 @ We loaded the VCPU pointer above
+ stmia r1, {r3, r4, r5}
+ sub r1, r1, #VCPU_USR_REG(0)
+
+ @ Check if we need the fault information
+ lsr r2, r0, #HSR_EC_SHIFT
+ cmp r2, #HSR_EC_IABT
+ beq 2f
+ cmpne r2, #HSR_EC_DABT
+ bne 1f
+
+ @ For non-valid data aborts, get the offending instr. PA
+ lsr r2, r0, #HSR_ISV_SHIFT
+ ands r2, r2, #1
+ bne 2f
+ mrs r3, ELR_hyp
+ mcr p15, 0, r3, c7, c8, 0 @ VA to PA, V2PCWPR
+ mrrc p15, 0, r4, r5, c7 @ PAR
+ add r6, r1, #VCPU_PC_IPA
+ strd r4, r5, [r6]
+
+2: mrc p15, 4, r2, c6, c0, 0 @ HDFAR
+ mrc p15, 4, r3, c6, c0, 2 @ HIFAR
+ mrc p15, 4, r4, c6, c0, 4 @ HPFAR
+ add r5, r1, #VCPU_HDFAR
+ stmia r5, {r2, r3, r4}
+
+1: mov r0, #ARM_EXCEPTION_HVC
+ b __kvm_vcpu_return
+
+ .align
+hyp_irq:
+ push {r0}
+ ldr r0, [sp, #4] @ Load VCPU pointer
+ add r0, r0, #(VCPU_USR_REG(1))
+ stmia r0, {r1-r12}
+ pop {r0, r1} @ r1 == vcpu pointer
+ str r0, [r1, #VCPU_USR_REG(0)]
+
+ mov r0, #ARM_EXCEPTION_IRQ
+ b __kvm_vcpu_return
+
+ .align
+hyp_fiq:
+ sub pc, pc, #8
+
+ .ltorg
+
__kvm_hyp_vector_end:
.globl __kvm_hyp_vector_end
^ permalink raw reply related [flat|nested] 105+ messages in thread
* [PATCH v5 07/13] ARM: KVM: Emulation framework and CP15 emulation
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (5 preceding siblings ...)
2011-12-11 10:24 ` [PATCH v5 06/13] ARM: KVM: World-switch implementation Christoffer Dall
@ 2011-12-11 10:25 ` Christoffer Dall
2011-12-12 13:44 ` Avi Kivity
2011-12-11 10:25 ` [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM Christoffer Dall
` (6 subsequent siblings)
13 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:25 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
From: Christoffer Dall <cdall@cs.columbia.edu>
Adds a new important function in the main KVM/ARM code called
handle_exit() which is called from kvm_arch_vcpu_ioctl_run() on returns
from guest execution. This function examines the Hyp-Syndrome-Register
(HSR), which contains information telling KVM what caused the exit from
the guest.
Some of the reasons for an exit are CP15 accesses, which are
not allowed from the guest, and this commit handles these exits by
emulating the intended operation in software and skipping the guest
instruction.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_emulate.h | 7 +
arch/arm/kvm/arm.c | 77 ++++++++++++++
arch/arm/kvm/emulate.c | 195 ++++++++++++++++++++++++++++++++++++
arch/arm/kvm/trace.h | 28 +++++
4 files changed, 307 insertions(+), 0 deletions(-)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 91d461a..af21fd5 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -40,6 +40,13 @@ static inline unsigned char vcpu_mode(struct kvm_vcpu *vcpu)
return modes_table[vcpu->arch.regs.cpsr & 0xf];
}
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
/*
* Return the SPSR for the specified mode of the virtual CPU.
*/
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f5dbbbb..a6e1763 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -35,6 +35,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_emulate.h>
#include "debug.h"
@@ -306,6 +307,62 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
return 0;
}
+static inline int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ unsigned long hsr_ec;
+
+ if (exception_index == ARM_EXCEPTION_IRQ)
+ return 0;
+
+ if (exception_index != ARM_EXCEPTION_HVC) {
+ kvm_err(-EINVAL, "Unsupported exception type");
+ return -EINVAL;
+ }
+
+ hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
+ switch (hsr_ec) {
+ case HSR_EC_WFI:
+ return kvm_handle_wfi(vcpu, run);
+ case HSR_EC_CP15_32:
+ case HSR_EC_CP15_64:
+ return kvm_handle_cp15_access(vcpu, run);
+ case HSR_EC_CP14_MR:
+ return kvm_handle_cp14_access(vcpu, run);
+ case HSR_EC_CP14_LS:
+ return kvm_handle_cp14_load_store(vcpu, run);
+ case HSR_EC_CP14_64:
+ return kvm_handle_cp14_access(vcpu, run);
+ case HSR_EC_CP_0_13:
+ return kvm_handle_cp_0_13_access(vcpu, run);
+ case HSR_EC_CP10_ID:
+ return kvm_handle_cp10_id(vcpu, run);
+ case HSR_EC_SVC_HYP:
+ /* SVC called from Hyp mode should never get here */
+ kvm_msg("SVC called from Hyp mode shouldn't go here");
+ BUG();
+ case HSR_EC_HVC:
+ kvm_msg("hvc: %x (at %08x)", vcpu->arch.hsr & ((1 << 16) - 1),
+ vcpu->arch.regs.pc);
+ kvm_msg(" HSR: %8x", vcpu->arch.hsr);
+ break;
+ case HSR_EC_IABT:
+ case HSR_EC_DABT:
+ return kvm_handle_guest_abort(vcpu, run);
+ case HSR_EC_IABT_HYP:
+ case HSR_EC_DABT_HYP:
+ /* The hypervisor should never cause aborts */
+ kvm_msg("The hypervisor itself shouldn't cause aborts");
+ BUG();
+ default:
+ kvm_msg("Unkown exception class: %08x (%08x)", hsr_ec,
+ vcpu->arch.hsr);
+ BUG();
+ }
+
+ return 0;
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -333,6 +390,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
local_irq_enable();
trace_kvm_exit(vcpu->arch.regs.pc);
+
+ ret = handle_exit(vcpu, run, ret);
+ if (ret) {
+ kvm_err(ret, "Error in handle_exit");
+ break;
+ }
+
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ break;
+
+ if (need_resched()) {
+ vcpu_put(vcpu);
+ schedule();
+ vcpu_load(vcpu);
+ }
+
+ if (signal_pending(current) && !(run->exit_reason)) {
+ run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ break;
+ }
}
return ret;
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index 6587dde..fded8c7 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -14,7 +14,14 @@
*
*/
+#include <linux/mm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>
+#include <trace/events/kvm.h>
+
+#include "debug.h"
+#include "trace.h"
#define USR_REG_OFFSET(_reg) \
offsetof(struct kvm_vcpu_arch, regs.usr_regs[_reg])
@@ -119,3 +126,191 @@ u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
return (u32 *)((void *)&vcpu->arch + vcpu_reg_offsets[mode][reg_num]);
}
+
+/******************************************************************************
+ * Co-processor emulation
+ */
+
+struct coproc_params {
+ unsigned long CRm;
+ unsigned long CRn;
+ unsigned long Op1;
+ unsigned long Op2;
+ unsigned long Rt1;
+ unsigned long Rt2;
+ bool is_64bit;
+ bool is_write;
+};
+
+#define CP15_OP(_vcpu, _params, _cp15_reg) \
+do { \
+ if (_params->is_write) \
+ _vcpu->arch.cp15._cp15_reg = *vcpu_reg(_vcpu, _params->Rt1); \
+ else \
+ *vcpu_reg(_vcpu, _params->Rt1) = _vcpu->arch.cp15._cp15_reg; \
+} while (0);
+
+
+static inline void print_cp_instr(struct coproc_params *p)
+{
+ if (p->is_64bit) {
+ kvm_msg(" %s\tp15, %u, r%u, r%u, c%u",
+ (p->is_write) ? "mcrr" : "mrrc",
+ p->Op1, p->Rt1, p->Rt2, p->CRm);
+ } else {
+ kvm_msg(" %s\tp15, %u, r%u, c%u, c%u, %u",
+ (p->is_write) ? "mcr" : "mrc",
+ p->Op1, p->Rt1, p->CRn, p->CRm, p->Op2);
+ }
+}
+
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+}
+
+/**
+ * emulate_cp15_c10_access -- emulates cp15 accesses for CRn == 10
+ * @vcpu: The VCPU pointer
+ * @p: The coprocessor parameters struct pointer holding trap inst. details
+ *
+ * This function may not need to exist - if we can ignore guest attempts to
+ * tamper with TLB lockdowns then it should be enough to store/restore the
+ * host/guest PRRR and NMRR memory remap registers and allow guest direct access
+ * to these registers.
+ */
+static int emulate_cp15_c10_access(struct kvm_vcpu *vcpu,
+ struct coproc_params *p)
+{
+ BUG_ON(p->CRn != 10);
+ BUG_ON(p->is_64bit);
+
+ if ((p->CRm == 0 || p->CRm == 1 || p->CRm == 4 || p->CRm == 8) &&
+ (p->Op2 <= 7)) {
+ /* TLB Lockdown operations - ignored */
+ return 0;
+ }
+
+ if (p->CRm == 2 && p->Op2 == 0) {
+ CP15_OP(vcpu, p, c10_PRRR);
+ return 0;
+ }
+
+ if (p->CRm == 2 && p->Op2 == 1) {
+ CP15_OP(vcpu, p, c10_NMRR);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * emulate_cp15_c15_access -- emulates cp15 accesses for CRn == 15
+ * @vcpu: The VCPU pointer
+ * @p: The coprocessor parameters struct pointer holding trap inst. details
+ *
+ * The CP15 c15 register is implementation defined, but some guest kernels
+ * attempt to read/write a diagnostics register here. We always return 0 and
+ * ignore writes and hope for the best. This may need to be refined.
+ */
+static int emulate_cp15_c15_access(struct kvm_vcpu *vcpu,
+ struct coproc_params *p)
+{
+ trace_kvm_emulate_cp15_imp(p->Op1, p->Rt1, p->CRn, p->CRm,
+ p->Op2, p->is_write);
+
+ if (!p->is_write)
+ *vcpu_reg(vcpu, p->Rt1) = 0;
+
+ return 0;
+}
+
+/**
+ * kvm_handle_cp15_access -- handles a trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ *
+ * Investigates the CRn/CRm and wether this was mcr/mrc or mcrr/mrrc and either
+ * simply errors out if the operation was not supported (should maybe raise
+ * undefined to guest instead?) and otherwise emulated access.
+ */
+int kvm_handle_cp15_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ unsigned long hsr_ec, instr_len;
+ struct coproc_params params;
+ int ret = 0;
+
+ hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+ params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
+ params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
+ BUG_ON(params.Rt1 >= 15);
+ params.is_write = ((vcpu->arch.hsr & 1) == 0);
+ params.is_64bit = (hsr_ec == HSR_EC_CP15_64);
+
+ if (params.is_64bit) {
+ /* mrrc, mccr operation */
+ params.Op1 = (vcpu->arch.hsr >> 16) & 0xf;
+ params.Op2 = 0;
+ params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf;
+ BUG_ON(params.Rt2 >= 15);
+ params.CRn = 0;
+ } else {
+ params.CRn = (vcpu->arch.hsr >> 10) & 0xf;
+ params.Op1 = (vcpu->arch.hsr >> 14) & 0x7;
+ params.Op2 = (vcpu->arch.hsr >> 17) & 0x7;
+ params.Rt2 = 0;
+ }
+
+ /* So far no mrrc/mcrr accesses are emulated */
+ if (params.is_64bit)
+ goto unsupp_err_out;
+
+ switch (params.CRn) {
+ case 10:
+ ret = emulate_cp15_c10_access(vcpu, ¶ms);
+ break;
+ case 15:
+ ret = emulate_cp15_c15_access(vcpu, ¶ms);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ goto unsupp_err_out;
+
+ /* Skip instruction, since it was emulated */
+ instr_len = ((vcpu->arch.hsr >> 25) & 1) ? 4 : 2;
+ *vcpu_reg(vcpu, 15) += instr_len;
+
+ return ret;
+unsupp_err_out:
+ kvm_msg("Unsupported guest CP15 access at: %08x", vcpu->arch.regs.pc);
+ print_cp_instr(¶ms);
+ return -EINVAL;
+}
+
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return 0;
+}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index ac64e3a..381ea4a 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -39,6 +39,34 @@ TRACE_EVENT(kvm_exit,
TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
);
+TRACE_EVENT(kvm_emulate_cp15_imp,
+ TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
+ unsigned long CRm, unsigned long Op2, bool is_write),
+ TP_ARGS(Op1, Rt1, CRn, CRm, Op2, is_write),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, Op1 )
+ __field( unsigned int, Rt1 )
+ __field( unsigned int, CRn )
+ __field( unsigned int, CRm )
+ __field( unsigned int, Op2 )
+ __field( bool, is_write )
+ ),
+
+ TP_fast_assign(
+ __entry->is_write = is_write;
+ __entry->Op1 = Op1;
+ __entry->Rt1 = Rt1;
+ __entry->CRn = CRn;
+ __entry->CRm = CRm;
+ __entry->Op2 = Op2;
+ ),
+
+ TP_printk("Implementation defined CP15: %s\tp15, %u, r%u, c%u, c%u, %u",
+ (__entry->is_write) ? "mcr" : "mrc",
+ __entry->Op1, __entry->Rt1, __entry->CRn,
+ __entry->CRm, __entry->Op2)
+);
TRACE_EVENT(kvm_irq_line,
TP_PROTO(unsigned int type, unsigned int level, unsigned int vcpu_idx),
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 07/13] ARM: KVM: Emulation framework and CP15 emulation
2011-12-11 10:25 ` [PATCH v5 07/13] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
@ 2011-12-12 13:44 ` Avi Kivity
2011-12-12 16:17 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 13:44 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:25 PM, Christoffer Dall wrote:
> From: Christoffer Dall <cdall@cs.columbia.edu>
>
> Adds a new important function in the main KVM/ARM code called
> handle_exit() which is called from kvm_arch_vcpu_ioctl_run() on returns
> from guest execution. This function examines the Hyp-Syndrome-Register
> (HSR), which contains information telling KVM what caused the exit from
> the guest.
>
> Some of the reasons for an exit are CP15 accesses, which are
> not allowed from the guest and this commits handles these exits by
commit
> emulating the intented operation in software and skip the guest
intended
> instruction.
>
> @@ -306,6 +307,62 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
> return 0;
> }
>
> +static inline int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
> + int exception_index)
> +{
> + unsigned long hsr_ec;
> +
> + if (exception_index == ARM_EXCEPTION_IRQ)
> + return 0;
> +
> + if (exception_index != ARM_EXCEPTION_HVC) {
> + kvm_err(-EINVAL, "Unsupported exception type");
> + return -EINVAL;
> + }
> +
> + hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
> + switch (hsr_ec) {
> + case HSR_EC_WFI:
> + return kvm_handle_wfi(vcpu, run);
> + case HSR_EC_CP15_32:
> + case HSR_EC_CP15_64:
> + return kvm_handle_cp15_access(vcpu, run);
> + case HSR_EC_CP14_MR:
> + return kvm_handle_cp14_access(vcpu, run);
> + case HSR_EC_CP14_LS:
> + return kvm_handle_cp14_load_store(vcpu, run);
> + case HSR_EC_CP14_64:
> + return kvm_handle_cp14_access(vcpu, run);
> + case HSR_EC_CP_0_13:
> + return kvm_handle_cp_0_13_access(vcpu, run);
> + case HSR_EC_CP10_ID:
> + return kvm_handle_cp10_id(vcpu, run);
> + case HSR_EC_SVC_HYP:
> + /* SVC called from Hyp mode should never get here */
> + kvm_msg("SVC called from Hyp mode shouldn't go here");
> + BUG();
> + case HSR_EC_HVC:
> + kvm_msg("hvc: %x (at %08x)", vcpu->arch.hsr & ((1 << 16) - 1),
> + vcpu->arch.regs.pc);
> + kvm_msg(" HSR: %8x", vcpu->arch.hsr);
> + break;
> + case HSR_EC_IABT:
> + case HSR_EC_DABT:
> + return kvm_handle_guest_abort(vcpu, run);
> + case HSR_EC_IABT_HYP:
> + case HSR_EC_DABT_HYP:
> + /* The hypervisor should never cause aborts */
> + kvm_msg("The hypervisor itself shouldn't cause aborts");
> + BUG();
> + default:
> + kvm_msg("Unkown exception class: %08x (%08x)", hsr_ec,
> + vcpu->arch.hsr);
> + BUG();
> + }
x86 uses a function table, which is slightly nicer.
> +
> + return 0;
> +}
> +
> /**
> * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
> * @vcpu: The VCPU pointer
> @@ -333,6 +390,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
> local_irq_enable();
>
> trace_kvm_exit(vcpu->arch.regs.pc);
> +
> + ret = handle_exit(vcpu, run, ret);
> + if (ret) {
> + kvm_err(ret, "Error in handle_exit");
> + break;
> + }
> +
> + if (run->exit_reason == KVM_EXIT_MMIO)
> + break;
> +
> + if (need_resched()) {
> + vcpu_put(vcpu);
> + schedule();
> + vcpu_load(vcpu);
> + }
I don't think you need the vcpu_put()/vcpu_load() here; you can replace
the whole thing with cond_resched().
> +
> + if (signal_pending(current) && !(run->exit_reason)) {
> + run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
This exit reason doesn't fit with KVM_IRQ_LINE.
With KVM_INTERRUPT, userspace is responsible for determining when to
inject interrupts, so it needs to know when guest interrupts become
enabled (this is KVM_EXIT_IRQ_WINDOWS_OPEN). With KVM_IRQ_LINE,
userspace just sets the line status, and the kernel takes care of
everything.
Oh, and you need to exit to userspace unconditionally if a signal is
pending.
> + break;
> + }
> }
>
> +
> +/******************************************************************************
> + * Co-processor emulation
> + */
> +
> +struct coproc_params {
> + unsigned long CRm;
> + unsigned long CRn;
> + unsigned long Op1;
> + unsigned long Op2;
> + unsigned long Rt1;
> + unsigned long Rt2;
> + bool is_64bit;
> + bool is_write;
> +};
> +
> +#define CP15_OP(_vcpu, _params, _cp15_reg) \
> +do { \
> + if (_params->is_write) \
> + _vcpu->arch.cp15._cp15_reg = *vcpu_reg(_vcpu, _params->Rt1); \
> + else \
> + *vcpu_reg(_vcpu, _params->Rt1) = _vcpu->arch.cp15._cp15_reg; \
> +} while (0);
Ugly. How about an array of registers instead?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 07/13] ARM: KVM: Emulation framework and CP15 emulation
2011-12-12 13:44 ` Avi Kivity
@ 2011-12-12 16:17 ` Christoffer Dall
0 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 16:17 UTC (permalink / raw)
To: Avi Kivity
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On Mon, Dec 12, 2011 at 8:44 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>> From: Christoffer Dall <cdall@cs.columbia.edu>
>>
>> Adds a new important function in the main KVM/ARM code called
>> handle_exit() which is called from kvm_arch_vcpu_ioctl_run() on returns
>> from guest execution. This function examines the Hyp-Syndrome-Register
>> (HSR), which contains information telling KVM what caused the exit from
>> the guest.
>>
>> Some of the reasons for an exit are CP15 accesses, which are
>> not allowed from the guest and this commits handles these exits by
>
> commit
>
>> emulating the intented operation in software and skip the guest
>
> intended
>
>> instruction.
>>
>> @@ -306,6 +307,62 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
>> return 0;
>> }
>>
>> +static inline int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>> + int exception_index)
>> +{
>> + unsigned long hsr_ec;
>> +
>> + if (exception_index == ARM_EXCEPTION_IRQ)
>> + return 0;
>> +
>> + if (exception_index != ARM_EXCEPTION_HVC) {
>> + kvm_err(-EINVAL, "Unsupported exception type");
>> + return -EINVAL;
>> + }
>> +
>> + hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
>> + switch (hsr_ec) {
>> + case HSR_EC_WFI:
>> + return kvm_handle_wfi(vcpu, run);
>> + case HSR_EC_CP15_32:
>> + case HSR_EC_CP15_64:
>> + return kvm_handle_cp15_access(vcpu, run);
>> + case HSR_EC_CP14_MR:
>> + return kvm_handle_cp14_access(vcpu, run);
>> + case HSR_EC_CP14_LS:
>> + return kvm_handle_cp14_load_store(vcpu, run);
>> + case HSR_EC_CP14_64:
>> + return kvm_handle_cp14_access(vcpu, run);
>> + case HSR_EC_CP_0_13:
>> + return kvm_handle_cp_0_13_access(vcpu, run);
>> + case HSR_EC_CP10_ID:
>> + return kvm_handle_cp10_id(vcpu, run);
>> + case HSR_EC_SVC_HYP:
>> + /* SVC called from Hyp mode should never get here */
>> + kvm_msg("SVC called from Hyp mode shouldn't go here");
>> + BUG();
>> + case HSR_EC_HVC:
>> + kvm_msg("hvc: %x (at %08x)", vcpu->arch.hsr & ((1 << 16) - 1),
>> + vcpu->arch.regs.pc);
>> + kvm_msg(" HSR: %8x", vcpu->arch.hsr);
>> + break;
>> + case HSR_EC_IABT:
>> + case HSR_EC_DABT:
>> + return kvm_handle_guest_abort(vcpu, run);
>> + case HSR_EC_IABT_HYP:
>> + case HSR_EC_DABT_HYP:
>> + /* The hypervisor should never cause aborts */
>> + kvm_msg("The hypervisor itself shouldn't cause aborts");
>> + BUG();
>> + default:
>> + kvm_msg("Unkown exception class: %08x (%08x)", hsr_ec,
>> + vcpu->arch.hsr);
>> + BUG();
>> + }
>
> x86 uses a function table, which is slightly nicer.
>
ok, completely stole the design from x86.
>> +
>> + return 0;
>> +}
>> +
>> /**
>> * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
>> * @vcpu: The VCPU pointer
>> @@ -333,6 +390,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> local_irq_enable();
>>
>> trace_kvm_exit(vcpu->arch.regs.pc);
>> +
>> + ret = handle_exit(vcpu, run, ret);
>> + if (ret) {
>> + kvm_err(ret, "Error in handle_exit");
>> + break;
>> + }
>> +
>> + if (run->exit_reason == KVM_EXIT_MMIO)
>> + break;
>> +
>> + if (need_resched()) {
>> + vcpu_put(vcpu);
>> + schedule();
>> + vcpu_load(vcpu);
>> + }
>
> I don't think you need the vcpu_put()/vcpu_load() here; you can replace
> the whole thing with cond_resched().
>
sorry again about the stupid patch layout.
>> +
>> + if (signal_pending(current) && !(run->exit_reason)) {
>> + run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
>
> This exit reason doesn't fit with KVM_IRQ_LINE.
>
> With KVM_INTERRUPT, userspace is responsible for determining when to
> inject interrupts, so it needs to know when guest interrupts become
> enabled (this is KVM_EXIT_IRQ_WINDOWS_OPEN). With KVM_IRQ_LINE,
> userspace just sets the line status, and the kernel takes care of
> everything.
>
> Oh, and you need to exit to userspace unconditionally if a signal is
> pending.
>
also fixed. sorry again.
>> + break;
>> + }
>> }
>>
>> +
>> +/******************************************************************************
>> + * Co-processor emulation
>> + */
>> +
>> +struct coproc_params {
>> + unsigned long CRm;
>> + unsigned long CRn;
>> + unsigned long Op1;
>> + unsigned long Op2;
>> + unsigned long Rt1;
>> + unsigned long Rt2;
>> + bool is_64bit;
>> + bool is_write;
>> +};
>> +
>> +#define CP15_OP(_vcpu, _params, _cp15_reg) \
>> +do { \
>> + if (_params->is_write) \
>> + _vcpu->arch.cp15._cp15_reg = *vcpu_reg(_vcpu, _params->Rt1); \
>> + else \
>> + *vcpu_reg(_vcpu, _params->Rt1) = _vcpu->arch.cp15._cp15_reg; \
>> +} while (0);
>
> Ugly. How about an array of registers instead?
ugly, but fast work-around. it has been fixed, wait for new patch revision.
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (6 preceding siblings ...)
2011-12-11 10:25 ` [PATCH v5 07/13] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
@ 2011-12-11 10:25 ` Christoffer Dall
2011-12-12 15:05 ` Avi Kivity
2011-12-11 10:25 ` [PATCH v5 09/13] ARM: KVM: Handle I/O aborts Christoffer Dall
` (5 subsequent siblings)
13 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:25 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
From: Christoffer Dall <cdall@cs.columbia.edu>
Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.
Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
pgprot_guest variables used to map 2nd stage memory for KVM guests.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/pgtable-3level.h | 8 ++
arch/arm/include/asm/pgtable.h | 4 +
arch/arm/kvm/mmu.c | 107 ++++++++++++++++++++++++++++++++-
arch/arm/mm/mmu.c | 3 +
4 files changed, 120 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index edc3cb9..6dc5331 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,6 +104,14 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
+/*
+ * 2nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 20025cc..778856b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -76,6 +76,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -89,6 +90,9 @@ extern pgprot_t pgprot_kernel;
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+ L_PTE2_WRITE | L_PTE2_NORM_WB | \
+ L_PTE2_INNER_WB)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index f7a7b17..d468238 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -229,8 +229,111 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+ pfn_t pfn;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte, new_pte;
+
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+ if (is_error_pfn(pfn)) {
+ kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
+ "corresponding host mapping",
+ gfn, gfn << PAGE_SHIFT);
+ return -EFAULT;
+ }
+
+ /* Create 2nd stage page table mapping - Level 1 */
+ pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
+ pud = pud_offset(pgd, fault_ipa);
+ if (pud_none(*pud)) {
+ pmd = pmd_alloc_one(NULL, fault_ipa);
+ if (!pmd) {
+ kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ pmd += pmd_index(fault_ipa);
+ } else
+ pmd = pmd_offset(pud, fault_ipa);
+
+ /* Create 2nd stage page table mapping - Level 2 */
+ if (pmd_none(*pmd)) {
+ pte = pte_alloc_one_kernel(NULL, fault_ipa);
+ if (!pte) {
+ kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ pte += pte_index(fault_ipa);
+ } else
+ pte = pte_offset_kernel(pmd, fault_ipa);
+
+ /* Create 2nd stage page table mapping - Level 3 */
+ new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+ set_pte_ext(pte, new_pte, 0);
+
+ return 0;
+}
+
+#define HSR_ABT_FS (0x3f)
+#define HPFAR_MASK (~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu: the VCPU pointer
+ * @run: the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- KVMARM_NOT_IMPLEMENTED();
- return -EINVAL;
+ unsigned long hsr_ec;
+ unsigned long fault_status;
+ phys_addr_t fault_ipa;
+ struct kvm_memory_slot *memslot = NULL;
+ bool is_iabt;
+ gfn_t gfn;
+
+ hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+ is_iabt = (hsr_ec == HSR_EC_IABT);
+
+ /* Check that the second stage fault is a translation fault */
+ fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+ if ((fault_status & 0x3c) != 0x4) {
+ kvm_err(-EFAULT, "Unsupported fault status: %x",
+ fault_status & 0x3c);
+ return -EFAULT;
+ }
+
+ fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+ gfn = fault_ipa >> PAGE_SHIFT;
+ if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ if (is_iabt) {
+ kvm_err(-EFAULT, "Inst. abort on I/O address");
+ return -EFAULT;
+ }
+
+ kvm_msg("I/O address abort...");
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+ }
+
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!memslot->user_alloc) {
+ kvm_err(-EINVAL, "non user-alloc memslots not supported");
+ return -EINVAL;
+ }
+
+ return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index b836d6b..1aa6e2c 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -53,9 +53,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
struct cachepolicy {
const char policy[16];
@@ -503,6 +505,7 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | kern_pgprot);
+ pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-11 10:25 ` [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM Christoffer Dall
@ 2011-12-12 15:05 ` Avi Kivity
2011-12-12 19:53 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 15:05 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:25 PM, Christoffer Dall wrote:
> From: Christoffer Dall <cdall@cs.columbia.edu>
>
> Handles the guest faults in KVM by mapping in corresponding user pages
> in the 2nd stage page tables.
>
> Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
> pgprot_guest variables used to map 2nd stage memory for KVM guests.
>
>
> +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> + gfn_t gfn, struct kvm_memory_slot *memslot)
> +{
> + pfn_t pfn;
> + pgd_t *pgd;
> + pud_t *pud;
> + pmd_t *pmd;
> + pte_t *pte, new_pte;
> +
> + pfn = gfn_to_pfn(vcpu->kvm, gfn);
> +
> + if (is_error_pfn(pfn)) {
put_page()
> + kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
> + "corresponding host mapping",
> + gfn, gfn << PAGE_SHIFT);
> + return -EFAULT;
> + }
> +
> + /* Create 2nd stage page table mapping - Level 1 */
> + pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
> + pud = pud_offset(pgd, fault_ipa);
> + if (pud_none(*pud)) {
> + pmd = pmd_alloc_one(NULL, fault_ipa);
> + if (!pmd) {
> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
put_page()
> + return -ENOMEM;
> + }
> + pud_populate(NULL, pud, pmd);
> + pmd += pmd_index(fault_ipa);
> + } else
> + pmd = pmd_offset(pud, fault_ipa);
> +
> + /* Create 2nd stage page table mapping - Level 2 */
> + if (pmd_none(*pmd)) {
> + pte = pte_alloc_one_kernel(NULL, fault_ipa);
> + if (!pte) {
> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
> + return -ENOMEM;
> + }
> + pmd_populate_kernel(NULL, pmd, pte);
> + pte += pte_index(fault_ipa);
> + } else
> + pte = pte_offset_kernel(pmd, fault_ipa);
> +
> + /* Create 2nd stage page table mapping - Level 3 */
> + new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
> + set_pte_ext(pte, new_pte, 0);
With LPAE and 40-bit addresses, a guest can cause 2GBs worth of page
tables to be pinned in host memory; this can be used as a denial of
service attack. x86 handles this by having a shrinker that can
dynamically free page tables, see mmu_shrinker.
An alternative way may be to impose RLIMIT_AS on the sum of a guest's
memory slots; though I prefer having a shrinker.
A bigger problem is that you pin all memory; what are the plans wrt mmu
notifiers?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-12 15:05 ` Avi Kivity
@ 2011-12-12 19:53 ` Christoffer Dall
2011-12-13 9:45 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 19:53 UTC (permalink / raw)
To: Avi Kivity
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On Mon, Dec 12, 2011 at 10:05 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>> From: Christoffer Dall <cdall@cs.columbia.edu>
>>
>> Handles the guest faults in KVM by mapping in corresponding user pages
>> in the 2nd stage page tables.
>>
>> Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
>> pgprot_guest variables used to map 2nd stage memory for KVM guests.
>>
>>
>> +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>> + gfn_t gfn, struct kvm_memory_slot *memslot)
>> +{
>> + pfn_t pfn;
>> + pgd_t *pgd;
>> + pud_t *pud;
>> + pmd_t *pmd;
>> + pte_t *pte, new_pte;
>> +
>> + pfn = gfn_to_pfn(vcpu->kvm, gfn);
>> +
>> + if (is_error_pfn(pfn)) {
>
> put_page()
>
ack
>> + kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
>> + "corresponding host mapping",
>> + gfn, gfn << PAGE_SHIFT);
>> + return -EFAULT;
>> + }
>> +
>> + /* Create 2nd stage page table mapping - Level 1 */
>> + pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
>> + pud = pud_offset(pgd, fault_ipa);
>> + if (pud_none(*pud)) {
>> + pmd = pmd_alloc_one(NULL, fault_ipa);
>> + if (!pmd) {
>> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
>
> put_page()
>
ack
>> + return -ENOMEM;
>> + }
>> + pud_populate(NULL, pud, pmd);
>> + pmd += pmd_index(fault_ipa);
>> + } else
>> + pmd = pmd_offset(pud, fault_ipa);
>> +
>> + /* Create 2nd stage page table mapping - Level 2 */
>> + if (pmd_none(*pmd)) {
>> + pte = pte_alloc_one_kernel(NULL, fault_ipa);
>> + if (!pte) {
>> + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
>> + return -ENOMEM;
>> + }
>> + pmd_populate_kernel(NULL, pmd, pte);
>> + pte += pte_index(fault_ipa);
>> + } else
>> + pte = pte_offset_kernel(pmd, fault_ipa);
>> +
>> + /* Create 2nd stage page table mapping - Level 3 */
>> + new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
>> + set_pte_ext(pte, new_pte, 0);
>
>
> With LPAE and 40-bit addresses, a guest can cause 2GBs worth of page
> tables to be pinned in host memory; this can be used as a denial of
> service attack. x86 handles this by having a shrinker that can
> dynamically free page tables, see mmu_shrinker.
>
> An alternative way may be to impose RLIMIT_AS on the sum of a guest's
> memory slots; though I prefer having a shrinker.
>
> A bigger problem is that you pin all memory; what are the plans wrt mmu
> notifiers?
>
hmm, I have no plans (yet).
I haven't looked into neither MMU shrinker nor MMU notifier.
As I see it, the problems of consuming too much memory just for the
page tables should be solved by somehow reclaiming pages used for the
second stage mappings, the question is just which mappings are the
most efficient to reclaim.
The other problem, the actual guest memory consuming too much memory,
I assumed this limit would be set by the user when creating his/her
VM, or can we do something smarter? (again, forgive my ignorance).
What is the alternative to pinning actual guest pages - as far as I
know it's not common to have swap space on ARM architectures, but I
could be wrong.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-12 19:53 ` Christoffer Dall
@ 2011-12-13 9:45 ` Avi Kivity
2011-12-13 13:10 ` [Android-virt] " Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-13 9:45 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/12/2011 09:53 PM, Christoffer Dall wrote:
> >
> > A bigger problem is that you pin all memory; what are the plans wrt mmu
> > notifiers?
> >
> hmm, I have no plans (yet).
>
> I haven't looked into neither MMU shrinker nor MMU notifier.
>
> As I see it, the problems of consuming too much memory just for the
> page tables should be solved by somehow reclaiming pages used for the
> second stage mappings,
That's what the shrinker does.
> the question is just which mappings are the
> most efficient to reclaim.
Do you have accessed bits in those PTEs?
It's not really critical to have efficient reclaim here, since it
happens so rarely. It just needs to do something.
> The other problem, the actual guest memory consuming too much memory,
> I assumed this limit would be set by the user when creating his/her
> VM, or can we do something smarter? (again, forgive my ignorance).
> What is the alternative to pinning actual guest pages
mmu notifiers - pages aren't pinned; instead, Linux calls back into kvm
when modifying a host pte, and kvm responds by dropping or modifying its
translation (second stage pte in your case).
> - as far as I
> know it's not common to have swap space on ARM architectures, but I
> could be wrong.
It will become common once you start doing servers.
mmu notifiers are also useful for other optimizations, like ksm,
ballooning, and transparent huge pages.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-13 9:45 ` Avi Kivity
@ 2011-12-13 13:10 ` Christoffer Dall
2011-12-13 13:17 ` Marc Zyngier
2011-12-13 13:23 ` Avi Kivity
0 siblings, 2 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-13 13:10 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marc.Zyngier, android-virt, tech
On Tue, Dec 13, 2011 at 4:45 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/12/2011 09:53 PM, Christoffer Dall wrote:
>> >
>> > A bigger problem is that you pin all memory; what are the plans wrt mmu
>> > notifiers?
>> >
>> hmm, I have no plans (yet).
>>
>> I haven't looked into neither MMU shrinker nor MMU notifier.
>>
>> As I see it, the problems of consuming too much memory just for the
>> page tables should be solved by somehow reclaiming pages used for the
>> second stage mappings,
>
> That's what the shrinker does.
>
ok, that's what I thought.
>> the question is just which mappings are the
>> most efficient to reclaim.
>
> Do you have accessed bits in those PTEs?
>
nope. We can protect the underlying target pages though, but...
> It's not really critical to have efficient reclaim here, since it
> happens so rarely. It just needs to do something.
>
when would you trigger it - when it reaches a certain limit, or? And
then what, free the lot and re-allocate what's needed?
>> The other problem, the actual guest memory consuming too much memory,
>> I assumed this limit would be set by the user when creating his/her
>> VM, or can we do something smarter? (again, forgive my ignorance).
>> What is the alternative to pinning actual guest pages
>
> mmu notifiers - pages aren't pinned; instead, Linux calls back into kvm
> when modifying a host pte, and kvm responds by dropping or modifying its
> translation (second stage pte in your case).
>
ah ok, so this works across VM boundary. Based on hyper-calls I presume?
>> - as far as I
>> know it's not common to have swap space on ARM architectures, but I
>> could be wrong.
>
> It will become common once you start doing servers.
>
I think so too, but I am not sure if it's completely supported for
ARM. Is it all arch-independent or do we miss arm-specific pieces?
Marc?
> mmu notifiers are also useful for other optimizations, like ksm,
> ballooning, and transparent huge pages.
>
I know those features have to be supported eventually. The question is
if all this must be in place before a merge upstream?
Thanks,
Christoffer
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-13 13:10 ` [Android-virt] " Christoffer Dall
@ 2011-12-13 13:17 ` Marc Zyngier
2011-12-13 13:23 ` Avi Kivity
1 sibling, 0 replies; 105+ messages in thread
From: Marc Zyngier @ 2011-12-13 13:17 UTC (permalink / raw)
To: Christoffer Dall
Cc: Avi Kivity, kvm@vger.kernel.org,
android-virt@lists.cs.columbia.edu, tech@virtualopensystems.com
On 13/12/11 13:10, Christoffer Dall wrote:
> On Tue, Dec 13, 2011 at 4:45 AM, Avi Kivity <avi@redhat.com> wrote:
>> On 12/12/2011 09:53 PM, Christoffer Dall wrote:
>>> - as far as I
>>> know it's not common to have swap space on ARM architectures, but I
>>> could be wrong.
>>
>> It will become common once you start doing servers.
>>
>
> I think so too, but I am not sure if it's completely supported for
> ARM. Is it all arch-independent or do we miss arm-specific pieces?
> Marc?
Swapping definitely works as expected on ARM (and if it doesn't, it's a
major bug and should be tackled immediately).
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-13 13:10 ` [Android-virt] " Christoffer Dall
2011-12-13 13:17 ` Marc Zyngier
@ 2011-12-13 13:23 ` Avi Kivity
2011-12-13 13:44 ` Christoffer Dall
1 sibling, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-13 13:23 UTC (permalink / raw)
To: Christoffer Dall; +Cc: kvm, Marc.Zyngier, android-virt, tech
On 12/13/2011 03:10 PM, Christoffer Dall wrote:
> >> the question is just which mappings are the
> >> most efficient to reclaim.
> >
> > Do you have accessed bits in those PTEs?
> >
>
> nope. We can protect the underlying target pages though, but...
Yeah, we have the same issue with one of the vendors. Fortunately only
90% of the market is affected.
> > It's not really critical to have efficient reclaim here, since it
> > happens so rarely. It just needs to do something.
> >
>
> when would you trigger it - when it reaches a certain limit, or? And
> then what, free the lot and re-allocate what's needed?
The kernel triggers it based on internal pressure. It tells you how
much pressure to apply, so you just translate it to a number of pages to
free.
> >> The other problem, the actual guest memory consuming too much memory,
> >> I assumed this limit would be set by the user when creating his/her
> >> VM, or can we do something smarter? (again, forgive my ignorance).
> >> What is the alternative to pinning actual guest pages
> >
> > mmu notifiers - pages aren't pinned; instead, Linux calls back into kvm
> > when modifying a host pte, and kvm responds by dropping or modifying its
> > translation (second stage pte in your case).
> >
>
> ah ok, so this works across VM boundary. Based on hyper-calls I presume?
No, it's completely internal to the host.
See for example kvm_mmu_notifier_invalidate_page() (in common code).
It's called when Linux-as-host wants to change a pte (say to swap a
page). kvm responds by translating the host virtual address into a
guest physical address (via the memory slots table), then zapping the
relevant pte and flushing any TLBs which may have cached the pte.
> > mmu notifiers are also useful for other optimizations, like ksm,
> > ballooning, and transparent huge pages.
> >
>
> I know those features have to be supported eventually. The question is
> if all this must be in place before a merge upstream?
It doesn't have to be there for the merge but I recommend giving it high
priority. At least read and understand the code so the addition will
follow naturally.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-13 13:23 ` Avi Kivity
@ 2011-12-13 13:44 ` Christoffer Dall
2011-12-13 14:27 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-13 13:44 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marc.Zyngier, android-virt, tech
On Tue, Dec 13, 2011 at 8:23 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/13/2011 03:10 PM, Christoffer Dall wrote:
>> >> the question is just which mappings are the
>> >> most efficient to reclaim.
>> >
>> > Do you have accessed bits in those PTEs?
>> >
>>
>> nope. We can protect the underlying target pages though, but...
>
> Yeah, we have the same issue with one of the vendors. Fortunately only
> 90% of the market is affected.
>
:)
>> > It's not really critical to have efficient reclaim here, since it
>> > happens so rarely. It just needs to do something.
>> >
>>
>> when would you trigger it - when it reaches a certain limit, or? And
>> then what, free the lot and re-allocate what's needed?
>
> The kernel triggers it based on internal pressure. It tells you how
> much pressure to apply, so you just translate it to a number of pages to
> free.
>
>
ok, so we pick those pages at random? (perhaps trying to avoid hitting
the guest kernel at least for Linux, or...?)
>> >> The other problem, the actual guest memory consuming too much memory,
>> >> I assumed this limit would be set by the user when creating his/her
>> >> VM, or can we do something smarter? (again, forgive my ignorance).
>> >> What is the alternative to pinning actual guest pages
>> >
>> > mmu notifiers - pages aren't pinned; instead, Linux calls back into kvm
>> > when modifying a host pte, and kvm responds by dropping or modifying its
>> > translation (second stage pte in your case).
>> >
>>
>> ah ok, so this works across VM boundary. Based on hyper-calls I presume?
>
> No, it's completely internal to the host.
>
ok, got you. I got thrown off by the "Linux calls back into kvm" statement.
> See for example kvm_mmu_notifier_invalidate_page() (in common code).
> It's called when Linux-as-host wants to change a pte (say to swap a
> page). kvm responds by translating the host virtual address into a
> guest physical address (via the memory slots table), then zapping the
> relevant pte and flushing and TLBs which may have cached the pte.
>
>> > mmu notifiers are also useful for other optimizations, like ksm,
>> > ballooning, and transparent huge pages.
>> >
>>
>> I know those features have to be supported eventually. The question is
>> if all this must be in place before a merge upstream?
>
> It doesn't have to be there for the merge but I recommend giving it high
> priority. At least read and understand the code so the addition will
> follow naturally.
>
will do - I will make it a Christmas activity.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM
2011-12-13 13:44 ` Christoffer Dall
@ 2011-12-13 14:27 ` Avi Kivity
0 siblings, 0 replies; 105+ messages in thread
From: Avi Kivity @ 2011-12-13 14:27 UTC (permalink / raw)
To: Christoffer Dall; +Cc: kvm, Marc.Zyngier, android-virt, tech
On 12/13/2011 03:44 PM, Christoffer Dall wrote:
> >> > It's not really critical to have efficient reclaim here, since it
> >> > happens so rarely. It just needs to do something.
> >> >
> >>
> >> when would you trigger it - when it reaches a certain limit, or? And
> >> then what, free the lot and re-allocate what's needed?
> >
> > The kernel triggers it based on internal pressure. It tells you how
> > much pressure to apply, so you just translate it to a number of pages to
> > free.
> >
> >
>
> ok, so we pick those pages at random? (perhaps trying to avoid hitting
> the guest kernel at least for Linux, or...?)
x86 has a sort of poorly managed LRU; it's wildly inaccurate but doesn't
hurt in practice since it only triggers under severe memory pressure anyway.
> > It doesn't have to be there for the merge but I recommend giving it high
> > priority. At least read and understand the code so the addition will
> > follow naturally.
> >
> will do - I will make it a Christmas activity.
I was hoping to get the ARM port as a present...
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 09/13] ARM: KVM: Handle I/O aborts
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (7 preceding siblings ...)
2011-12-11 10:25 ` [PATCH v5 08/13] ARM: KVM: Handle guest faults in KVM Christoffer Dall
@ 2011-12-11 10:25 ` Christoffer Dall
2011-12-12 13:54 ` Avi Kivity
2011-12-11 10:25 ` [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
` (4 subsequent siblings)
13 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:25 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
From: Christoffer Dall <cdall@cs.columbia.edu>
When the guest accesses I/O memory this will create data abort
exceptions and they are handled by decoding the HSR information
(physical address, read/write, length, register) and forwarding reads
and writes to QEMU which performs the device emulation.
Certain classes of load/store operations do not support the syndrome
information provided in the HSR and we therefore must be able to fetch
the offending instruction from guest memory and decode it manually.
This requires changing the general flow somewhat since new calls to run
the VCPU must check if there's a pending MMIO load and perform the write
after userspace has made the data available.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_emulate.h | 2
arch/arm/include/asm/kvm_host.h | 1
arch/arm/include/asm/kvm_mmu.h | 1
arch/arm/kvm/arm.c | 8 +
arch/arm/kvm/emulate.c | 288 ++++++++++++++++++++++++++++++++++++
arch/arm/kvm/mmu.c | 155 +++++++++++++++++++
arch/arm/kvm/trace.h | 22 +++
7 files changed, 470 insertions(+), 7 deletions(-)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index af21fd5..9899474 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -46,6 +46,8 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ unsigned long instr);
/*
* Return the SPSR for the specified mode of the virtual CPU.
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 59fcd15..86f6cf1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -88,6 +88,7 @@ struct kvm_vcpu_arch {
u64 pc_ipa; /* IPA for the current PC (VA to PA result) */
/* IO related fields */
+ bool mmio_sign_extend; /* for byte/halfword loads */
u32 mmio_rd;
/* Misc. fields */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 9d7440c..e82eae9 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -35,6 +35,7 @@ void free_hyp_pmds(pgd_t *hyp_pgd);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a6e1763..e5348a7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -379,6 +379,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int ret;
for (;;) {
+ if (run->exit_reason == KVM_EXIT_MMIO) {
+ ret = kvm_handle_mmio_return(vcpu, vcpu->run);
+ if (ret)
+ break;
+ }
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+
trace_kvm_entry(vcpu->arch.regs.pc);
local_irq_disable();
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index fded8c7..4fb5a7d 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -20,6 +20,7 @@
#include <asm/kvm_emulate.h>
#include <trace/events/kvm.h>
+#include "trace.h"
#include "debug.h"
#include "trace.h"
@@ -128,8 +129,30 @@ u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
}
/******************************************************************************
- * Co-processor emulation
+ * Utility functions common for all emulation code
+ *****************************************************************************/
+
+/*
+ * This one accepts a matrix where the first element is the
+ * bits as they must be, and the second element is the bitmask.
*/
+#define INSTR_NONE -1
+static int kvm_instr_index(u32 instr, u32 table[][2], int table_entries)
+{
+ int i;
+ u32 mask;
+
+ for (i = 0; i < table_entries; i++) {
+ mask = table[i][1];
+ if ((table[i][0] & mask) == (instr & mask))
+ return i;
+ }
+ return INSTR_NONE;
+}
+
+/******************************************************************************
+ * Co-processor emulation
+ *****************************************************************************/
struct coproc_params {
unsigned long CRm;
@@ -228,9 +251,11 @@ static int emulate_cp15_c10_access(struct kvm_vcpu *vcpu,
* @vcpu: The VCPU pointer
* @p: The coprocessor parameters struct pointer holding trap inst. details
*
- * The CP15 c15 register is implementation defined, but some guest kernels
- * attempt to read/write a diagnostics register here. We always return 0 and
- * ignore writes and hope for the best. This may need to be refined.
+ * The CP15 c15 register is architecturally implementation defined, but some
+ * guest kernels attempt to read/write a diagnostics register here. We always
+ * return 0 and ignore writes and hope for the best.
+ *
+ * This may need to be refined.
*/
static int emulate_cp15_c15_access(struct kvm_vcpu *vcpu,
struct coproc_params *p)
@@ -314,3 +339,258 @@ int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
return 0;
}
+
+
+/******************************************************************************
+ * Load-Store instruction emulation
+ *****************************************************************************/
+
+/*
+ * Must be ordered with LOADS first and WRITES afterwards
+ * for easy distinction when doing MMIO.
+ */
+#define NUM_LD_INSTR 9
+enum INSTR_LS_INDEXES {
+ INSTR_LS_LDRBT, INSTR_LS_LDRT, INSTR_LS_LDR, INSTR_LS_LDRB,
+ INSTR_LS_LDRD, INSTR_LS_LDREX, INSTR_LS_LDRH, INSTR_LS_LDRSB,
+ INSTR_LS_LDRSH,
+ INSTR_LS_STRBT, INSTR_LS_STRT, INSTR_LS_STR, INSTR_LS_STRB,
+ INSTR_LS_STRD, INSTR_LS_STREX, INSTR_LS_STRH,
+ NUM_LS_INSTR
+};
+
+static u32 ls_instr[NUM_LS_INSTR][2] = {
+ {0x04700000, 0x0d700000}, /* LDRBT */
+ {0x04300000, 0x0d700000}, /* LDRT */
+ {0x04100000, 0x0c500000}, /* LDR */
+ {0x04500000, 0x0c500000}, /* LDRB */
+ {0x000000d0, 0x0e1000f0}, /* LDRD */
+ {0x01900090, 0x0ff000f0}, /* LDREX */
+ {0x001000b0, 0x0e1000f0}, /* LDRH */
+ {0x001000d0, 0x0e1000f0}, /* LDRSB */
+ {0x001000f0, 0x0e1000f0}, /* LDRSH */
+ {0x04600000, 0x0d700000}, /* STRBT */
+ {0x04200000, 0x0d700000}, /* STRT */
+ {0x04000000, 0x0c500000}, /* STR */
+ {0x04400000, 0x0c500000}, /* STRB */
+ {0x000000f0, 0x0e1000f0}, /* STRD */
+ {0x01800090, 0x0ff000f0}, /* STREX */
+ {0x000000b0, 0x0e1000f0} /* STRH */
+};
+
+static inline int get_arm_ls_instr_index(u32 instr)
+{
+ return kvm_instr_index(instr, ls_instr, NUM_LS_INSTR);
+}
+
+/*
+ * Load-Store instruction decoding
+ */
+#define INSTR_LS_TYPE_BIT 26
+#define INSTR_LS_RD_MASK 0x0000f000
+#define INSTR_LS_RD_SHIFT 12
+#define INSTR_LS_RN_MASK 0x000f0000
+#define INSTR_LS_RN_SHIFT 16
+#define INSTR_LS_RM_MASK 0x0000000f
+#define INSTR_LS_OFFSET12_MASK 0x00000fff
+
+#define INSTR_LS_BIT_P 24
+#define INSTR_LS_BIT_U 23
+#define INSTR_LS_BIT_B 22
+#define INSTR_LS_BIT_W 21
+#define INSTR_LS_BIT_L 20
+#define INSTR_LS_BIT_S 6
+#define INSTR_LS_BIT_H 5
+
+/*
+ * ARM addressing mode defines
+ */
+#define OFFSET_IMM_MASK 0x0e000000
+#define OFFSET_IMM_VALUE 0x04000000
+#define OFFSET_REG_MASK 0x0e000ff0
+#define OFFSET_REG_VALUE 0x06000000
+#define OFFSET_SCALE_MASK 0x0e000010
+#define OFFSET_SCALE_VALUE 0x06000000
+
+#define SCALE_SHIFT_MASK 0x000000a0
+#define SCALE_SHIFT_SHIFT 5
+#define SCALE_SHIFT_LSL 0x0
+#define SCALE_SHIFT_LSR 0x1
+#define SCALE_SHIFT_ASR 0x2
+#define SCALE_SHIFT_ROR_RRX 0x3
+#define SCALE_SHIFT_IMM_MASK 0x00000f80
+#define SCALE_SHIFT_IMM_SHIFT 6
+
+#define PSR_BIT_C 29
+
+static unsigned long ls_word_calc_offset(struct kvm_vcpu *vcpu,
+ unsigned long instr)
+{
+ int offset = 0;
+
+ if ((instr & OFFSET_IMM_MASK) == OFFSET_IMM_VALUE) {
+ /* Immediate offset/index */
+ offset = instr & INSTR_LS_OFFSET12_MASK;
+
+ if (!(instr & (1U << INSTR_LS_BIT_U)))
+ offset = -offset;
+ }
+
+ if ((instr & OFFSET_REG_MASK) == OFFSET_REG_VALUE) {
+ /* Register offset/index */
+ u8 rm = instr & INSTR_LS_RM_MASK;
+ offset = *vcpu_reg(vcpu, rm);
+
+ if (!(instr & (1U << INSTR_LS_BIT_P)))
+ offset = 0;
+ }
+
+ if ((instr & OFFSET_SCALE_MASK) == OFFSET_SCALE_VALUE) {
+ /* Scaled register offset */
+ int asr_test;
+ u8 rm = instr & INSTR_LS_RM_MASK;
+ u8 shift = (instr & SCALE_SHIFT_MASK) >> SCALE_SHIFT_SHIFT;
+ u32 shift_imm = (instr & SCALE_SHIFT_IMM_MASK)
+ >> SCALE_SHIFT_IMM_SHIFT;
+ offset = *vcpu_reg(vcpu, rm);
+
+ switch (shift) {
+ case SCALE_SHIFT_LSL:
+ offset = offset << shift_imm;
+ break;
+ case SCALE_SHIFT_LSR:
+ if (shift_imm == 0)
+ offset = 0;
+ else
+ offset = ((u32)offset) >> shift_imm;
+ break;
+ case SCALE_SHIFT_ASR:
+ /* Test that the compiler used arithmetic right shift
+ * for signed values. */
+ asr_test = 0xffffffff;
+ BUG_ON((asr_test >> 2) >= 0);
+ if (shift_imm == 0) {
+ if (offset & (1U << 31))
+ offset = 0xffffffff;
+ else
+ offset = 0;
+ } else {
+ offset = offset >> shift_imm;
+ }
+ break;
+ case SCALE_SHIFT_ROR_RRX:
+ /* Test that the compiler used arithmetic right shift
+ * for signed values. */
+ asr_test = 0xffffffff;
+ BUG_ON((asr_test >> 2) >= 0);
+ if (shift_imm == 0) {
+ u32 C = (vcpu->arch.regs.cpsr &
+ (1U << PSR_BIT_C));
+ offset = (C << 31) | offset >> 1;
+ } else {
+ offset = ror32(offset, shift_imm);
+ }
+ break;
+ }
+
+ if (instr & (1U << INSTR_LS_BIT_U))
+ return offset;
+ else
+ return -offset;
+ }
+
+ if (instr & (1U << INSTR_LS_BIT_U))
+ return offset;
+ else
+ return -offset;
+
+ BUG();
+}
+
+static int kvm_ls_length(struct kvm_vcpu *vcpu, u32 instr)
+{
+ int index;
+
+ index = get_arm_ls_instr_index(instr);
+ BUG_ON(index == INSTR_NONE);
+
+ if (instr & (1U << INSTR_LS_TYPE_BIT)) {
+ /* LS word or unsigned byte */
+ if (instr & (1U << INSTR_LS_BIT_B))
+ return sizeof(unsigned char);
+ else
+ return sizeof(u32);
+ } else {
+ /* LS halfword, doubleword or signed byte */
+ u32 H = (instr & (1U << INSTR_LS_BIT_H));
+ u32 S = (instr & (1U << INSTR_LS_BIT_S));
+ u32 L = (instr & (1U << INSTR_LS_BIT_L));
+
+ if (!L && S) {
+ kvm_msg("WARNING: d-word for MMIO");
+ return 2 * sizeof(u32);
+ } else if (L && S && !H)
+ return sizeof(char);
+ else
+ return sizeof(u16);
+ }
+
+ BUG();
+}
+
+/**
+ * kvm_emulate_mmio_ls - emulates load/store instructions made to I/O memory
+ * @vcpu: The vcpu pointer
+ * @fault_ipa: The IPA that caused the 2nd stage fault
+ * @instr: The instruction that caused the fault
+ *
+ * Handles emulation of load/store instructions which cannot be emulated through
+ * information found in the HSR on faults. It is necessary in this case to
+ * simply decode the offending instruction in software and determine the
+ * required operands.
+ */
+int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ unsigned long instr)
+{
+ unsigned long rd, rn, offset, len;
+ int index;
+ bool is_write;
+
+ trace_kvm_mmio_emulate(vcpu->arch.regs.pc, instr, vcpu->arch.regs.cpsr);
+
+ index = get_arm_ls_instr_index(instr);
+ if (index == INSTR_NONE) {
+ kvm_err(-EINVAL, "Unknown load/store instruction");
+ return -EINVAL;
+ }
+
+ is_write = (index < NUM_LD_INSTR) ? false : true;
+ rd = (instr & INSTR_LS_RD_MASK) >> INSTR_LS_RD_SHIFT;
+ len = kvm_ls_length(vcpu, instr);
+
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->mmio.is_write = is_write;
+ vcpu->run->mmio.phys_addr = fault_ipa;
+ vcpu->run->mmio.len = len;
+ vcpu->arch.mmio_sign_extend = false;
+ vcpu->arch.mmio_rd = rd;
+
+ trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE :
+ KVM_TRACE_MMIO_READ_UNSATISFIED,
+ len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0);
+
+ /* Handle base register writeback */
+ if (!(instr & (1U << INSTR_LS_BIT_P)) ||
+ (instr & (1U << INSTR_LS_BIT_W))) {
+ rn = (instr & INSTR_LS_RN_MASK) >> INSTR_LS_RN_SHIFT;
+ offset = ls_word_calc_offset(vcpu, instr);
+ *vcpu_reg(vcpu, rn) += offset;
+ }
+
+ /*
+ * The MMIO instruction is emulated and should not be re-executed
+ * in the guest. (XXX We don't support Thumb instructions yet).
+ */
+ *vcpu_reg(vcpu, 15) += 4;
+ return 0;
+}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index d468238..7732c48 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -16,10 +16,13 @@
#include <linux/mman.h>
#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_emulate.h>
+#include "trace.h"
#include "debug.h"
pgd_t *kvm_hyp_pgd;
@@ -280,6 +283,152 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return 0;
}
+/**
+ * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ *
+ * This should only be called after returning to QEMU for MMIO load emulation.
+ */
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int *dest;
+ unsigned int len;
+ int mask;
+
+ if (!run->mmio.is_write) {
+ dest = vcpu_reg(vcpu, vcpu->arch.mmio_rd);
+ memset(dest, 0, sizeof(int));
+
+ if (run->mmio.len > 4) {
+ kvm_err(-EINVAL, "Incorrect mmio length");
+ return -EINVAL;
+ }
+
+ len = run->mmio.len;
+ memcpy(dest, run->mmio.data, len);
+
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+ *((u64 *)run->mmio.data));
+
+ if (vcpu->arch.mmio_sign_extend && len < 4) {
+ mask = 1U << ((len * 8) - 1);
+ *dest = (*dest ^ mask) - mask;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * invalid_io_mem_abort -- Handle I/O aborts ISV bit is clear
+ *
+ * @vcpu: The vcpu pointer
+ * @fault_ipa: The IPA that caused the 2nd stage fault
+ *
+ * Some load/store instructions cannot be emulated using the information
+ * presented in the HSR, for instance, register write-back instructions are not
+ * supported. We therefore need to fetch the instruction, decode it, and then
+ * emulate its behavior.
+ */
+static int invalid_io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+ unsigned long instr;
+ phys_addr_t pc_ipa;
+
+ if (vcpu->arch.pc_ipa & (1U << 11)) {
+ /* LPAE PAR format */
+ pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 32) - 1);
+ } else {
+ /* VMSAv7 PAR format */
+ pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 40) - 1);
+ }
+ pc_ipa += vcpu->arch.regs.pc & ~PAGE_MASK;
+
+ if (kvm_read_guest(vcpu->kvm, pc_ipa, &instr, sizeof(instr))) {
+ kvm_err(-EFAULT, "Could not copy guest instruction");
+ return -EFAULT;
+ }
+
+ if (vcpu->arch.regs.cpsr & PSR_T_BIT) {
+ /* Need to decode thumb instructions as well */
+ KVMARM_NOT_IMPLEMENTED();
+ return -EINVAL;
+ }
+
+ return kvm_emulate_mmio_ls(vcpu, fault_ipa, instr);
+}
+
+static int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ phys_addr_t fault_ipa, struct kvm_memory_slot *memslot)
+{
+ unsigned long rd, len, instr_len;
+ bool is_write, sign_extend;
+
+ if (!(vcpu->arch.hsr & HSR_ISV))
+ return invalid_io_mem_abort(vcpu, fault_ipa);
+
+ if (((vcpu->arch.hsr >> 8) & 1)) {
+ kvm_err(-EFAULT, "Not supported, Cache operation on I/O addr.");
+ return -EFAULT;
+ }
+
+ if ((vcpu->arch.hsr >> 7) & 1) {
+ kvm_err(-EFAULT, "Translation table accesses I/O memory");
+ return -EFAULT;
+ }
+
+ switch ((vcpu->arch.hsr >> 22) & 0x3) {
+ case 0:
+ len = 1;
+ break;
+ case 1:
+ len = 2;
+ break;
+ case 2:
+ len = 4;
+ break;
+ default:
+ kvm_err(-EFAULT, "Invalid I/O abort");
+ return -EFAULT;
+ }
+
+ is_write = ((vcpu->arch.hsr >> 6) & 1);
+ sign_extend = ((vcpu->arch.hsr >> 21) & 1);
+ rd = (vcpu->arch.hsr >> 16) & 0xf;
+ BUG_ON(rd > 15);
+
+ if (rd == 15) {
+ kvm_err(-EFAULT, "I/O memory trying to read/write pc");
+ return -EFAULT;
+ }
+
+ /* Get instruction length in bytes */
+ instr_len = ((vcpu->arch.hsr >> 25) & 1) ? 4 : 2;
+
+ /* Export MMIO operations to user space */
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.is_write = is_write;
+ run->mmio.phys_addr = fault_ipa;
+ run->mmio.len = len;
+ vcpu->arch.mmio_sign_extend = sign_extend;
+ vcpu->arch.mmio_rd = rd;
+
+ trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE :
+ KVM_TRACE_MMIO_READ_UNSATISFIED,
+ len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0);
+
+ if (is_write)
+ memcpy(run->mmio.data, vcpu_reg(vcpu, rd), len);
+
+ /*
+ * The MMIO instruction is emulated and should not be re-executed
+ * in the guest.
+ */
+ *vcpu_reg(vcpu, 15) += instr_len;
+ return 0;
+}
+
#define HSR_ABT_FS (0x3f)
#define HPFAR_MASK (~0xf)
@@ -324,9 +473,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
return -EFAULT;
}
- kvm_msg("I/O address abort...");
- KVMARM_NOT_IMPLEMENTED();
- return -EINVAL;
+ /* Adjust page offset */
+ fault_ipa += vcpu->arch.hdfar % PAGE_SIZE;
+ return io_mem_abort(vcpu, run, fault_ipa, memslot);
}
memslot = gfn_to_memslot(vcpu->kvm, gfn);
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 381ea4a..8ba3db9 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -39,6 +39,28 @@ TRACE_EVENT(kvm_exit,
TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
);
+TRACE_EVENT(kvm_mmio_emulate,
+ TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
+ unsigned long cpsr),
+ TP_ARGS(vcpu_pc, instr, cpsr),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ __field( unsigned long, instr )
+ __field( unsigned long, cpsr )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->vcpu_pc = instr;
+ __entry->vcpu_pc = cpsr;
+ ),
+
+ TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+ __entry->vcpu_pc, __entry->instr, __entry->cpsr)
+);
+
+/* Architecturally implementation defined CP15 register access */
TRACE_EVENT(kvm_emulate_cp15_imp,
TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
unsigned long CRm, unsigned long Op2, bool is_write),
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 09/13] ARM: KVM: Handle I/O aborts
2011-12-11 10:25 ` [PATCH v5 09/13] ARM: KVM: Handle I/O aborts Christoffer Dall
@ 2011-12-12 13:54 ` Avi Kivity
2011-12-12 14:56 ` [Android-virt] " Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 13:54 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:25 PM, Christoffer Dall wrote:
> From: Christoffer Dall <cdall@cs.columbia.edu>
>
> When the guest accesses I/O memory this will create data abort
> exceptions and they are handled by decoding the HSR information
> (physical address, read/write, length, register) and forwarding reads
> and writes to QEMU which performs the device emulation.
>
> Certain classes of load/store operations do not support the syndrome
> information provided in the HSR and we therefore must be able to fetch
> the offending instruction from guest memory and decode it manually.
>
> This requires changing the general flow somewhat since new calls to run
> the VCPU must check if there's a pending MMIO load and perform the write
> after userspace has made the data available.
>
> }
>
> +/**
> + * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
> + * @vcpu: The VCPU pointer
> + * @run: The VCPU run struct containing the mmio data
> + *
> + * This should only be called after returning to QEMU for MMIO load emulation.
s/to QEMU/from userspace/
> + */
> +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +{
> + int *dest;
> + unsigned int len;
> + int mask;
> +
> + if (!run->mmio.is_write) {
> + dest = vcpu_reg(vcpu, vcpu->arch.mmio_rd);
> + memset(dest, 0, sizeof(int));
> +
> + if (run->mmio.len > 4) {
> + kvm_err(-EINVAL, "Incorrect mmio length");
> + return -EINVAL;
> + }
Time of check...
> +
> + len = run->mmio.len;
> + memcpy(dest, run->mmio.data, len);
... time of use. Anything in run-> is untrusted. Best to use the
kernel's copy of len.
> +
> + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
> + *((u64 *)run->mmio.data));
> +
> + if (vcpu->arch.mmio_sign_extend && len < 4) {
> + mask = 1U << ((len * 8) - 1);
> + *dest = (*dest ^ mask) - mask;
> + }
> + }
> +
> + return 0;
> +}
> +
>
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 09/13] ARM: KVM: Handle I/O aborts
2011-12-12 13:54 ` Avi Kivity
@ 2011-12-12 14:56 ` Christoffer Dall
0 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 14:56 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marc.Zyngier, android-virt, tech
On Mon, Dec 12, 2011 at 8:54 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>> From: Christoffer Dall <cdall@cs.columbia.edu>
>>
>> When the guest accesses I/O memory this will create data abort
>> exceptions and they are handled by decoding the HSR information
>> (physical address, read/write, length, register) and forwarding reads
>> and writes to QEMU which performs the device emulation.
>>
>> Certain classes of load/store operations do not support the syndrome
>> information provided in the HSR and we therefore must be able to fetch
>> the offending instruction from guest memory and decode it manually.
>>
>> This requires changing the general flow somewhat since new calls to run
>> the VCPU must check if there's a pending MMIO load and perform the write
>> after userspace has made the data available.
>>
>> }
>>
>> +/**
>> + * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
>> + * @vcpu: The VCPU pointer
>> + * @run: The VCPU run struct containing the mmio data
>> + *
>> + * This should only be called after returning to QEMU for MMIO load emulation.
>
> s/to QEMU/from userspace/
>
>> + */
>> +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> +{
>> + int *dest;
>> + unsigned int len;
>> + int mask;
>> +
>> + if (!run->mmio.is_write) {
>> + dest = vcpu_reg(vcpu, vcpu->arch.mmio_rd);
>> + memset(dest, 0, sizeof(int));
>> +
>> + if (run->mmio.len > 4) {
>> + kvm_err(-EINVAL, "Incorrect mmio length");
>> + return -EINVAL;
>> + }
>
> Time of check...
>
>> +
>> + len = run->mmio.len;
>> + memcpy(dest, run->mmio.data, len);
>
> ... time of use. Anything in run-> is untrusted. Best to use the
> kernel's copy of len.
nice. thanks.
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (8 preceding siblings ...)
2011-12-11 10:25 ` [PATCH v5 09/13] ARM: KVM: Handle I/O aborts Christoffer Dall
@ 2011-12-11 10:25 ` Christoffer Dall
2011-12-12 14:12 ` Avi Kivity
2011-12-11 10:25 ` [PATCH v5 11/13] ARM: KVM: Support SMP hosts Christoffer Dall
` (3 subsequent siblings)
13 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:25 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
From: Christoffer Dall <cdall@cs.columbia.edu>
When the guest executes a WFI instruction the operation is trapped to
KVM, which emulates the instruction in software. There is no correlation
between a guest executing a WFI instruction and actually puttin the
hardware into a low-power mode, since a KVM guest is essentially a
process and the WFI instruction can be seen as 'sleep' call from this
process. Therefore, we flag the VCPU to be in wait_for_interrupts mode
and call the main KVM function kvm_vcpu_block() function. This function
will put the thread on a wait-queue and call schedule.
When an interrupt comes in through KVM_IRQ_LINE (see previous patch) we
signal the VCPU thread and unflag the VCPU to no longer wait for
interrupts. All calls to kvm_arch_vcpu_ioctl_run() result in a call to
kvm_vcpu_block() as long as the VCPU is in wfi-mode.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/kvm/arm.c | 33 ++++++++++++++++++++++++---------
arch/arm/kvm/emulate.c | 12 ++++++++++++
arch/arm/kvm/trace.h | 15 +++++++++++++++
3 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e5348a7..00215a1 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -302,9 +302,16 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+/**
+ * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
+ * @v: The VCPU pointer
+ *
+ * If the guest CPU is not waiting for interrupts then it is by definition
+ * runnable.
+ */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return 0;
+ return !v->arch.wait_for_interrupts;
}
static inline int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
@@ -379,6 +386,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int ret;
for (;;) {
+ if (vcpu->arch.wait_for_interrupts)
+ goto wait_for_interrupts;
+
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
@@ -408,16 +418,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (run->exit_reason == KVM_EXIT_MMIO)
break;
- if (need_resched()) {
- vcpu_put(vcpu);
- schedule();
- vcpu_load(vcpu);
- }
-
- if (signal_pending(current) && !(run->exit_reason)) {
- run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ if (need_resched())
+ kvm_resched(vcpu);
+wait_for_interrupts:
+ if (signal_pending(current)) {
+ if (!run->exit_reason) {
+ ret = -EINTR;
+ run->exit_reason = KVM_EXIT_INTR;
+ }
break;
}
+
+ if (vcpu->arch.wait_for_interrupts)
+ kvm_vcpu_block(vcpu);
}
return ret;
@@ -454,6 +467,8 @@ static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
if (irq_level->level) {
vcpu->arch.virt_irq |= mask;
vcpu->arch.wait_for_interrupts = 0;
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
} else
vcpu->arch.virt_irq &= ~mask;
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index 4fb5a7d..f60c75a 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -335,8 +335,20 @@ unsupp_err_out:
return -EINVAL;
}
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu: the vcpu pointer
+ * @run: the kvm_run structure pointer
+ *
+ * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
+ * halt execution of world-switches and schedule other host processes until
+ * there is an incoming IRQ or FIQ to the VM.
+ */
int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
+ trace_kvm_wfi(vcpu->arch.regs.pc);
+ if (!vcpu->arch.virt_irq)
+ vcpu->arch.wait_for_interrupts = 1;
return 0;
}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 8ba3db9..693da82 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -111,6 +111,21 @@ TRACE_EVENT(kvm_irq_line,
__entry->level, __entry->vcpu_idx)
);
+TRACE_EVENT(kvm_wfi,
+ TP_PROTO(unsigned long vcpu_pc),
+ TP_ARGS(vcpu_pc),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ ),
+
+ TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
+);
+
#endif /* _TRACE_KVM_H */
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support
2011-12-11 10:25 ` [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
@ 2011-12-12 14:12 ` Avi Kivity
2011-12-12 16:20 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 14:12 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:25 PM, Christoffer Dall wrote:
> From: Christoffer Dall <cdall@cs.columbia.edu>
>
> When the guest executes a WFI instruction the operation is trapped to
> KVM, which emulates the instruction in software. There is no correlation
> between a guest executing a WFI instruction and actually puttin the
putting (puttin'? putin?)
>
> hardware into a low-power mode, since a KVM guest is essentially a
> process and the WFI instruction can be seen as 'sleep' call from this
> process. Therefore, we flag the VCPU to be in wait_for_interrupts mode
> and call the main KVM function kvm_vcpu_block() function. This function
> will put the thread on a wait-queue and call schedule.
>
> When an interrupt comes in through KVM_IRQ_LINE (see previous patch) we
> signal the VCPU thread and unflag the VCPU to no longer wait for
> interrupts. All calls to kvm_arch_vcpu_ioctl_run() result in a call to
> kvm_vcpu_block() as long as the VCPU is in wfi-mode.
Ah, this addresses my previous comment on this issue.
>
> return ret;
> @@ -454,6 +467,8 @@ static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
> if (irq_level->level) {
> vcpu->arch.virt_irq |= mask;
> vcpu->arch.wait_for_interrupts = 0;
>
> + if (waitqueue_active(&vcpu->wq))
> + wake_up_interruptible(&vcpu->wq);
Not sufficient. If the guest is running, you need to kick it out of
guest mode and back into kvm, so that it samples the interrupt lines.
Also, racy:
racy:
vcpu host thread
KVM_IRQ_LINE
WFI
if (!vcpu->arch.virt_irq)
vcpu->arch.virt_irq = x
vcpu->arch.wait_for_interrupts = 0
vcpu->arch.wait_for_interrupts = 1
if (waitqueue_active()) (fails)
schedule()
>
> +/**
> + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
> + * @vcpu: the vcpu pointer
> + * @run: the kvm_run structure pointer
> + *
> + * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
> + * halt execution of world-switches and schedule other host processes until
> + * there is an incoming IRQ or FIQ to the VM.
> + */
> int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
> {
> + trace_kvm_wfi(vcpu->arch.regs.pc);
> + if (!vcpu->arch.virt_irq)
> + vcpu->arch.wait_for_interrupts = 1;
Why not just block here?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support
2011-12-12 14:12 ` Avi Kivity
@ 2011-12-12 16:20 ` Christoffer Dall
2011-12-12 17:44 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 16:20 UTC (permalink / raw)
To: Avi Kivity
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On Mon, Dec 12, 2011 at 9:12 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>> From: Christoffer Dall <cdall@cs.columbia.edu>
>>
>> When the guest executes a WFI instruction the operation is trapped to
>> KVM, which emulates the instruction in software. There is no correlation
>> between a guest executing a WFI instruction and actually puttin the
>
> putting (puttin'? putin?)
>
putting, no hidden political agenda this time.
>>
>> hardware into a low-power mode, since a KVM guest is essentially a
>> process and the WFI instruction can be seen as 'sleep' call from this
>> process. Therefore, we flag the VCPU to be in wait_for_interrupts mode
>> and call the main KVM function kvm_vcpu_block() function. This function
>> will put the thread on a wait-queue and call schedule.
>>
>> When an interrupt comes in through KVM_IRQ_LINE (see previous patch) we
>> signal the VCPU thread and unflag the VCPU to no longer wait for
>> interrupts. All calls to kvm_arch_vcpu_ioctl_run() result in a call to
>> kvm_vcpu_block() as long as the VCPU is in wfi-mode.
>
> Ah, this addresses my previous comment on this issue.
>
sorry....
>>
>> return ret;
>> @@ -454,6 +467,8 @@ static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>> if (irq_level->level) {
>> vcpu->arch.virt_irq |= mask;
>> vcpu->arch.wait_for_interrupts = 0;
>
>>
>> + if (waitqueue_active(&vcpu->wq))
>> + wake_up_interruptible(&vcpu->wq);
>
> Not sufficient. If the guest is running, you need to kick it out of
> guest mode and back into kvm, so that it samples the interrupt lines.
>
> Also, racy:
>
>
> racy:
> vcpu host thread
> KVM_IRQ_LINE
> WFI
> if (!vcpu->arch.virt_irq)
> vcpu->arch.virt_irq = x
> vcpu->arch.wait_for_interrupts = 0
> vcpu->arch.wait_for_interrupts = 1
> if (waitqueue_active()) (fails)
> schedule()
>
>
ignoring this comment, will deal with your suggestion in the following
patch. SORRY.
>>
>> +/**
>> + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
>> + * @vcpu: the vcpu pointer
>> + * @run: the kvm_run structure pointer
>> + *
>> + * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
>> + * halt execution of world-switches and schedule other host processes until
>> + * there is an incoming IRQ or FIQ to the VM.
>> + */
>> int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> {
>> + trace_kvm_wfi(vcpu->arch.regs.pc);
>> + if (!vcpu->arch.virt_irq)
>> + vcpu->arch.wait_for_interrupts = 1;
>
> Why not just block here?
>
well, if we block, but receive a signal that we want to go back into
userspace for, and then come back but the guest should still be
waiting, then I want that flag set, and I think it's the most logical
control flow. Am I missing something completely?
Thanks,
Christoffer
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support
2011-12-12 16:20 ` Christoffer Dall
@ 2011-12-12 17:44 ` Avi Kivity
2011-12-12 19:21 ` [Android-virt] " Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 17:44 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/12/2011 06:20 PM, Christoffer Dall wrote:
> >>
> >> +/**
> >> + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
> >> + * @vcpu: the vcpu pointer
> >> + * @run: the kvm_run structure pointer
> >> + *
> >> + * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
> >> + * halt execution of world-switches and schedule other host processes until
> >> + * there is an incoming IRQ or FIQ to the VM.
> >> + */
> >> int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
> >> {
> >> + trace_kvm_wfi(vcpu->arch.regs.pc);
> >> + if (!vcpu->arch.virt_irq)
> >> + vcpu->arch.wait_for_interrupts = 1;
> >
> > Why not just block here?
> >
>
> well, if we block, but receive a signal that we want to go back into
> userspace for, and then come back but the guest should still be
> waiting, then I want that flag set, and I think it's the most logical
> control flow. Am I missing something completely?
That's just not the flow that the other archs use, I don't think that it
really matters. kvm_vcpu_block() checks for and wakes up on signals, so
it's okay to call it from here directly.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support
2011-12-12 17:44 ` Avi Kivity
@ 2011-12-12 19:21 ` Christoffer Dall
2011-12-13 9:41 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 19:21 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marc.Zyngier, android-virt, tech
On Mon, Dec 12, 2011 at 12:44 PM, Avi Kivity <avi@redhat.com> wrote:
> On 12/12/2011 06:20 PM, Christoffer Dall wrote:
>> >>
>> >> +/**
>> >> + * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
>> >> + * @vcpu: the vcpu pointer
>> >> + * @run: the kvm_run structure pointer
>> >> + *
>> >> + * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
>> >> + * halt execution of world-switches and schedule other host processes until
>> >> + * there is an incoming IRQ or FIQ to the VM.
>> >> + */
>> >> int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> >> {
>> >> + trace_kvm_wfi(vcpu->arch.regs.pc);
>> >> + if (!vcpu->arch.virt_irq)
>> >> + vcpu->arch.wait_for_interrupts = 1;
>> >
>> > Why not just block here?
>> >
>>
>> well, if we block, but receive a signal that we want to go back into
>> userspace for, and then come back but the guest should still be
>> waiting, then I want that flag set, and I think it's the most logical
>> control flow. Am I missing something completely?
>
> That's just not the flow that the other archs use, I don't think that it
> really matters. kvm_vcpu_block() checks for and wakes up on signals, so
> it's okay to call it from here directly.
>
yes, but I still have to perform the check again in the main run
function if it woke up for anything else than a guest interrupt, so
why call it twice...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support
2011-12-12 19:21 ` [Android-virt] " Christoffer Dall
@ 2011-12-13 9:41 ` Avi Kivity
0 siblings, 0 replies; 105+ messages in thread
From: Avi Kivity @ 2011-12-13 9:41 UTC (permalink / raw)
To: Christoffer Dall; +Cc: kvm, Marc.Zyngier, android-virt, tech
On 12/12/2011 09:21 PM, Christoffer Dall wrote:
> >>
> >> well, if we block, but receive a signal that we want to go back into
> >> userspace for, and then come back but the guest should still be
> >> waiting, then I want that flag set, and I think it's the most logical
> >> control flow. Am I missing something completely?
> >
> > That's just not the flow that the other archs use, I don't think that it
> > really matters. kvm_vcpu_block() checks for and wakes up on signals, so
> > it's okay to call it from here directly.
> >
> yes, but I still have to perform the check again in the main run
> function if it woke up for anything else than a guest interrupt, so
> why call it twice...
You're right and in fact that's how x86 works too. I misremembered.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (9 preceding siblings ...)
2011-12-11 10:25 ` [PATCH v5 10/13] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
@ 2011-12-11 10:25 ` Christoffer Dall
2011-12-12 14:30 ` Avi Kivity
2011-12-19 6:15 ` Antonios Motakis
2011-12-11 10:25 ` [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR Christoffer Dall
` (2 subsequent siblings)
13 siblings, 2 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:25 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
In order to support KVM on a SMP host, it is necessary to initialize the
hypervisor on all CPUs, mostly by making sure each CPU gets its own
hypervisor stack and runs the HYP init code.
We also take care of some missing locking of modifications to the
hypervisor page tables and ensure synchronized consistency between
virtual IRQ masks and wait_for_interrupt flags on the VCPUs.
Note that this code doesn't handle CPU hotplug yet.
Note that this code doesn't support SMP guests.
WARNING: This code is in development and guests do not fully boot on SMP
hosts yet.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_host.h | 4 -
arch/arm/include/asm/kvm_mmu.h | 1
arch/arm/kvm/arm.c | 175 +++++++++++++++++++++++----------------
arch/arm/kvm/emulate.c | 2
arch/arm/kvm/mmu.c | 9 ++
5 files changed, 114 insertions(+), 77 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 86f6cf1..a0ffbe8 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -78,8 +78,6 @@ struct kvm_vcpu_arch {
u32 c13_TID_PRIV; /* Thread ID, Priveleged */
} cp15;
- u32 virt_irq; /* HCR exception mask */
-
/* Exception Information */
u32 hsr; /* Hyp Syndrom Register */
u32 hdfar; /* Hyp Data Fault Address Register */
@@ -92,6 +90,8 @@ struct kvm_vcpu_arch {
u32 mmio_rd;
/* Misc. fields */
+ spinlock_t irq_lock;
+ u32 virt_irq; /* HCR exception mask */
u32 wait_for_interrupts;
};
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index e82eae9..917edd7 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -28,6 +28,7 @@
#define PGD2_ORDER get_order(PTRS_PER_PGD2 * sizeof(pgd_t))
extern pgd_t *kvm_hyp_pgd;
+extern struct mutex kvm_hyp_pgd_mutex;
int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to);
void free_hyp_pmds(pgd_t *hyp_pgd);
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 00215a1..6e384e2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,7 +61,7 @@ void __kvm_print_msg(char *fmt, ...)
spin_unlock(&__tmp_log_lock);
}
-static void *kvm_arm_hyp_stack_page;
+static DEFINE_PER_CPU(void *, kvm_arm_hyp_stack_page);
/* The VMID used in the VTTBR */
#define VMID_SIZE (1<<8)
@@ -257,6 +257,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
unsigned long cpsr;
unsigned long sctlr;
+ spin_lock_init(&vcpu->arch.irq_lock);
+
/* Init execution CPSR */
asm volatile ("mrs %[cpsr], cpsr" :
[cpsr] "=r" (cpsr));
@@ -464,13 +466,27 @@ static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
+ spin_lock(&vcpu->arch.irq_lock);
if (irq_level->level) {
vcpu->arch.virt_irq |= mask;
+
+ /*
+ * Note that we grab the wq.lock before clearing the wfi flag
+ * since this ensures that a concurrent call to kvm_vcpu_block
+ * will either sleep before we grab the lock, in which case we
+ * wake it up, or will never sleep due to
+ * kvm_arch_vcpu_runnable being true (iow. this avoids having
+ * to grab the irq_lock in kvm_arch_vcpu_runnable).
+ */
+ spin_lock(&vcpu->wq.lock);
vcpu->arch.wait_for_interrupts = 0;
+
if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ __wake_up_locked(&vcpu->wq, TASK_INTERRUPTIBLE);
+ spin_unlock(&vcpu->wq.lock);
} else
vcpu->arch.virt_irq &= ~mask;
+ spin_unlock(&vcpu->arch.irq_lock);
return 0;
}
@@ -505,14 +521,49 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
+static void cpu_set_vector(void *vector)
+{
+ /*
+ * Set the HVBAR
+ */
+ asm volatile (
+ "mov r0, %[vector_ptr]\n\t"
+ "ldr r7, =SMCHYP_HVBAR_W\n\t"
+ "smc #0\n\t" : :
+ [vector_ptr] "r" (vector) :
+ "r0", "r7");
+}
+
+static void cpu_init_hyp_mode(void *vector)
+{
+ unsigned long hyp_stack_ptr;
+ void *stack_page;
+
+ stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
+ hyp_stack_ptr = (unsigned long)stack_page + PAGE_SIZE;
+
+ cpu_set_vector(vector);
+
+ /*
+ * Call initialization code
+ */
+ asm volatile (
+ "mov r0, %[pgd_ptr]\n\t"
+ "mov r1, %[stack_ptr]\n\t"
+ "hvc #0\n\t" : :
+ [pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
+ [stack_ptr] "r" (hyp_stack_ptr) :
+ "r0", "r1");
+}
+
/**
- * Inits Hyp-mode on a single CPU
+ * Inits Hyp-mode on all online CPUs
*/
static int init_hyp_mode(void)
{
phys_addr_t init_phys_addr, init_end_phys_addr;
- unsigned long hyp_stack_ptr;
int err = 0;
+ int cpu;
/*
* Allocate Hyp level-1 page table
@@ -522,47 +573,42 @@ static int init_hyp_mode(void)
return -ENOMEM;
/*
- * Allocate stack page for Hypervisor-mode
+ * Allocate stack pages for Hypervisor-mode
*/
- kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
- if (!kvm_arm_hyp_stack_page) {
- err = -ENOMEM;
- goto out_free_pgd;
- }
+ for_each_possible_cpu(cpu) {
+ void *stack_page;
- hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
+ stack_page = (void *)__get_free_page(GFP_KERNEL);
+ if (!stack_page) {
+ err = -ENOMEM;
+ goto out_free_pgd;
+ }
+
+ per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
+ }
init_phys_addr = virt_to_phys(__kvm_hyp_init);
init_end_phys_addr = virt_to_phys(__kvm_hyp_init_end);
+ BUG_ON(init_phys_addr & 0x1f);
/*
- * Create identity mapping
+ * Create identity mapping for the init code.
*/
hyp_identity_mapping_add(kvm_hyp_pgd,
(unsigned long)init_phys_addr,
(unsigned long)init_end_phys_addr);
/*
- * Set the HVBAR
- */
- BUG_ON(init_phys_addr & 0x1f);
- asm volatile (
- "mov r0, %[vector_ptr]\n\t"
- "ldr r7, =SMCHYP_HVBAR_W\n\t"
- "smc #0\n\t" : :
- [vector_ptr] "r" ((unsigned long)init_phys_addr) :
- "r0", "r7");
-
- /*
- * Call initialization code
+ * Execute the init code on each CPU.
+ *
+ * Note: The stack is not mapped yet, so don't do anything else than
+ * initializing the hypervisor mode on each CPU using a local stack
+ * space for temporary storage.
*/
- asm volatile (
- "mov r0, %[pgd_ptr]\n\t"
- "mov r1, %[stack_ptr]\n\t"
- "hvc #0\n\t" : :
- [pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
- [stack_ptr] "r" (hyp_stack_ptr) :
- "r0", "r1");
+ for_each_online_cpu(cpu) {
+ smp_call_function_single(cpu, cpu_init_hyp_mode,
+ (void *)(long)init_phys_addr, 1);
+ }
/*
* Unmap the identity mapping
@@ -572,37 +618,6 @@ static int init_hyp_mode(void)
(unsigned long)init_end_phys_addr);
/*
- * Set the HVBAR to the virtual kernel address
- */
- asm volatile (
- "mov r0, %[vector_ptr]\n\t"
- "ldr r7, =SMCHYP_HVBAR_W\n\t"
- "smc #0\n\t" : :
- [vector_ptr] "r" (__kvm_hyp_vector) :
- "r0", "r7");
-
- return err;
-out_free_pgd:
- kfree(kvm_hyp_pgd);
- kvm_hyp_pgd = NULL;
- return err;
-}
-
-/*
- * Initializes the memory mappings used in Hyp-mode
- *
- * Code executed in Hyp-mode and a stack page per cpu must be mapped into the
- * hypervisor translation tables.
- *
- * Currently there is no SMP support so we map only a single stack page on a
- * single CPU.
- */
-static int init_hyp_memory(void)
-{
- int err = 0;
- char *stack_page;
-
- /*
* Map Hyp exception vectors
*/
err = create_hyp_mappings(kvm_hyp_pgd,
@@ -623,19 +638,35 @@ static int init_hyp_memory(void)
}
/*
- * Map the Hyp stack page
+ * Map the Hyp stack pages
*/
- stack_page = kvm_arm_hyp_stack_page;
- err = create_hyp_mappings(kvm_hyp_pgd,
- stack_page, stack_page + PAGE_SIZE);
- if (err) {
- kvm_err(err, "Cannot map hyp stack");
- goto out_free_mappings;
+ for_each_possible_cpu(cpu) {
+ char *stack_page = per_cpu(kvm_arm_hyp_stack_page, cpu);
+ err = create_hyp_mappings(kvm_hyp_pgd,
+ stack_page, stack_page + PAGE_SIZE);
+
+ if (err) {
+ kvm_err(err, "Cannot map hyp stack");
+ goto out_free_mappings;
+ }
}
- return err;
+ /*
+ * Set the HVBAR to the virtual kernel address
+ */
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, cpu_set_vector,
+ __kvm_hyp_vector, 1);
+
+ return 0;
+
out_free_mappings:
free_hyp_pmds(kvm_hyp_pgd);
+ for_each_possible_cpu(cpu)
+ free_page((unsigned long)per_cpu(kvm_arm_hyp_stack_page, cpu));
+out_free_pgd:
+ kfree(kvm_hyp_pgd);
+ kvm_hyp_pgd = NULL;
return err;
}
@@ -650,10 +681,6 @@ int kvm_arch_init(void *opaque)
if (err)
goto out_err;
- err = init_hyp_memory();
- if (err)
- goto out_err;
-
set_bit(0, kvm_vmids);
return 0;
out_err:
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index f60c75a..3cb2fe4 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -347,8 +347,10 @@ unsupp_err_out:
int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
trace_kvm_wfi(vcpu->arch.regs.pc);
+ spin_lock(&vcpu->arch.irq_lock);
if (!vcpu->arch.virt_irq)
vcpu->arch.wait_for_interrupts = 1;
+ spin_unlock(&vcpu->arch.irq_lock);
return 0;
}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7732c48..50c9571 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -26,6 +26,7 @@
#include "debug.h"
pgd_t *kvm_hyp_pgd;
+DEFINE_MUTEX(kvm_hyp_pgd_mutex);
static void free_ptes(pmd_t *pmd, unsigned long addr)
{
@@ -55,6 +56,7 @@ void free_hyp_pmds(pgd_t *hyp_pgd)
pmd_t *pmd;
unsigned long addr, next, end;
+ mutex_lock(&kvm_hyp_pgd_mutex);
addr = PAGE_OFFSET;
end = ~0;
do {
@@ -71,6 +73,7 @@ void free_hyp_pmds(pgd_t *hyp_pgd)
free_ptes(pmd, addr);
pmd_free(NULL, pmd);
} while (addr = next, addr != end);
+ mutex_unlock(&kvm_hyp_pgd_mutex);
}
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long addr,
@@ -140,6 +143,7 @@ int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
if (start < PAGE_OFFSET)
return -EINVAL;
+ mutex_lock(&kvm_hyp_pgd_mutex);
addr = start;
do {
next = pgd_addr_end(addr, end);
@@ -150,7 +154,8 @@ int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
pmd = pmd_alloc_one(NULL, addr);
if (!pmd) {
kvm_err(-ENOMEM, "Cannot allocate Hyp pmd");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
pud_populate(NULL, pud, pmd);
}
@@ -160,6 +165,8 @@ int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
return err;
} while (addr = next, addr < end);
+out:
+ mutex_unlock(&kvm_hyp_pgd_mutex);
return err;
}
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-11 10:25 ` [PATCH v5 11/13] ARM: KVM: Support SMP hosts Christoffer Dall
@ 2011-12-12 14:30 ` Avi Kivity
2011-12-12 17:37 ` Christoffer Dall
2011-12-19 6:15 ` Antonios Motakis
1 sibling, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 14:30 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:25 PM, Christoffer Dall wrote:
> In order to support KVM on a SMP host, it is necessary to initialize the
> hypervisor on all CPUs, mostly by making sure each CPU gets its own
> hypervisor stack and runs the HYP init code.
>
> We also take care of some missing locking of modifications to the
> hypervisor page tables and ensure synchronized consistency between
> virtual IRQ masks and wait_for_interrupt flags on the VPUs.
>
> Note that this code doesn't handle CPU hotplug yet.
> Note that this code doesn't support SMP guests.
>
> WARNING: This code is in development and guests do not fully boot on SMP
> hosts yet.
Damn, I just reviewed all that breakage.
>
> /* Misc. fields */
> + spinlock_t irq_lock;
> + u32 virt_irq; /* HCR exception mask */
> u32 wait_for_interrupts;
Better to use atomics, IMO.
> @@ -464,13 +466,27 @@ static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>
> trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
>
> + spin_lock(&vcpu->arch.irq_lock);
> if (irq_level->level) {
> vcpu->arch.virt_irq |= mask;
> +
> + /*
> + * Note that we grab the wq.lock before clearing the wfi flag
> + * since this ensures that a concurrent call to kvm_vcpu_block
> + * will either sleep before we grab the lock, in which case we
> + * wake it up, or will never sleep due to
> + * kvm_arch_vcpu_runnable being true (iow. this avoids having
> + * to grab the irq_lock in kvm_arch_vcpu_runnable).
> + */
> + spin_lock(&vcpu->wq.lock);
> vcpu->arch.wait_for_interrupts = 0;
> +
> if (waitqueue_active(&vcpu->wq))
> - wake_up_interruptible(&vcpu->wq);
> + __wake_up_locked(&vcpu->wq, TASK_INTERRUPTIBLE);
> + spin_unlock(&vcpu->wq.lock);
> } else
> vcpu->arch.virt_irq &= ~mask;
> + spin_unlock(&vcpu->arch.irq_lock);
This looks overly complicated with two levels of locks. x86 gets by
with no locks, and a much more complicated interrupt architecture.
My recommendation is:
wait_for_interrupts is managed solely by the vcpu thread
KVM_IRQ_LINE does a set_bit(, virt_irq) for the appropriate irq type,
then IPI/wakeups the vcpu to make it examine both wait_for_interrupts
and virt_irq.
> +
> +static void cpu_init_hyp_mode(void *vector)
> +{
> + unsigned long hyp_stack_ptr;
> + void *stack_page;
> +
> + stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
> + hyp_stack_ptr = (unsigned long)stack_page + PAGE_SIZE;
> +
> + cpu_set_vector(vector);
> +
> + /*
> + * Call initialization code
> + */
> + asm volatile (
> + "mov r0, %[pgd_ptr]\n\t"
> + "mov r1, %[stack_ptr]\n\t"
> + "hvc #0\n\t" : :
> + [pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
> + [stack_ptr] "r" (hyp_stack_ptr) :
> + "r0", "r1");
> +}
(slightly nicer is to allocate hyp_stack_ptr and pgd_ptr to "register
asm("r0")" and "register asm("r1")" to avoid the extra mov instruction)
> @@ -522,47 +573,42 @@ static int init_hyp_mode(void)
> return -ENOMEM;
>
> /*
> - * Allocate stack page for Hypervisor-mode
> + * Allocate stack pages for Hypervisor-mode
> */
> - kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
> - if (!kvm_arm_hyp_stack_page) {
> - err = -ENOMEM;
> - goto out_free_pgd;
> - }
> + for_each_possible_cpu(cpu) {
> + void *stack_page;
>
> - hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
> + stack_page = (void *)__get_free_page(GFP_KERNEL);
Best to allocate this (and other per-cpu state) on the cpu's node.
> + if (!stack_page) {
> + err = -ENOMEM;
> + goto out_free_pgd;
> + }
> +
> + per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
> + }
>
> init_phys_addr = virt_to_phys(__kvm_hyp_init);
> init_end_phys_addr = virt_to_phys(__kvm_hyp_init_end);
> + BUG_ON(init_phys_addr & 0x1f);
>
> /*
> - * Create identity mapping
> + * Create identity mapping for the init code.
> */
> hyp_identity_mapping_add(kvm_hyp_pgd,
> (unsigned long)init_phys_addr,
> (unsigned long)init_end_phys_addr);
>
> + for_each_online_cpu(cpu) {
> + smp_call_function_single(cpu, cpu_init_hyp_mode,
> + (void *)(long)init_phys_addr, 1);
> + }
Need similar code for cpu hotplug. See kvm_cpu_hotplug() and
kvm_arch_hardware_enable() which do all this for you.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-12 14:30 ` Avi Kivity
@ 2011-12-12 17:37 ` Christoffer Dall
2011-12-12 17:56 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 17:37 UTC (permalink / raw)
To: Avi Kivity
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On Mon, Dec 12, 2011 at 9:30 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>> In order to support KVM on a SMP host, it is necessary to initialize the
>> hypervisor on all CPUs, mostly by making sure each CPU gets its own
>> hypervisor stack and runs the HYP init code.
>>
>> We also take care of some missing locking of modifications to the
>> hypervisor page tables and ensure synchronized consistency between
>> virtual IRQ masks and wait_for_interrupt flags on the VPUs.
>>
>> Note that this code doesn't handle CPU hotplug yet.
>> Note that this code doesn't support SMP guests.
>>
>> WARNING: This code is in development and guests do not fully boot on SMP
>> hosts yet.
>
> Damn, I just reviewed all that breakage.
>
so sorry...,
>>
>> /* Misc. fields */
>> + spinlock_t irq_lock;
>> + u32 virt_irq; /* HCR exception mask */
>> u32 wait_for_interrupts;
>
> Better to use atomics, IMO.
hmm, yeah, I guess the way to do it would be to have two fields - one
atomic field used for interrupt injection, which is read atomically in
the C-code into a plain u32 variable, which can then be copied
directly onto the hardware during the world-switch...
>
>> @@ -464,13 +466,27 @@ static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>>
>> trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);
>>
>> + spin_lock(&vcpu->arch.irq_lock);
>> if (irq_level->level) {
>> vcpu->arch.virt_irq |= mask;
>> +
>> + /*
>> + * Note that we grab the wq.lock before clearing the wfi flag
>> + * since this ensures that a concurrent call to kvm_vcpu_block
>> + * will either sleep before we grab the lock, in which case we
>> + * wake it up, or will never sleep due to
>> + * kvm_arch_vcpu_runnable being true (iow. this avoids having
>> + * to grab the irq_lock in kvm_arch_vcpu_runnable).
>> + */
>> + spin_lock(&vcpu->wq.lock);
>> vcpu->arch.wait_for_interrupts = 0;
>> +
>> if (waitqueue_active(&vcpu->wq))
>> - wake_up_interruptible(&vcpu->wq);
>> + __wake_up_locked(&vcpu->wq, TASK_INTERRUPTIBLE);
>> + spin_unlock(&vcpu->wq.lock);
>> } else
>> vcpu->arch.virt_irq &= ~mask;
>> + spin_unlock(&vcpu->arch.irq_lock);
>
> This looks overly complicated with two levels of locks. x86 gets by
> with no locks, and a much more complicated interrupt architecture.
>
> My recommendation is:
> wait_for_interrupts is managed solely by the vcpu thread
> KVM_IRQ_LINE does a set_bit(, virt_irq) for the appropriate irq type,
> then IPI/wakeups the vcpu to make it examine both wait_for_interrupts
> and virt_irq.
>
>
this sounds pretty good to me.
something like this:
if (irq_level->level) {
set_bit(&vcpu->arch.irq_lines, bit_nr);
smp_mb();
wake_up_interruptible(&vcpu->wq);
} else
clear_bit(&vcpu->arch.irq_lines, bit_nr);
and the vcpu thread would clear the wait_for_interrupts flag if it
ever sees the mask field be non-zero?
>> +
>> +static void cpu_init_hyp_mode(void *vector)
>> +{
>> + unsigned long hyp_stack_ptr;
>> + void *stack_page;
>> +
>> + stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
>> + hyp_stack_ptr = (unsigned long)stack_page + PAGE_SIZE;
>> +
>> + cpu_set_vector(vector);
>> +
>> + /*
>> + * Call initialization code
>> + */
>> + asm volatile (
>> + "mov r0, %[pgd_ptr]\n\t"
>> + "mov r1, %[stack_ptr]\n\t"
>> + "hvc #0\n\t" : :
>> + [pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
>> + [stack_ptr] "r" (hyp_stack_ptr) :
>> + "r0", "r1");
>> +}
>
> (slightly nicer is to allocate hyp_stack_ptr and pgd_ptr to "register
> asm("r0")" and "register asm("r1")" to avoid the extra mov instruction)
>
I agree
>> @@ -522,47 +573,42 @@ static int init_hyp_mode(void)
>> return -ENOMEM;
>>
>> /*
>> - * Allocate stack page for Hypervisor-mode
>> + * Allocate stack pages for Hypervisor-mode
>> */
>> - kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
>> - if (!kvm_arm_hyp_stack_page) {
>> - err = -ENOMEM;
>> - goto out_free_pgd;
>> - }
>> + for_each_possible_cpu(cpu) {
>> + void *stack_page;
>>
>> - hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
>> + stack_page = (void *)__get_free_page(GFP_KERNEL);
>
> Best to allocate this (and other per-cpu state) on the cpu's node.
>
why, for performance reasons? The code get slightly more complicated,
since we have to pass the return value through the argument so we have
to pass an opaque pointer to the struct or something like that.
>> + if (!stack_page) {
>> + err = -ENOMEM;
>> + goto out_free_pgd;
>> + }
>> +
>> + per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
>> + }
>>
>> init_phys_addr = virt_to_phys(__kvm_hyp_init);
>> init_end_phys_addr = virt_to_phys(__kvm_hyp_init_end);
>> + BUG_ON(init_phys_addr & 0x1f);
>>
>> /*
>> - * Create identity mapping
>> + * Create identity mapping for the init code.
>> */
>> hyp_identity_mapping_add(kvm_hyp_pgd,
>> (unsigned long)init_phys_addr,
>> (unsigned long)init_end_phys_addr);
>>
>> + for_each_online_cpu(cpu) {
>> + smp_call_function_single(cpu, cpu_init_hyp_mode,
>> + (void *)(long)init_phys_addr, 1);
>> + }
>
> Need similar code for cpu hotplug. See kvm_cpu_hotplug() and
> kvm_arch_hardware_enable() which do all this for you.
>
so just to be sure, this will only be called for cpus that are
hotplugged right? we still call the cpu_init_hyp_mode for each cpu
that's online at this point.
Do we need some locking to make sure the two don't overlap (like
should I grab the kvm_lock here)?
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-12 17:37 ` Christoffer Dall
@ 2011-12-12 17:56 ` Avi Kivity
2011-12-12 19:38 ` [Android-virt] " Christoffer Dall
[not found] ` <CAEDV+gJ=zeDpfp0kS2uBvmgRMyCpsV1LitjKR66R4W9Y3VGgWw@mail.gmail.com>
0 siblings, 2 replies; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 17:56 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/12/2011 07:37 PM, Christoffer Dall wrote:
> >
> > This looks overly complicated with two levels of locks. x86 gets by
> > with no locks, and a much more complicated interrupt architecture.
> >
> > My recommendation is:
> > wait_for_interrupts is managed solely by the vcpu thread
> > KVM_IRQ_LINE does a set_bit(, virt_irq) for the appropriate irq type,
> > then IPI/wakeups the vcpu to make it examine both wait_for_interrupts
> > and virt_irq.
> >
> >
> this sounds pretty good to me.
>
> something like this:
>
> if (irq_level->level) {
> set_bit(&vcpu->arch.irq_lines, bit_nr);
> smp_mb();
> wake_up_interruptible(&vcpu->wq);
or, smp_send_reschedule(). See kvm_vcpu_kick().
An optimization: do a cmpxchg() and don't wake up if the operation raised
IRQ while FIQ was set (assuming that FIQ has a higher priority than IRQ).
> } else
> clear_bit(&vcpu->arch.irq_lines, bit_nr);
>
>
> and the vcpu thread would clear the wait_for_interrupts flag if it
> ever sees the mask field be non-zero?
Yes. This is what x86 does, except it's a lot more complicated.
> >> @@ -522,47 +573,42 @@ static int init_hyp_mode(void)
> >> return -ENOMEM;
> >>
> >> /*
> >> - * Allocate stack page for Hypervisor-mode
> >> + * Allocate stack pages for Hypervisor-mode
> >> */
> >> - kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
> >> - if (!kvm_arm_hyp_stack_page) {
> >> - err = -ENOMEM;
> >> - goto out_free_pgd;
> >> - }
> >> + for_each_possible_cpu(cpu) {
> >> + void *stack_page;
> >>
> >> - hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
> >> + stack_page = (void *)__get_free_page(GFP_KERNEL);
> >
> > Best to allocate this (and other per-cpu state) on the cpu's node.
> >
>
> why, for performance reasons?
Yes, I'm assuming that all multi-socket A15s will be numa?
> The code gets slightly more complicated,
> since we have to pass the return value through the argument so we have
> to pass an opaque pointer to the struct or something like that.
Don't see why, just use alloc_pages_node().
> >>
> >> + for_each_online_cpu(cpu) {
> >> + smp_call_function_single(cpu, cpu_init_hyp_mode,
> >> + (void *)(long)init_phys_addr, 1);
> >> + }
> >
> > Need similar code for cpu hotplug. See kvm_cpu_hotplug() and
> > kvm_arch_hardware_enable() which do all this for you.
> >
> so just to be sure, this will only be called for cpus that are
> hotplugged right? we still call the cpu_init_hyp_mode for each cpu
> that's online at this point.
The infrastructure will call kvm_arch_hardware_enable() for all
currently online cpus and any future hotplugged cpu. Just follow
kvm_init() and fill in the arch callbacks. You do have a call to
kvm_init() somewhere, yes?
> Do we need some locking to make sure the two don't overlap (like
> should I grab the kvm_lock here)?
Let kvm_init() do the driving and relax.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-12 17:56 ` Avi Kivity
@ 2011-12-12 19:38 ` Christoffer Dall
[not found] ` <CAEDV+gJ=zeDpfp0kS2uBvmgRMyCpsV1LitjKR66R4W9Y3VGgWw@mail.gmail.com>
1 sibling, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 19:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marc.Zyngier, android-virt, tech
On Mon, Dec 12, 2011 at 12:56 PM, Avi Kivity <avi@redhat.com> wrote:
> On 12/12/2011 07:37 PM, Christoffer Dall wrote:
>> >
>> > This looks overly complicated with two levels of locks. x86 gets by
>> > with no locks, and a much more complicated interrupt architecture.
>> >
>> > My recommendation is:
>> > wait_for_interrupts is managed solely by the vcpu thread
>> > KVM_IRQ_LINE does a set_bit(, virt_irq) for the appropriate irq type,
>> > then IPI/wakeups the vcpu to make it examine both wait_for_interrupts
>> > and virt_irq.
>> >
>> >
>> this sounds pretty good to me.
>>
>> something like this:
>>
>> if (irq_level->level) {
>> set_bit(&vcpu->arch.irq_lines, bit_nr);
>> smp_mb();
>> wake_up_interruptible(&vcpu->wq);
>
> or, smp_send_reschedule(). See kvm_vcpu_kick().
>
> An optimization: do a cmpxchg() and don't wake up if the operation raised
> IRQ while FIQ was set (assuming that FIQ has a higher priority than IRQ).
>
>> } else
>> clear_bit(&vcpu->arch.irq_lines, bit_nr);
>>
>>
>> and the vcpu thread would clear the wait_for_interrupts flag if it
>> ever sees the mask field be non-zero?
>
> Yes. This is what x86 does, except it's a lot more complicated.
>
>> >> @@ -522,47 +573,42 @@ static int init_hyp_mode(void)
>> >> return -ENOMEM;
>> >>
>> >> /*
>> >> - * Allocate stack page for Hypervisor-mode
>> >> + * Allocate stack pages for Hypervisor-mode
>> >> */
>> >> - kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
>> >> - if (!kvm_arm_hyp_stack_page) {
>> >> - err = -ENOMEM;
>> >> - goto out_free_pgd;
>> >> - }
>> >> + for_each_possible_cpu(cpu) {
>> >> + void *stack_page;
>> >>
>> >> - hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
>> >> + stack_page = (void *)__get_free_page(GFP_KERNEL);
>> >
>> > Best to allocate this (and other per-cpu state) on the cpu's node.
>> >
>>
>> why, for performance reasons?
>
> Yes, I'm assuming that all multi-socket A15s will be numa?
>
I have no idea. Marc, Peter, Catalin?
>> The code gets slightly more complicated,
>> since we have to pass the return value through the argument so we have
>> to pass an opaque pointer to the struct or something like that.
>
> Don't see why, just use alloc_pages_node().
>
got it, I thought you wanted to issue the actual allocation from each
cpu as to parallelize the work. Now I understand.
>> >>
>> >> + for_each_online_cpu(cpu) {
>> >> + smp_call_function_single(cpu, cpu_init_hyp_mode,
>> >> + (void *)(long)init_phys_addr, 1);
>> >> + }
>> >
>> > Need similar code for cpu hotplug. See kvm_cpu_hotplug() and
>> > kvm_arch_hardware_enable() which do all this for you.
>> >
>> so just to be sure, this will only be called for cpus that are
>> hotplugged right? we still call the cpu_init_hyp_mode for each cpu
>> that's online at this point.
>
> The infrastructure will call kvm_arch_hardware_enable() for all
> currently online cpus and any future hotplugged cpu. Just follow
> kvm_init() and fill in the arch callbacks. You do have a call to
> kvm_init() somewhere, yes?
>
>> Do we need some locking to make sure the two don't overlap (like
>> should I grab the kvm_lock here)?
>
> Let kvm_init() do the driving and relax.
>
This sounds good (actually looking into this was on my todo list, so
now is a good time I guess).
^ permalink raw reply [flat|nested] 105+ messages in thread
[parent not found: <CAEDV+gJ=zeDpfp0kS2uBvmgRMyCpsV1LitjKR66R4W9Y3VGgWw@mail.gmail.com>]
* Re: [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-11 10:25 ` [PATCH v5 11/13] ARM: KVM: Support SMP hosts Christoffer Dall
2011-12-12 14:30 ` Avi Kivity
@ 2011-12-19 6:15 ` Antonios Motakis
2011-12-19 14:57 ` [Android-virt] " Christoffer Dall
1 sibling, 1 reply; 105+ messages in thread
From: Antonios Motakis @ 2011-12-19 6:15 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi,
peter.maydell
On 12/11/2011 11:25 AM, Christoffer Dall wrote:
> WARNING: This code is in development and guests do not fully boot on SMP
> hosts yet.
Hello,
What would still be needed to fully boot SMP? For example, are there
identified critical sections and structures that need to be worked on,
or there are parts that still need to be reviewed to find those? Or is
it only a matter of fixing up any existing locking/syncing introduced in
this patch?
I'd like to throw some cycles on this, so I'll start by looking in this
patch again more carefully (and guest SMP as well).
Antonios
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-19 6:15 ` Antonios Motakis
@ 2011-12-19 14:57 ` Christoffer Dall
2011-12-19 15:19 ` Marc Zyngier
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-19 14:57 UTC (permalink / raw)
To: Antonios Motakis; +Cc: kvm, Marc.Zyngier, avi, android-virt, tech
On Mon, Dec 19, 2011 at 1:15 AM, Antonios Motakis
<a.motakis@virtualopensystems.com> wrote:
> On 12/11/2011 11:25 AM, Christoffer Dall wrote:
>> WARNING: This code is in development and guests do not fully boot on SMP
>> hosts yet.
> Hello,
>
> What would still be needed to fully boot SMP? For example, are there
> identified critical sections and structures that need to be worked on,
> or there are parts that still need to be reviewed to find those? Or is
> it only a matter of fixing up any existing locking/syncing introduced in
> this patch?
>
You should simply start booting a UP guest on an SMP host, see where
it crashes and start tracking it down.
Same procedure for guest SMP.
> I'd like to throw some cycles on this, so I'll start by looking in this
> patch again more carefully (and guest SMP as well).
>
that sounds good, just go for it.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-19 14:57 ` [Android-virt] " Christoffer Dall
@ 2011-12-19 15:19 ` Marc Zyngier
2011-12-19 15:30 ` Antonios Motakis
0 siblings, 1 reply; 105+ messages in thread
From: Marc Zyngier @ 2011-12-19 15:19 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, a.motakis, tech@virtualopensystems.com,
avi@redhat.com, kvm@vger.kernel.org
On 19/12/11 14:57, Christoffer Dall wrote:
> On Mon, Dec 19, 2011 at 1:15 AM, Antonios Motakis
> <a.motakis@virtualopensystems.com> wrote:
>> On 12/11/2011 11:25 AM, Christoffer Dall wrote:
>>> WARNING: This code is in development and guests do not fully boot on SMP
>>> hosts yet.
>> Hello,
>>
>> What would still be needed to fully boot SMP? For example, are there
>> identified critical sections and structures that need to be worked on,
>> or there are parts that still need to be reviewed to find those? Or is
>> it only a matter of fixing up any existing locking/syncing introduced in
>> this patch?
>>
>
> You should simply start booting a UP guest on an SMP host, see where
> it crashes and start tracking it down.
For the time being, I've yet to see UP guest crashing on SMP host. On
the model, that is...
> Same procedure for guest SMP.
That's a very different kettle of fish. I see both CPUs starting to run,
and end up with both in WFI after a while, without any interrupt pending...
I'll investigate that as soon as I can.
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-19 15:19 ` Marc Zyngier
@ 2011-12-19 15:30 ` Antonios Motakis
2011-12-19 15:37 ` Marc Zyngier
0 siblings, 1 reply; 105+ messages in thread
From: Antonios Motakis @ 2011-12-19 15:30 UTC (permalink / raw)
To: Marc Zyngier
Cc: Christoffer Dall, android-virt, tech@virtualopensystems.com,
avi@redhat.com, kvm@vger.kernel.org
On 12/19/2011 04:19 PM, Marc Zyngier wrote:
> On 19/12/11 14:57, Christoffer Dall wrote:
>>
>>
>> You should simply start booting a UP guest on an SMP host, see where
>> it crashes and start tracking it down.
> For the time being, I've yet to see UP guest crashing on SMP host. On
> the model, that is...
>
>
Last time I tried to run a guest in a 2 core model, it hung and
crashed after a while. Anyway, I will investigate. So I gather critical
sections have been dealt with and it's just a matter of ironing bugs
right now?
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-19 15:30 ` Antonios Motakis
@ 2011-12-19 15:37 ` Marc Zyngier
2011-12-19 15:40 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Marc Zyngier @ 2011-12-19 15:37 UTC (permalink / raw)
To: Antonios Motakis
Cc: Christoffer Dall, android-virt@lists.cs.columbia.edu,
tech@virtualopensystems.com, avi@redhat.com, kvm@vger.kernel.org
On 19/12/11 15:30, Antonios Motakis wrote:
> On 12/19/2011 04:19 PM, Marc Zyngier wrote:
>> On 19/12/11 14:57, Christoffer Dall wrote:
>>>
>>>
>>> You should simply start booting a UP guest on an SMP host, see where
>>> it crashes and start tracking it down.
>> For the time being, I've yet to see UP guest crashing on SMP host. On
>> the model, that is...
>>
>>
>
> Last time I tried to run a guest in a 2 core model, it hanged and
> crashed after a while. Anyway, I will investigate. So I gather critical
> sections have been dealt with and it's just a matter of ironing bugs
> right now?
Depends when you tested. V4 was definitely unusable on SMP.
With my patches merged in Christoffer's v5 release, the basics should be
covered (correct SMP setup in the boot-wrapper and KVM, MPIDR
virtualization).
If you find any problem (and let's face it, I'm sure you will... ;-),
I'll be happy to investigate.
Cheers,
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-19 15:37 ` Marc Zyngier
@ 2011-12-19 15:40 ` Christoffer Dall
2011-12-19 15:42 ` Antonios Motakis
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-19 15:40 UTC (permalink / raw)
To: Marc Zyngier
Cc: Antonios Motakis, tech@virtualopensystems.com, avi@redhat.com,
android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
On Mon, Dec 19, 2011 at 10:37 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 19/12/11 15:30, Antonios Motakis wrote:
>> On 12/19/2011 04:19 PM, Marc Zyngier wrote:
>>> On 19/12/11 14:57, Christoffer Dall wrote:
>>>>
>>>>
>>>> You should simply start booting a UP guest on an SMP host, see where
>>>> it crashes and start tracking it down.
>>> For the time being, I've yet to see UP guest crashing on SMP host. On
>>> the model, that is...
>>>
>>>
>>
>> Last time I tried to run a guest in a 2 core model, it hanged and
>> crashed after a while. Anyway, I will investigate. So I gather critical
>> sections have been dealt with and it's just a matter of ironing bugs
>> right now?
>
> Depends when you tested. V4 was definitely unusable on SMP.
>
> With my patches merged in Christoffer's v5 release, the basics should be
> covered (correct SMP setup in the boot-wrapper and KVM, MPIDR
> virtualization).
>
> If you find any problem (and let's face it, I'm sure you will... ;-),
> I'll be happy to investigate.
>
I had the guest boot process crash on a page fault on the v5 series.
Consistently. I just didn't have time to investigate yet.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-19 15:40 ` Christoffer Dall
@ 2011-12-19 15:42 ` Antonios Motakis
2011-12-19 15:45 ` Marc Zyngier
0 siblings, 1 reply; 105+ messages in thread
From: Antonios Motakis @ 2011-12-19 15:42 UTC (permalink / raw)
To: Christoffer Dall
Cc: Marc Zyngier, tech@virtualopensystems.com, avi@redhat.com,
android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
On 12/19/2011 04:40 PM, Christoffer Dall wrote:
> On Mon, Dec 19, 2011 at 10:37 AM, Marc Zyngier<marc.zyngier@arm.com> wrote:
>> On 19/12/11 15:30, Antonios Motakis wrote:
>>> On 12/19/2011 04:19 PM, Marc Zyngier wrote:
>>>> On 19/12/11 14:57, Christoffer Dall wrote:
>>>>>
>>>>> You should simply start booting a UP guest on an SMP host, see where
>>>>> it crashes and start tracking it down.
>>>> For the time being, I've yet to see UP guest crashing on SMP host. On
>>>> the model, that is...
>>>>
>>>>
>>> Last time I tried to run a guest in a 2 core model, it hanged and
>>> crashed after a while. Anyway, I will investigate. So I gather critical
>>> sections have been dealt with and it's just a matter of ironing bugs
>>> right now?
>> Depends when you tested. V4 was definitely unusable on SMP.
>>
>> With my patches merged in Christoffer's v5 release, the basics should be
>> covered (correct SMP setup in the boot-wrapper and KVM, MPIDR
>> virtualization).
>>
>> If you find any problem (and let's face it, I'm sure you will... ;-),
>> I'll be happy to investigate.
>>
> I had the guest boot process crash on a page fault on the v5 series.
> Consistently. I just didn't have time to investigate yet.
I was also running v5, however I didn't record the exact crash behavior,
since I assumed I was having the same results as everyone. I will look
into it now.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 11/13] ARM: KVM: Support SMP hosts
2011-12-19 15:42 ` Antonios Motakis
@ 2011-12-19 15:45 ` Marc Zyngier
[not found] ` <CAEDV+gL929Hpa=PncVWeHRNAa5fBuorNNYFC=iix=PO+5aO2cg@mail.gmail.com>
0 siblings, 1 reply; 105+ messages in thread
From: Marc Zyngier @ 2011-12-19 15:45 UTC (permalink / raw)
To: Antonios Motakis
Cc: Christoffer Dall, tech@virtualopensystems.com, avi@redhat.com,
android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
On 19/12/11 15:42, Antonios Motakis wrote:
> On 12/19/2011 04:40 PM, Christoffer Dall wrote:
>> On Mon, Dec 19, 2011 at 10:37 AM, Marc Zyngier<marc.zyngier@arm.com> wrote:
>>> On 19/12/11 15:30, Antonios Motakis wrote:
>>>> On 12/19/2011 04:19 PM, Marc Zyngier wrote:
>>>>> On 19/12/11 14:57, Christoffer Dall wrote:
>>>>>>
>>>>>> You should simply start booting a UP guest on an SMP host, see where
>>>>>> it crashes and start tracking it down.
>>>>> For the time being, I've yet to see UP guest crashing on SMP host. On
>>>>> the model, that is...
>>>>>
>>>>>
>>>> Last time I tried to run a guest in a 2 core model, it hanged and
>>>> crashed after a while. Anyway, I will investigate. So I gather critical
>>>> sections have been dealt with and it's just a matter of ironing bugs
>>>> right now?
>>> Depends when you tested. V4 was definitely unusable on SMP.
>>>
>>> With my patches merged in Christoffer's v5 release, the basics should be
>>> covered (correct SMP setup in the boot-wrapper and KVM, MPIDR
>>> virtualization).
>>>
>>> If you find any problem (and let's face it, I'm sure you will... ;-),
>>> I'll be happy to investigate.
>>>
>> I had the guest boot process crash on a page fault on the v5 series.
>> Consistently. I just didn't have time to investigate yet.
>
> I was also running v5, however I didn't record the exact crash behavior,
> since I assumed I was having the same results as everyone. I will look
> into it now.
If you manage to find a way to reliably reproduce it, I really want to help
get it fixed ASAP. Kernel config and co much appreciated.
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (10 preceding siblings ...)
2011-12-11 10:25 ` [PATCH v5 11/13] ARM: KVM: Support SMP hosts Christoffer Dall
@ 2011-12-11 10:25 ` Christoffer Dall
2011-12-12 14:32 ` Avi Kivity
2011-12-11 10:25 ` [PATCH v5 13/13] ARM: KVM: Support SMP guests Christoffer Dall
2011-12-11 11:32 ` [PATCH v5 00/13] KVM/ARM Implementation Peter Maydell
13 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:25 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
From: Marc Zyngier <marc.zyngier@arm.com>
A guest may need to know which CPU it has booted on (and Linux does).
Now that we can run KVM on a SMP host, QEMU may be running on any
CPU. In that case, directly reading MPIDR will give an inconsistent
view on the guest CPU number (among other problems).
The solution is to use the VMPIDR register, which is computed by
using the host MPIDR and overriding the low bits with KVM vcpu_id.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
arch/arm/include/asm/kvm_host.h | 1 +
arch/arm/kernel/asm-offsets.c | 1 +
arch/arm/kvm/arm.c | 4 ++++
arch/arm/kvm/interrupts.S | 8 ++++++++
4 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index a0ffbe8..7fcc412 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -63,6 +63,7 @@ struct kvm_vcpu_arch {
/* System control coprocessor (cp15) */
struct {
u32 c0_MIDR; /* Main ID Register */
+ u32 c0_MPIDR; /* MultiProcessor ID Register */
u32 c1_SCTLR; /* System Control Register */
u32 c1_ACTLR; /* Auxilliary Control Register */
u32 c1_CPACR; /* Coprocessor Access Control */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index c126cfb..1c6e2ee 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -148,6 +148,7 @@ int main(void)
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.cp15.c0_MIDR));
+ DEFINE(VCPU_MPIDR, offsetof(struct kvm_vcpu, arch.cp15.c0_MPIDR));
DEFINE(VCPU_SCTLR, offsetof(struct kvm_vcpu, arch.cp15.c1_SCTLR));
DEFINE(VCPU_CPACR, offsetof(struct kvm_vcpu, arch.cp15.c1_CPACR));
DEFINE(VCPU_TTBR0, offsetof(struct kvm_vcpu, arch.cp15.c2_TTBR0));
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 6e384e2..9c5c38e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -32,6 +32,7 @@
#include <asm/ptrace.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
+#include <asm/cputype.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
@@ -270,6 +271,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
[sctlr] "=r" (sctlr));
vcpu->arch.cp15.c1_SCTLR = sctlr & ~1U;
+ /* Compute guest MPIDR */
+ vcpu->arch.cp15.c0_MPIDR = (read_cpuid_mpidr() & ~0xff) | vcpu->vcpu_id;
+
return 0;
}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index d516bf4..fbc26ca 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -245,6 +245,10 @@ ENTRY(__kvm_vcpu_run)
ldr r1, [r0, #VCPU_MIDR]
mcr p15, 4, r1, c0, c0, 0
+ @ Write guest view of MPIDR into VMPIDR
+ ldr r1, [r0, #VCPU_MPIDR]
+ mcr p15, 4, r1, c0, c0, 5
+
@ Load guest registers
add r0, r0, #(VCPU_USR_SP)
load_mode_state r0, usr
@@ -291,6 +295,10 @@ __kvm_vcpu_return:
mrc p15, 0, r2, c0, c0, 0
mcr p15, 4, r2, c0, c0, 0
+ @ Back to hardware MPIDR
+ mrc p15, 0, r2, c0, c0, 5
+ mcr p15, 4, r2, c0, c0, 5
+
@ Set VMID == 0
mov r2, #0
mov r3, #0
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR
2011-12-11 10:25 ` [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR Christoffer Dall
@ 2011-12-12 14:32 ` Avi Kivity
2011-12-12 17:39 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-12 14:32 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On 12/11/2011 12:25 PM, Christoffer Dall wrote:
> From: Marc Zyngier <marc.zyngier@arm.com>
>
> A guest may need to know which CPU it has booted on (and Linux does).
> Now that we can run KVM on a SMP host, QEMU may be running on any
s/QEMU/userspace/
> CPU. In that case, directly reading MPIDR will give an inconsistent
> view on the guest CPU number (among other problems).
>
> The solution is to use the VMPIDR register, which is computed by
> using the host MPIDR and overriding the low bits with KVM vcpu_id.
>
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Since you're posting the patch for (eventual) inclusion, you need to
sign off as well.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR
2011-12-12 14:32 ` Avi Kivity
@ 2011-12-12 17:39 ` Christoffer Dall
2011-12-12 17:44 ` Marc Zyngier
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 17:39 UTC (permalink / raw)
To: Avi Kivity
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech,
peter.maydell
On Mon, Dec 12, 2011 at 9:32 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>> From: Marc Zyngier <marc.zyngier@arm.com>
>>
>> A guest may need to know which CPU it has booted on (and Linux does).
>> Now that we can run KVM on a SMP host, QEMU may be running on any
>
> s/QEMU/userspace/
>
>> CPU. In that case, directly reading MPIDR will give an inconsistent
>> view on the guest CPU number (among other problems).
>>
>> The solution is to use the VMPIDR register, which is computed by
>> using the host MPIDR and overriding the low bits with KVM vcpu_id.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>
> Since you're posting the patch for (eventual) inclusion, you need to
> sign off as well.
>
I merged this into the world-switch patch.
Can I add Marc as signed-off-by in there then, or should he really have
written that entire patch? Marc?
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR
2011-12-12 17:39 ` Christoffer Dall
@ 2011-12-12 17:44 ` Marc Zyngier
2011-12-12 19:43 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Marc Zyngier @ 2011-12-12 17:44 UTC (permalink / raw)
To: Christoffer Dall
Cc: Avi Kivity, android-virt@lists.cs.columbia.edu,
kvm@vger.kernel.org, Catalin Marinas, tech@virtualopensystems.com,
peter.maydell@linaro.org
On 12/12/11 17:39, Christoffer Dall wrote:
> On Mon, Dec 12, 2011 at 9:32 AM, Avi Kivity <avi@redhat.com> wrote:
>> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>>> From: Marc Zyngier <marc.zyngier@arm.com>
>>>
>>> A guest may need to know which CPU it has booted on (and Linux does).
>>> Now that we can run KVM on a SMP host, QEMU may be running on any
>>
>> s/QEMU/userspace/
>>
>>> CPU. In that case, directly reading MPIDR will give an inconsistent
>>> view on the guest CPU number (among other problems).
>>>
>>> The solution is to use the VMPIDR register, which is computed by
>>> using the host MPIDR and overriding the low bits with KVM vcpu_id.
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>
>> Since you're posting the patch for (eventual) inclusion, you need to
>> sign off as well.
>>
> I merged this into the world-switch patch.
>
> Can I add mark as signed-off-by in there then or should he really have
> written that entire patch then. Marc?
I don't mind if my name appears or not, really. As long as the
functionality is in, I'm happy ;-).
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR
2011-12-12 17:44 ` Marc Zyngier
@ 2011-12-12 19:43 ` Christoffer Dall
2011-12-13 9:46 ` Avi Kivity
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2011-12-12 19:43 UTC (permalink / raw)
To: Marc Zyngier
Cc: Avi Kivity, android-virt@lists.cs.columbia.edu,
kvm@vger.kernel.org, Catalin Marinas, tech@virtualopensystems.com,
peter.maydell@linaro.org
On Mon, Dec 12, 2011 at 12:44 PM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 12/12/11 17:39, Christoffer Dall wrote:
>> On Mon, Dec 12, 2011 at 9:32 AM, Avi Kivity <avi@redhat.com> wrote:
>>> On 12/11/2011 12:25 PM, Christoffer Dall wrote:
>>>> From: Marc Zyngier <marc.zyngier@arm.com>
>>>>
>>>> A guest may need to know which CPU it has booted on (and Linux does).
>>>> Now that we can run KVM on a SMP host, QEMU may be running on any
>>>
>>> s/QEMU/userspace/
>>>
>>>> CPU. In that case, directly reading MPIDR will give an inconsistent
>>>> view on the guest CPU number (among other problems).
>>>>
>>>> The solution is to use the VMPIDR register, which is computed by
>>>> using the host MPIDR and overriding the low bits with KVM vcpu_id.
>>>>
>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>>
>>> Since you're posting the patch for (eventual) inclusion, you need to
>>> sign off as well.
>>>
>> I merged this into the world-switch patch.
>>
>> Can I add mark as signed-off-by in there then or should he really have
>> written that entire patch then. Marc?
>
> I don't mind if my name appears or not, really. As long as the
> functionality is in, I'm happy ;-).
>
ok, focus will be on patch readability for next round then. thanks.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR
2011-12-12 19:43 ` Christoffer Dall
@ 2011-12-13 9:46 ` Avi Kivity
2011-12-13 13:38 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Avi Kivity @ 2011-12-13 9:46 UTC (permalink / raw)
To: Christoffer Dall
Cc: Marc Zyngier, android-virt@lists.cs.columbia.edu,
kvm@vger.kernel.org, Catalin Marinas, tech@virtualopensystems.com,
peter.maydell@linaro.org
On 12/12/2011 09:43 PM, Christoffer Dall wrote:
> >
> ok, focus will be on patch readability for next round then. thanks.
Great, please try to keep your submarine surfaced for longer periods of
time so we can interact better.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR
2011-12-13 9:46 ` Avi Kivity
@ 2011-12-13 13:38 ` Christoffer Dall
0 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-13 13:38 UTC (permalink / raw)
To: Avi Kivity
Cc: Marc Zyngier, android-virt@lists.cs.columbia.edu,
kvm@vger.kernel.org, Catalin Marinas, tech@virtualopensystems.com,
peter.maydell@linaro.org
On Tue, Dec 13, 2011 at 4:46 AM, Avi Kivity <avi@redhat.com> wrote:
> On 12/12/2011 09:43 PM, Christoffer Dall wrote:
>> >
>> ok, focus will be on patch readability for next round then. thanks.
>
> Great, please try to keep your submarine surfaced for longer periods of
> time so we can interact better.
>
we will do our best, now there's also Virtual Open Systems and ARM
people involved, so let's try and push this as much as we can in the
near coming future.
^ permalink raw reply [flat|nested] 105+ messages in thread
* [PATCH v5 13/13] ARM: KVM: Support SMP guests
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (11 preceding siblings ...)
2011-12-11 10:25 ` [PATCH v5 12/13] ARM: KVM: Fix guest view of MPIDR Christoffer Dall
@ 2011-12-11 10:25 ` Christoffer Dall
2011-12-11 11:32 ` [PATCH v5 00/13] KVM/ARM Implementation Peter Maydell
13 siblings, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 10:25 UTC (permalink / raw)
To: android-virt, kvm; +Cc: Marc.Zyngier, catalin.marinas, tech, avi, peter.maydell
This patch is a beginning attempt to support SMP guests. So far we only
add locking for the second stage PGD stored on the kvm_arch struct.
WARNING: This code is untested and does not yet support SMP guests.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_host.h | 12 ++++++--
arch/arm/kvm/arm.c | 1 +
arch/arm/kvm/mmu.c | 57 +++++++++++++++++++++++++--------------
3 files changed, 47 insertions(+), 23 deletions(-)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7fcc412..555a6f1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -31,9 +31,15 @@ struct kvm_vcpu;
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
struct kvm_arch {
- u32 vmid; /* The VMID used for the virt. memory system */
- pgd_t *pgd; /* 1-level 2nd stage table */
- u64 vttbr; /* VTTBR value associated with above pgd and vmid */
+ /* The VMID used for the virt. memory system */
+ u32 vmid;
+
+ /* 1-level 2nd stage table and lock */
+ struct mutex pgd_mutex;
+ pgd_t *pgd;
+
+ /* VTTBR value associated with above pgd and vmid */
+ u64 vttbr;
};
#define EXCEPTION_NONE 0
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9c5c38e..14ccc4d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -119,6 +119,7 @@ int kvm_arch_init_vm(struct kvm *kvm)
ret = kvm_alloc_stage2_pgd(kvm);
if (ret)
goto out_fail_alloc;
+ mutex_init(&kvm->arch.pgd_mutex);
pgd_phys = virt_to_phys(kvm->arch.pgd);
kvm->arch.vttbr = pgd_phys & ((1LLU << 40) - 1) & ~((2 << VTTBR_X) - 1);
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 50c9571..baeb8a1 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -177,6 +177,9 @@ out:
* Allocates the 1st level table only of size defined by PGD2_ORDER (can
* support either full 40-bit input addresses or limited to 32-bit input
* addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
@@ -204,6 +207,9 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
* Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
* underlying level-2 and level-3 tables before freeing the actual level-1 table
* and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
*/
void kvm_free_stage2_pgd(struct kvm *kvm)
{
@@ -239,49 +245,38 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
-static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- gfn_t gfn, struct kvm_memory_slot *memslot)
+static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
{
- pfn_t pfn;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte, new_pte;
- pfn = gfn_to_pfn(vcpu->kvm, gfn);
-
- if (is_error_pfn(pfn)) {
- kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
- "corresponding host mapping",
- gfn, gfn << PAGE_SHIFT);
- return -EFAULT;
- }
-
/* Create 2nd stage page table mapping - Level 1 */
- pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
- pud = pud_offset(pgd, fault_ipa);
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
- pmd = pmd_alloc_one(NULL, fault_ipa);
+ pmd = pmd_alloc_one(NULL, addr);
if (!pmd) {
kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
return -ENOMEM;
}
pud_populate(NULL, pud, pmd);
- pmd += pmd_index(fault_ipa);
+ pmd += pmd_index(addr);
} else
- pmd = pmd_offset(pud, fault_ipa);
+ pmd = pmd_offset(pud, addr);
/* Create 2nd stage page table mapping - Level 2 */
if (pmd_none(*pmd)) {
- pte = pte_alloc_one_kernel(NULL, fault_ipa);
+ pte = pte_alloc_one_kernel(NULL, addr);
if (!pte) {
kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
return -ENOMEM;
}
pmd_populate_kernel(NULL, pmd, pte);
- pte += pte_index(fault_ipa);
+ pte += pte_index(addr);
} else
- pte = pte_offset_kernel(pmd, fault_ipa);
+ pte = pte_offset_kernel(pmd, addr);
/* Create 2nd stage page table mapping - Level 3 */
new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
@@ -290,6 +285,28 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return 0;
}
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+ pfn_t pfn;
+ int ret;
+
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+ if (is_error_pfn(pfn)) {
+ kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
+ "corresponding host mapping",
+ gfn, gfn << PAGE_SHIFT);
+ return -EFAULT;
+ }
+
+ mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+ ret = __user_mem_abort(vcpu->kvm, fault_ipa, pfn);
+ mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+
+ return ret;
+}
+
/**
* kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
* @vcpu: The VCPU pointer
^ permalink raw reply related [flat|nested] 105+ messages in thread
* Re: [PATCH v5 00/13] KVM/ARM Implementation
2011-12-11 10:24 [PATCH v5 00/13] KVM/ARM Implementation Christoffer Dall
` (12 preceding siblings ...)
2011-12-11 10:25 ` [PATCH v5 13/13] ARM: KVM: Support SMP guests Christoffer Dall
@ 2011-12-11 11:32 ` Peter Maydell
2011-12-11 19:23 ` Christoffer Dall
13 siblings, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2011-12-11 11:32 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi
On 11 December 2011 10:24, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> The following series implements KVM support for ARM processors,
> specifically on the Cortex A-15 platform.
> Still on the to-do list:
> - Reuse VMIDs
> - Fix SMP host support
> - Fix SMP guest support
> - Support guest Thumb mode for MMIO emulation
> - Further testing
> - Performance improvements
Other items for this list:
- Support Neon/VFP in guests (the fpu regs struct is empty ATM)
- Support guest debugging
I couldn't see any support for the TLS registers in your cp15 emulation:
did I miss it, or do we handle it without needing to trap?
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 00/13] KVM/ARM Implementation
2011-12-11 11:32 ` [PATCH v5 00/13] KVM/ARM Implementation Peter Maydell
@ 2011-12-11 19:23 ` Christoffer Dall
2011-12-11 19:27 ` Peter Maydell
2012-01-11 16:48 ` Peter Maydell
0 siblings, 2 replies; 105+ messages in thread
From: Christoffer Dall @ 2011-12-11 19:23 UTC (permalink / raw)
To: Peter Maydell; +Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi
On Sun, Dec 11, 2011 at 6:32 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 11 December 2011 10:24, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> The following series implements KVM support for ARM processors,
>> specifically on the Cortex A-15 platform.
>
>> Still on the to-do list:
>> - Reuse VMIDs
>> - Fix SMP host support
>> - Fix SMP guest support
>> - Support guest Thumb mode for MMIO emulation
>> - Further testing
>> - Performance improvements
>
> Other items for this list:
> - Support Neon/VFP in guests (the fpu regs struct is empty ATM)
> - Support guest debugging
>
ok, thanks, will add these to the list. I have a feeling it will keep
growing for a while :)
> I couldn't see any support for the TLS registers in your cp15 emulation:
> did I miss it, or do we handle it without needing to trap?
by TLS you mean the cp15, c13 registers (tid and friends?) If so, I
handle these in the world-switch code (look at read_cp15_state and
write_cp15_state).
otherwise, help me out on the acronym...
-Christoffer
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 00/13] KVM/ARM Implementation
2011-12-11 19:23 ` Christoffer Dall
@ 2011-12-11 19:27 ` Peter Maydell
2012-01-11 16:48 ` Peter Maydell
1 sibling, 0 replies; 105+ messages in thread
From: Peter Maydell @ 2011-12-11 19:27 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi
On 11 December 2011 19:23, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> by TLS you mean the cp15, c13 registers (tid and friends?) If so, I
> handle these in the world-switch code (look at read_cp15_state and
> write_cp15_state).
>
> otherwise, help me out on the acronym...
Yes, those are the ones (TLS == thread local storage). Thanks for
the pointer.
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 00/13] KVM/ARM Implementation
2011-12-11 19:23 ` Christoffer Dall
2011-12-11 19:27 ` Peter Maydell
@ 2012-01-11 16:48 ` Peter Maydell
2012-01-12 3:29 ` Christoffer Dall
1 sibling, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2012-01-11 16:48 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi,
Rusty Russell
On 11 December 2011 19:23, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> On Sun, Dec 11, 2011 at 6:32 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 11 December 2011 10:24, Christoffer Dall
>> <c.dall@virtualopensystems.com> wrote:
>>> Still on the to-do list:
>>> - Reuse VMIDs
>>> - Fix SMP host support
>>> - Fix SMP guest support
>>> - Support guest Thumb mode for MMIO emulation
>>> - Further testing
>>> - Performance improvements
>>
>> Other items for this list:
>> - Support Neon/VFP in guests (the fpu regs struct is empty ATM)
>> - Support guest debugging
>
> ok, thanks, will add these to the list. I have a feeling it will keep
> growing for a while :)
Do you have a kernel-side TODO list somewhere public (wiki page?)
(It would be quite useful to be able to boot a reasonably modern
[read, ARMv7, Thumb2, VFPv3] guest userspace; does anybody plan
to work on this part soon?)
thanks
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 00/13] KVM/ARM Implementation
2012-01-11 16:48 ` Peter Maydell
@ 2012-01-12 3:29 ` Christoffer Dall
2012-01-12 8:19 ` Peter Maydell
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2012-01-12 3:29 UTC (permalink / raw)
To: Peter Maydell
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi,
Rusty Russell
On Jan 11, 2012, at 8:48 AM, Peter Maydell wrote:
> On 11 December 2011 19:23, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> On Sun, Dec 11, 2011 at 6:32 AM, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On 11 December 2011 10:24, Christoffer Dall
>>> <c.dall@virtualopensystems.com> wrote:
>>>> Still on the to-do list:
>>>> - Reuse VMIDs
>>>> - Fix SMP host support
>>>> - Fix SMP guest support
>>>> - Support guest Thumb mode for MMIO emulation
>>>> - Further testing
>>>> - Performance improvements
>>>
>>> Other items for this list:
>>> - Support Neon/VFP in guests (the fpu regs struct is empty ATM)
>>> - Support guest debugging
>>
>> ok, thanks, will add these to the list. I have a feeling it will keep
>> growing for a while :)
>
> Do you have a kernel-side TODO list somewhere public (wiki page?)
>
I wanted to create this as issues on the github repos...
> (It would be quite useful to be able to boot a reasonably modern
> [read, ARMv7, Thumb2, VFPv3] guest userspace; does anybody plan
> to work on this part soon?)
We have booted the linaro init environment and recent Angstrom distributions. Android is being actively tested. What specifically did you have in mind?
-Christoffer
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [PATCH v5 00/13] KVM/ARM Implementation
2012-01-12 3:29 ` Christoffer Dall
@ 2012-01-12 8:19 ` Peter Maydell
2012-01-12 16:15 ` [Android-virt] " Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2012-01-12 8:19 UTC (permalink / raw)
To: Christoffer Dall
Cc: android-virt, kvm, Marc.Zyngier, catalin.marinas, tech, avi,
Rusty Russell
On 12 January 2012 03:29, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> On Jan 11, 2012, at 8:48 AM, Peter Maydell wrote:
>> (It would be quite useful to be able to boot a reasonably modern
>> [read, ARMv7, Thumb2, VFPv3] guest userspace; does anybody plan
>> to work on this part soon?)
>
> We have booted the linaro init environment and recent Angstrom
> distributions. Android is being actively tested. What specifically
> did you have in mind?
I meant fixes for these todo list items:
>>>> - Support guest Thumb mode for MMIO emulation
>>>> - Support Neon/VFP in guests (the fpu regs struct is empty ATM)
Have I missed patches which added these? Last time I tried
booting a stock linaro userspace it died very early because
of the missing Thumb2 guest support.
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-01-12 8:19 ` Peter Maydell
@ 2012-01-12 16:15 ` Christoffer Dall
2012-01-20 2:59 ` Christoffer Dall
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2012-01-12 16:15 UTC (permalink / raw)
To: Peter Maydell; +Cc: kvm, Marc.Zyngier, avi, android-virt, tech
On Thu, Jan 12, 2012 at 12:19 AM, Peter Maydell
<peter.maydell@linaro.org> wrote:
> On 12 January 2012 03:29, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> On Jan 11, 2012, at 8:48 AM, Peter Maydell wrote:
>>> (It would be quite useful to be able to boot a reasonably modern
>>> [read, ARMv7, Thumb2, VFPv3] guest userspace; does anybody plan
>>> to work on this part soon?)
>>
>> We have booted the linaro init environment and recent Angstrom
>> distributions. Android is being actively tested. What specifically
>> did you have in mind?
>
> I meant fixes for these todo list items:
>>>>> - Support guest Thumb mode for MMIO emulation
>>>>> - Support Neon/VFP in guests (the fpu regs struct is empty ATM)
>
> Have I missed patches which added these? Last time I tried
> booting a stock linaro userspace it died very early because
> of the missing Thumb2 guest support.
>
no, you're not missing anything. I am in fact working on these.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-01-12 16:15 ` [Android-virt] " Christoffer Dall
@ 2012-01-20 2:59 ` Christoffer Dall
2012-01-30 22:46 ` Peter Maydell
0 siblings, 1 reply; 105+ messages in thread
From: Christoffer Dall @ 2012-01-20 2:59 UTC (permalink / raw)
To: Peter Maydell; +Cc: kvm, Marc.Zyngier, avi, android-virt, tech
There's a new list of issues available at:
https://github.com/virtualopensystems/linux-kvm-arm/issues
-Christoffer
On Thu, Jan 12, 2012 at 11:15 AM, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> On Thu, Jan 12, 2012 at 12:19 AM, Peter Maydell
> <peter.maydell@linaro.org> wrote:
>> On 12 January 2012 03:29, Christoffer Dall
>> <c.dall@virtualopensystems.com> wrote:
>>> On Jan 11, 2012, at 8:48 AM, Peter Maydell wrote:
>>>> (It would be quite useful to be able to boot a reasonably modern
>>>> [read, ARMv7, Thumb2, VFPv3] guest userspace; does anybody plan
>>>> to work on this part soon?)
>>>
>>> We have booted the linaro init environment and recent Angstrom
>>> distributions. Android is being actively tested. What specifically
>>> did you have in mind?
>>
>> I meant fixes for these todo list items:
>>>>>> - Support guest Thumb mode for MMIO emulation
>>>>>> - Support Neon/VFP in guests (the fpu regs struct is empty ATM)
>>
>> Have I missed patches which added these? Last time I tried
>> booting a stock linaro userspace it died very early because
>> of the missing Thumb2 guest support.
>>
> no, you're not missing anything. I am in fact working on these.
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-01-20 2:59 ` Christoffer Dall
@ 2012-01-30 22:46 ` Peter Maydell
2012-01-30 23:02 ` Alexander Graf
` (2 more replies)
0 siblings, 3 replies; 105+ messages in thread
From: Peter Maydell @ 2012-01-30 22:46 UTC (permalink / raw)
To: Christoffer Dall; +Cc: kvm, Marc.Zyngier, avi, android-virt, tech
On 20 January 2012 02:59, Christoffer Dall
<c.dall@virtualopensystems.com> wrote:
> There's a new list of issues available at:
>
> https://github.com/virtualopensystems/linux-kvm-arm/issues
Thanks for putting this up. Here's a couple more for you :-)
* Support guest kernels configured for LPAE
At the moment (well, if you have Marc's 3.3rc1 tree with the A15 L2
cache control register bodge in it) you can boot an A15 kernel
configured without LPAE as a KVM guest, but an LPAE kernel with LPAE
enabled will not boot. This probably mostly requires supporting the
64 bit wide cp15 registers that LPAE implies.
* handle QEMU being ^C'd
Currently if you ^C the qemu process then instead of a nice
clean exit things go a bit pear shaped, with apparently part
of the qemu/kvm combo having quit and the rest not and lots
of error messages being emitted. (I admit to not having looked
at this one enough to be certain that it's a kernel side thing
rather than a QEMU one; I'm just guessing.)
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-01-30 22:46 ` Peter Maydell
@ 2012-01-30 23:02 ` Alexander Graf
2012-01-31 14:39 ` Antonios Motakis
2012-02-01 12:11 ` Marc Zyngier
2 siblings, 0 replies; 105+ messages in thread
From: Alexander Graf @ 2012-01-30 23:02 UTC (permalink / raw)
To: Peter Maydell
Cc: Christoffer Dall, Marc.Zyngier, tech, android-virt, avi, kvm
On 30.01.2012, at 23:46, Peter Maydell wrote:
> On 20 January 2012 02:59, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> There's a new list of issues available at:
>>
>> https://github.com/virtualopensystems/linux-kvm-arm/issues
>
> Thanks for putting this up. Here's a couple more for you :-)
>
> * Support guest kernels configured for LPAE
>
> At the moment (well, if you have Marc's 3.3rc1 tree with the A15 L2
> cache control register bodge in it) you can boot an A15 kernel
> configured without LPAE as a KVM guest, but an LPAE kernel with LPAE
> enabled will not boot. This probably mostly requires supporting the
> 64 bit wide cp15 registers that LPAE implies.
>
> * handle QEMU being ^C'd
>
> Currently if you ^C the qemu process then instead of a nice
> clean exit things go a bit pear shaped, with apparently part
> of the qemu/kvm combo having quit and the rest not and lots
> of error messages being emitted. (I admit to not having looked
> at this one enough to be certain that it's a kernel side thing
> rather than a QEMU one; I'm just guessing.)
That one sounds like missing signal detection in the kvm code. I remember having had that in ppc kvm too during early development.
Alex
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-01-30 22:46 ` Peter Maydell
2012-01-30 23:02 ` Alexander Graf
@ 2012-01-31 14:39 ` Antonios Motakis
2012-02-01 12:11 ` Marc Zyngier
2 siblings, 0 replies; 105+ messages in thread
From: Antonios Motakis @ 2012-01-31 14:39 UTC (permalink / raw)
To: Peter Maydell
Cc: Christoffer Dall, kvm, Marc.Zyngier, avi, android-virt, tech
On 01/30/2012 11:46 PM, Peter Maydell wrote:
> On 20 January 2012 02:59, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> There's a new list of issues available at:
>>
>> https://github.com/virtualopensystems/linux-kvm-arm/issues
> Thanks for putting this up. Here's a couple more for you :-)
>
> * Support guest kernels configured for LPAE
>
> At the moment (well, if you have Marc's 3.3rc1 tree with the A15 L2
> cache control register bodge in it) you can boot an A15 kernel
> configured without LPAE as a KVM guest, but an LPAE kernel with LPAE
> enabled will not boot. This probably mostly requires supporting the
> 64 bit wide cp15 registers that LPAE implies.
>
> * handle QEMU being ^C'd
>
> Currently if you ^C the qemu process then instead of a nice
> clean exit things go a bit pear shaped, with apparently part
> of the qemu/kvm combo having quit and the rest not and lots
> of error messages being emitted. (I admit to not having looked
> at this one enough to be certain that it's a kernel side thing
> rather than a QEMU one; I'm just guessing.)
>
> -- PMM
I took the initiative to add them to that list, thanks.
-Antonios
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-01-30 22:46 ` Peter Maydell
2012-01-30 23:02 ` Alexander Graf
2012-01-31 14:39 ` Antonios Motakis
@ 2012-02-01 12:11 ` Marc Zyngier
2012-02-01 12:20 ` Peter Maydell
2 siblings, 1 reply; 105+ messages in thread
From: Marc Zyngier @ 2012-02-01 12:11 UTC (permalink / raw)
To: Peter Maydell
Cc: Christoffer Dall, kvm@vger.kernel.org, avi@redhat.com,
android-virt@lists.cs.columbia.edu, tech@virtualopensystems.com
On 30/01/12 22:46, Peter Maydell wrote:
> On 20 January 2012 02:59, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> There's a new list of issues available at:
>>
>> https://github.com/virtualopensystems/linux-kvm-arm/issues
>
> Thanks for putting this up. Here's a couple more for you :-)
>
> * Support guest kernels configured for LPAE
>
> At the moment (well, if you have Marc's 3.3rc1 tree with the A15 L2
> cache control register bodge in it) you can boot an A15 kernel
> configured without LPAE as a KVM guest, but an LPAE kernel with LPAE
> enabled will not boot. This probably mostly requires supporting the
> 64 bit wide cp15 registers that LPAE implies.
So I had a look at that one. LPAE is not the problem (an LPAE kernel
runs fine as a guest), but a KVM-enabled kernel dies trying to execute
an SMC instruction.
The obvious fix would be to test for the virt extensions in ID_PFR1
before enabling KVM, and mask out the virt extensions from the same
register in the guest by trapping CP15-c0. But c0 is used by cache
maintenance operations, and trapping would lead to unacceptable
performance degradation.
Ideas anyone?
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-02-01 12:11 ` Marc Zyngier
@ 2012-02-01 12:20 ` Peter Maydell
2012-02-01 13:40 ` Marc Zyngier
0 siblings, 1 reply; 105+ messages in thread
From: Peter Maydell @ 2012-02-01 12:20 UTC (permalink / raw)
To: Marc Zyngier
Cc: Christoffer Dall, kvm@vger.kernel.org, avi@redhat.com,
android-virt@lists.cs.columbia.edu, tech@virtualopensystems.com
On 1 February 2012 12:11, Marc Zyngier <marc.zyngier@arm.com> wrote:
> The obvious fix would be to test for the virt extensions in ID_PFR1
> before enabling KVM, and mask out the virt extensions from the same
> register in the guest by trapping CP15-c0. But c0 is used by cache
> maintenance operations, and trapping would lead to unacceptable
> performance degradation.
You can just set HCR.TID3 to get hyp traps on only the feature
ID registers, right? (traps access to only ID_PFR0, ID_PFR1, ID_DFR0,
ID_AFR0, ID_MMFR0, ID_MMFR1, ID_MMFR2, ID_MMFR3, ID_ISAR0, ID_ISAR1,
ID_ISAR2, ID_ISAR3, ID_ISAR4, ID_ISAR5, MVFR0, and MVFR1, none
of which should be on performance-critical paths I hope.)
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-02-01 12:20 ` Peter Maydell
@ 2012-02-01 13:40 ` Marc Zyngier
2012-02-01 13:57 ` Peter Maydell
2012-02-01 13:59 ` Christoffer Dall
0 siblings, 2 replies; 105+ messages in thread
From: Marc Zyngier @ 2012-02-01 13:40 UTC (permalink / raw)
To: Peter Maydell
Cc: Christoffer Dall, kvm@vger.kernel.org, avi@redhat.com,
android-virt@lists.cs.columbia.edu, tech@virtualopensystems.com
On 01/02/12 12:20, Peter Maydell wrote:
> On 1 February 2012 12:11, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> The obvious fix would be to test for the virt extensions in ID_PFR1
>> before enabling KVM, and mask out the virt extensions from the same
>> register in the guest by trapping CP15-c0. But c0 is used by cache
>> maintenance operations, and trapping would lead to unacceptable
>> performance degradation.
>
> You can just set HCR.TID3 to get hyp traps on only the feature
> ID registers, right? (traps access to only ID_PFR0, ID_PFR1, ID_DFR0,
> ID_AFR0, ID_MMFR0, ID_MMFR1, ID_MMFR2, ID_MMFR3, ID_ISAR0, ID_ISAR1,
> ID_ISAR2, ID_ISAR3, ID_ISAR4, ID_ISAR5, MVFR0, and MVFR1, none
> of which should be on performance-critical paths I hope.)
Good catch. It looks much better, except that ID_MMFR3 is used in
cpu_v7_set_pte_ext(), which is called each time you map a page. Ouch.
I'm not even sure we can cache ID_MMFR3 in the kernel (multi-cluster
systems may have different features).
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-02-01 13:40 ` Marc Zyngier
@ 2012-02-01 13:57 ` Peter Maydell
2012-02-01 13:59 ` Christoffer Dall
1 sibling, 0 replies; 105+ messages in thread
From: Peter Maydell @ 2012-02-01 13:57 UTC (permalink / raw)
To: Marc Zyngier
Cc: Christoffer Dall, kvm@vger.kernel.org, avi@redhat.com,
android-virt@lists.cs.columbia.edu, tech@virtualopensystems.com
On 1 February 2012 13:40, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 01/02/12 12:20, Peter Maydell wrote:
>> On 1 February 2012 12:11, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> The obvious fix would be to test for the virt extensions in ID_PFR1
>>> before enabling KVM, and mask out the virt extensions from the same
>>> register in the guest by trapping CP15-c0. But c0 is used by cache
>>> maintenance operations, and trapping would lead to unacceptable
>>> performance degradation.
>>
>> You can just set HCR.TID3 to get hyp traps on only the feature
>> ID registers, right? (traps access to only ID_PFR0, ID_PFR1, ID_DFR0,
>> ID_AFR0, ID_MMFR0, ID_MMFR1, ID_MMFR2, ID_MMFR3, ID_ISAR0, ID_ISAR1,
>> ID_ISAR2, ID_ISAR3, ID_ISAR4, ID_ISAR5, MVFR0, and MVFR1, none
>> of which should be on performance-critical paths I hope.)
>
> Good catch. It looks much better, except that ID_MMFR3 is used in
> cpu_v7_set_pte_ext(), which is called each time you map a page. Ouch.
>
> I'm not even sure we can cache ID_MMFR3 in the kernel (multi-cluster
> systems may have different features).
We only look at the coherent-walk bits there, right? We can just
cache a single "does any core require clean-to-PoU after translation
table update?" [ie "does any core have 0 in this field?"] flag,
I guess. The chances that you actually have a cluster with different
cores differing here seem quite low[*], and if we do encounter one then
cleaning the dcache line even on the cores that don't need it would
be a safe if slightly less than optimal approach...
[*] particularly since every multicore Cortex A-class processor
supports coherent walk...
-- PMM
^ permalink raw reply [flat|nested] 105+ messages in thread
* Re: [Android-virt] [PATCH v5 00/13] KVM/ARM Implementation
2012-02-01 13:40 ` Marc Zyngier
2012-02-01 13:57 ` Peter Maydell
@ 2012-02-01 13:59 ` Christoffer Dall
1 sibling, 0 replies; 105+ messages in thread
From: Christoffer Dall @ 2012-02-01 13:59 UTC (permalink / raw)
To: Marc Zyngier
Cc: Peter Maydell, kvm@vger.kernel.org, avi@redhat.com,
android-virt@lists.cs.columbia.edu, tech@virtualopensystems.com
If the final implementation is going to be as per this thread:
http://lists.linaro.org/pipermail/boot-architecture/2011-August/000058.html
then this shouldn't actually be an issue right?
On Wed, Feb 1, 2012 at 8:40 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 01/02/12 12:20, Peter Maydell wrote:
>> On 1 February 2012 12:11, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> The obvious fix would be to test for the virt extensions in ID_PFR1
>>> before enabling KVM, and mask out the virt extensions from the same
>>> register in the guest by trapping CP15-c0. But c0 is used by cache
>>> maintenance operations, and trapping would lead to unacceptable
>>> performance degradation.
>>
>> You can just set HCR.TID3 to get hyp traps on only the feature
>> ID registers, right? (traps access to only ID_PFR0, ID_PFR1, ID_DFR0,
>> ID_AFR0, ID_MMFR0, ID_MMFR1, ID_MMFR2, ID_MMFR3, ID_ISAR0, ID_ISAR1,
>> ID_ISAR2, ID_ISAR3, ID_ISAR4, ID_ISAR5, MVFR0, and MVFR1, none
>> of which should be on performance-critical paths I hope.)
>
> Good catch. It looks much better, except that ID_MMFR3 is used in
> cpu_v7_set_pte_ext(), which is called each time you map a page. Ouch.
>
> I'm not even sure we can cache ID_MMFR3 in the kernel (multi-cluster
> systems may have different features).
>
> M.
> --
> Jazz is not dead. It just smells funny...
>
^ permalink raw reply [flat|nested] 105+ messages in thread