From: Christoffer Dall <c.dall@virtualopensystems.com>
To: android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
Subject: [PATCH v8 13/15] ARM: KVM: Handle guest faults in KVM
Date: Fri, 15 Jun 2012 15:09:08 -0400 [thread overview]
Message-ID: <20120615190908.24590.95861.stgit@ubuntu> (raw)
In-Reply-To: <20120615190553.24590.18391.stgit@ubuntu>
From: Christoffer Dall <cdall@cs.columbia.edu>
Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.
Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
pgprot_guest variables used to map 2nd stage memory for KVM guests.
Leverages MMU notifiers on KVM/ARM by supporting the kvm_unmap_hva() operation,
where we remove the HVA from the 2nd stage translation. All other KVM MMU
notifierhooks are NOPs.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_asm.h | 3 +
arch/arm/include/asm/kvm_host.h | 20 ++++
arch/arm/include/asm/pgtable-3level.h | 8 ++
arch/arm/include/asm/pgtable.h | 4 +
arch/arm/kvm/Kconfig | 1
arch/arm/kvm/exports.c | 1
arch/arm/kvm/interrupts.S | 37 +++++++
arch/arm/kvm/mmu.c | 164 +++++++++++++++++++++++++++++++++
arch/arm/mm/mmu.c | 3 +
9 files changed, 240 insertions(+), 1 deletion(-)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index c2ec131..c4f40f3 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -32,6 +32,7 @@
#define SMCHYP_HVBAR_W 0xfffffff0
#ifndef __ASSEMBLY__
+struct kvm;
struct kvm_vcpu;
extern char __kvm_hyp_init[];
@@ -45,6 +46,8 @@ extern char __kvm_hyp_vector[];
extern char __kvm_hyp_code_start[];
extern char __kvm_hyp_code_end[];
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
extern void __kvm_flush_vm_context(void);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ed4144b..21712c5 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -116,4 +116,24 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva,
+ pte_t pte)
+{
+}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 1169a8a..e10bb5e 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -102,6 +102,14 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 4b72287..2561a8b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -83,6 +84,9 @@ extern pgprot_t pgprot_kernel;
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+ L_PTE2_WRITE | L_PTE2_NORM_WB | \
+ L_PTE2_INNER_WB)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 83abbe0..7fa50d3 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM_ARM_HOST
depends on KVM
depends on MMU
depends on CPU_V7 && ARM_VIRT_EXT
+ select MMU_NOTIFIER
---help---
Provides host support for ARM processors.
diff --git a/arch/arm/kvm/exports.c b/arch/arm/kvm/exports.c
index 9bdaf11..ec6e7c4 100644
--- a/arch/arm/kvm/exports.c
+++ b/arch/arm/kvm/exports.c
@@ -30,5 +30,6 @@ EXPORT_SYMBOL_GPL(__kvm_hyp_code_end);
EXPORT_SYMBOL_GPL(__kvm_vcpu_run);
EXPORT_SYMBOL_GPL(__kvm_flush_vm_context);
+EXPORT_SYMBOL_GPL(__kvm_tlb_flush_vmid);
EXPORT_SYMBOL_GPL(smp_send_reschedule);
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 13b7b85..4a70250 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -34,9 +34,46 @@ __kvm_hyp_code_start:
.globl __kvm_hyp_code_start
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Flush per-VMID TLBs
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/*
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ *
+ * We rely on the hardware to broadcast the TLB invalidation to all CPUs
+ * inside the inner-shareable domain (which is the case for all v7
+ * implementations). If we come across a non-IS SMP implementation, we'll
+ * have to use an IPI based mechanism. Until then, we stick to the simple
+ * hardware assisted version.
+ */
+ENTRY(__kvm_tlb_flush_vmid)
+ hvc #0 @ Switch to Hyp mode
+ push {r2, r3}
+
+ add r0, r0, #KVM_VTTBR
+ ldrd r2, r3, [r0]
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ isb
+ mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
+ dsb
+ isb
+ mov r2, #0
+ mov r3, #0
+ mcrr p15, 6, r2, r3, c2 @ Back to VMID #0
+ isb
+
+ pop {r2, r3}
+ hvc #0 @ Back to SVC
+ bx lr
+ENDPROC(__kvm_tlb_flush_vmid)
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ Flush TLBs and instruction caches of current CPU for all VMIDs
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+/*
+ * void __kvm_flush_vm_context(void);
+ */
ENTRY(__kvm_flush_vm_context)
hvc #0 @ switch to hyp-mode
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b256540..8ea311a 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
#include <asm/pgalloc.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_asm.h>
static pgd_t *kvm_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
@@ -166,6 +167,9 @@ out:
* Allocates the 1st level table only of size defined by PGD2_ORDER (can
* support either full 40-bit input addresses or limited to 32-bit input
* addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
@@ -227,6 +231,9 @@ static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
* Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
* underlying level-2 and level-3 tables before freeing the actual level-1 table
* and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
*/
void kvm_free_stage2_pgd(struct kvm *kvm)
{
@@ -262,9 +269,164 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
+static const pte_t null_pte;
+
+static int stage2_set_pte(struct kvm *kvm, phys_addr_t addr,
+ const pte_t *new_pte)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /* Create 2nd stage page table mapping - Level 1 */
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud)) {
+ BUG_ON(new_pte == &null_pte);
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err("Cannot allocate 2nd stage pmd\n");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ pmd += pmd_index(addr);
+ } else
+ pmd = pmd_offset(pud, addr);
+
+ /* Create 2nd stage page table mapping - Level 2 */
+ if (pmd_none(*pmd)) {
+ BUG_ON(new_pte == &null_pte);
+ pte = pte_alloc_one_kernel(NULL, addr);
+ if (!pte) {
+ kvm_err("Cannot allocate 2nd stage pte\n");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ pte += pte_index(addr);
+ } else
+ pte = pte_offset_kernel(pmd, addr);
+
+ /* Create 2nd stage page table mapping - Level 3 */
+ set_pte_ext(pte, *new_pte, 0);
+
+ return 0;
+}
+
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+ pte_t new_pte;
+ pfn_t pfn;
+ int ret;
+
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+ if (is_error_pfn(pfn)) {
+ put_page(pfn_to_page(pfn));
+ kvm_err("Guest gfn %u (0x%08x) does not have\n"
+ "corresponding host mapping",
+ (unsigned int)gfn,
+ (unsigned int)gfn << PAGE_SHIFT);
+ return -EFAULT;
+ }
+
+ mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+ new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+ ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
+ if (ret)
+ put_page(pfn_to_page(pfn));
+ mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+
+ return ret;
+}
+
+#define HSR_ABT_FS (0x3f)
+#define HPFAR_MASK (~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu: the VCPU pointer
+ * @run: the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- return -EINVAL;
+ unsigned long hsr_ec;
+ unsigned long fault_status;
+ phys_addr_t fault_ipa;
+ struct kvm_memory_slot *memslot = NULL;
+ bool is_iabt;
+ gfn_t gfn;
+
+ hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+ is_iabt = (hsr_ec == HSR_EC_IABT);
+
+ /* Check that the second stage fault is a translation fault */
+ fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+ if ((fault_status & 0x3c) != 0x4) {
+ kvm_err("Unsupported fault status: EC=%lx DFCS=%lx\n",
+ hsr_ec, fault_status);
+ return -EFAULT;
+ }
+
+ fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+ gfn = fault_ipa >> PAGE_SHIFT;
+ if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ if (is_iabt) {
+ kvm_err("Inst. abort on I/O address %08lx\n",
+ (unsigned long)fault_ipa);
+ return -EFAULT;
+ }
+
+ kvm_pr_unimpl("I/O address abort...");
+ return 0;
+ }
+
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!memslot->user_alloc) {
+ kvm_err("non user-alloc memslots not supported\n");
+ return -EINVAL;
+ }
+
+ return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int needs_stage2_flush = 0;
+
+ slots = kvm_memslots(kvm);
+
+ /* we only care about the pages that the guest sees */
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long start = memslot->userspace_addr;
+ unsigned long end;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+ if (hva >= start && hva < end) {
+ gpa_t gpa_offset = hva - start;
+ gpa_t gpa = (memslot->base_gfn << PAGE_SHIFT) +
+ gpa_offset;
+
+ stage2_set_pte(kvm, gpa, &null_pte);
+ needs_stage2_flush = 1;
+ }
+ }
+
+ if (needs_stage2_flush)
+ __kvm_tlb_flush_vmid(kvm);
+
+ return 0;
}
int kvm_hyp_pgd_alloc(void)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index f7439e7..7dd4b54 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -56,9 +56,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
struct cachepolicy {
const char policy[16];
@@ -520,6 +522,7 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | kern_pgprot);
+ pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
next prev parent reply other threads:[~2012-06-15 19:15 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-15 19:06 [PATCH v8 00/15] KVM/ARM Implementation Christoffer Dall
2012-06-15 19:06 ` [PATCH v8 01/15] ARM: add mem_type prot_pte accessor Christoffer Dall
2012-06-15 19:07 ` [PATCH v8 02/15] KVM: use KVM_CAP_IRQ_ROUTING to protect the routing related code Christoffer Dall
2012-06-18 13:06 ` Avi Kivity
2012-06-15 19:07 ` [PATCH v8 03/15] KVM: Introduce __KVM_HAVE_IRQ_LINE Christoffer Dall
2012-06-18 13:07 ` Avi Kivity
2012-06-15 19:07 ` [PATCH v8 04/15] KVM: Guard mmu_notifier specific code with CONFIG_MMU_NOTIFIER Christoffer Dall
2012-06-18 13:08 ` Avi Kivity
2012-06-18 17:47 ` Christoffer Dall
2012-06-19 8:37 ` Avi Kivity
2012-06-28 21:28 ` Marcelo Tosatti
2012-06-15 19:07 ` [PATCH v8 05/15] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2012-06-15 19:07 ` [PATCH v8 06/15] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2012-06-18 13:12 ` Avi Kivity
2012-06-18 17:55 ` Christoffer Dall
2012-06-19 8:38 ` Avi Kivity
2012-06-15 19:07 ` [PATCH v8 07/15] ARM: KVM: Hypervisor inititalization Christoffer Dall
2012-06-28 22:35 ` Marcelo Tosatti
2012-06-28 22:53 ` Christoffer Dall
2012-06-29 1:07 ` Marcelo Tosatti
2012-06-15 19:08 ` [PATCH v8 08/15] ARM: KVM: Module unloading support Christoffer Dall
2012-06-15 19:08 ` [PATCH v8 09/15] ARM: KVM: Memory virtualization setup Christoffer Dall
2012-06-21 12:29 ` Gleb Natapov
2012-06-21 19:48 ` Christoffer Dall
2012-06-28 22:34 ` Marcelo Tosatti
2012-06-28 22:51 ` Christoffer Dall
2012-06-15 19:08 ` [PATCH v8 10/15] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
2012-06-18 13:32 ` Avi Kivity
2012-06-18 20:56 ` Christoffer Dall
2012-06-19 8:49 ` Avi Kivity
2012-06-20 3:17 ` Christoffer Dall
2012-06-15 19:08 ` [PATCH v8 11/15] ARM: KVM: World-switch implementation Christoffer Dall
2012-06-18 13:41 ` Avi Kivity
2012-06-18 22:05 ` Christoffer Dall
2012-06-19 9:16 ` Avi Kivity
2012-06-20 3:27 ` Christoffer Dall
2012-06-20 4:40 ` Christoffer Dall
2012-06-21 8:13 ` Avi Kivity
2012-06-21 17:54 ` Christoffer Dall
2012-07-02 13:07 ` Avi Kivity
2012-06-15 19:08 ` [PATCH v8 12/15] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
2012-06-15 19:09 ` Christoffer Dall [this message]
2012-06-18 13:45 ` [PATCH v8 13/15] ARM: KVM: Handle guest faults in KVM Avi Kivity
2012-06-18 22:20 ` Christoffer Dall
2012-06-19 9:32 ` Avi Kivity
2012-06-19 10:41 ` Andrea Arcangeli
2012-06-20 15:13 ` Christoffer Dall
2012-06-20 17:49 ` Andrea Arcangeli
2012-06-15 19:09 ` [PATCH v8 14/15] ARM: KVM: Handle I/O aborts Christoffer Dall
2012-06-18 13:48 ` Avi Kivity
2012-06-18 22:28 ` Christoffer Dall
2012-06-15 19:09 ` [PATCH v8 15/15] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
2012-06-28 21:49 ` [PATCH v8 00/15] KVM/ARM Implementation Marcelo Tosatti
2012-06-28 22:44 ` Christoffer Dall
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120615190908.24590.95861.stgit@ubuntu \
--to=c.dall@virtualopensystems.com \
--cc=android-virt@lists.cs.columbia.edu \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.