From: Christoffer Dall <c.dall@virtualopensystems.com>
To: android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
Cc: tech@virtualopensystems.com
Subject: [PATCH v7 10/12] ARM: KVM: Handle guest faults in KVM
Date: Mon, 12 Mar 2012 02:52:53 -0400 [thread overview]
Message-ID: <20120312065253.8074.63510.stgit@ubuntu> (raw)
In-Reply-To: <20120312065134.8074.36949.stgit@ubuntu>
From: Christoffer Dall <cdall@cs.columbia.edu>
Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.
Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
pgprot_guest variables used to map 2nd stage memory for KVM guests.
Leverages MMU notifiers on KVM/ARM by supporting the kvm_unmap_hva() operation,
where we remove the HVA from the 2nd stage translation. All other KVM MMU
notifierhooks are NOPs.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/kvm_asm.h | 3 +
arch/arm/include/asm/kvm_host.h | 19 ++++
arch/arm/include/asm/pgtable-3level.h | 8 ++
arch/arm/include/asm/pgtable.h | 4 +
arch/arm/kvm/Kconfig | 1
arch/arm/kvm/interrupts.S | 37 ++++++++
arch/arm/kvm/mmu.c | 162 +++++++++++++++++++++++++++++++++
arch/arm/mm/mmu.c | 3 +
8 files changed, 236 insertions(+), 1 deletions(-)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 69afdf3..c7cbe24 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -32,6 +32,7 @@
#define SMCHYP_HVBAR_W 0xfffffff0
#ifndef __ASSEMBLY__
+struct kvm;
struct kvm_vcpu;
extern char __kvm_hyp_init[];
@@ -42,6 +43,8 @@ extern char __kvm_hyp_vector[];
extern char __kvm_hyp_code_start[];
extern char __kvm_hyp_code_end[];
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
extern void __kvm_flush_vm_context(void);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 72ba708..241a950 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -114,4 +114,23 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index edc3cb9..6dc5331 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,6 +104,14 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 4b72287..2561a8b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -83,6 +84,9 @@ extern pgprot_t pgprot_kernel;
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+ L_PTE2_WRITE | L_PTE2_NORM_WB | \
+ L_PTE2_INNER_WB)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 83abbe0..7fa50d3 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM_ARM_HOST
depends on KVM
depends on MMU
depends on CPU_V7 && ARM_VIRT_EXT
+ select MMU_NOTIFIER
---help---
Provides host support for ARM processors.
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index fbc0bec..0cf4965 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -35,9 +35,46 @@ __kvm_hyp_code_start:
.globl __kvm_hyp_code_start
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Flush per-VMID TLBs
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/*
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ *
+ * We rely on the hardware to broadcast the TLB invalidation to all CPUs
+ * inside the inner-shareable domain (which is the case for all v7
+ * implementations). If we come across a non-IS SMP implementation, we'll
+ * have to use an IPI based mechanism. Until then, we stick to the simple
+ * hardware assisted version.
+ */
+ENTRY(__kvm_tlb_flush_vmid)
+ hvc #0 @ Switch to Hyp mode
+ push {r2, r3}
+
+ add r0, r0, #KVM_VTTBR
+ ldrd r2, r3, [r0]
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ isb
+ mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
+ dsb
+ isb
+ mov r2, #0
+ mov r3, #0
+ mcrr p15, 6, r2, r3, c2 @ Back to VMID #0
+ isb
+
+ pop {r2, r3}
+ hvc #0 @ Back to SVC
+ mov pc, lr
+ENDPROC(__kvm_tlb_flush_vmid)
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ Flush TLBs and instruction caches of current CPU for all VMIDs
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+/*
+ * void __kvm_flush_vm_context(void);
+ */
ENTRY(__kvm_flush_vm_context)
hvc #0 @ switch to hyp-mode
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 94411a6..7eae6c2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
#include <asm/pgalloc.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_asm.h>
pgd_t *kvm_hyp_pgd;
DEFINE_MUTEX(kvm_hyp_pgd_mutex);
@@ -169,6 +170,9 @@ out:
* Allocates the 1st level table only of size defined by PGD2_ORDER (can
* support either full 40-bit input addresses or limited to 32-bit input
* addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
@@ -230,6 +234,9 @@ static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
* Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
* underlying level-2 and level-3 tables before freeing the actual level-1 table
* and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
*/
void kvm_free_stage2_pgd(struct kvm *kvm)
{
@@ -265,7 +272,160 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
+static const pte_t null_pte;
+
+static int stage2_set_pte(struct kvm *kvm, phys_addr_t addr, const pte_t *new_pte)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /* Create 2nd stage page table mapping - Level 1 */
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud)) {
+ BUG_ON(new_pte == &null_pte);
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err("Cannot allocate 2nd stage pmd\n");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ pmd += pmd_index(addr);
+ } else
+ pmd = pmd_offset(pud, addr);
+
+ /* Create 2nd stage page table mapping - Level 2 */
+ if (pmd_none(*pmd)) {
+ BUG_ON(new_pte == &null_pte);
+ pte = pte_alloc_one_kernel(NULL, addr);
+ if (!pte) {
+ kvm_err("Cannot allocate 2nd stage pte\n");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ pte += pte_index(addr);
+ } else
+ pte = pte_offset_kernel(pmd, addr);
+
+ /* Create 2nd stage page table mapping - Level 3 */
+ set_pte_ext(pte, *new_pte, 0);
+
+ return 0;
+}
+
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+ pte_t new_pte;
+ pfn_t pfn;
+ int ret;
+
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+ if (is_error_pfn(pfn)) {
+ put_page(pfn_to_page(pfn));
+ kvm_err("Guest gfn %u (0x%08x) does not have \n"
+ "corresponding host mapping",
+ (unsigned int)gfn,
+ (unsigned int)gfn << PAGE_SHIFT);
+ return -EFAULT;
+ }
+
+ mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+ new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+ ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
+ if (ret)
+ put_page(pfn_to_page(pfn));
+ mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+
+ return ret;
+}
+
+#define HSR_ABT_FS (0x3f)
+#define HPFAR_MASK (~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu: the VCPU pointer
+ * @run: the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- return -EINVAL;
+ unsigned long hsr_ec;
+ unsigned long fault_status;
+ phys_addr_t fault_ipa;
+ struct kvm_memory_slot *memslot = NULL;
+ bool is_iabt;
+ gfn_t gfn;
+
+ hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+ is_iabt = (hsr_ec == HSR_EC_IABT);
+
+ /* Check that the second stage fault is a translation fault */
+ fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+ if ((fault_status & 0x3c) != 0x4) {
+ kvm_err("Unsupported fault status: %lx\n",
+ fault_status & 0x3c);
+ return -EFAULT;
+ }
+
+ fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+ gfn = fault_ipa >> PAGE_SHIFT;
+ if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ if (is_iabt) {
+ kvm_err("Inst. abort on I/O address %08lx\n",
+ (unsigned long)fault_ipa);
+ return -EFAULT;
+ }
+
+ kvm_pr_unimpl("I/O address abort...");
+ return 0;
+ }
+
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!memslot->user_alloc) {
+ kvm_err("non user-alloc memslots not supported\n");
+ return -EINVAL;
+ }
+
+ return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int needs_stage2_flush = 0;
+
+ slots = kvm_memslots(kvm);
+
+ /* we only care about the pages that the guest sees */
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long start = memslot->userspace_addr;
+ unsigned long end;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+ if (hva >= start && hva < end) {
+ gpa_t gpa_offset = hva - start;
+ gpa_t gpa = (memslot->base_gfn << PAGE_SHIFT) + gpa_offset;
+
+ stage2_set_pte(kvm, gpa, &null_pte);
+ needs_stage2_flush = 1;
+ }
+ }
+
+ if (needs_stage2_flush)
+ __kvm_tlb_flush_vmid(kvm);
+
+ return 0;
}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 94c5a0c..1a2df14 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -54,9 +54,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
struct cachepolicy {
const char policy[16];
@@ -504,6 +506,7 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | kern_pgprot);
+ pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
next prev parent reply other threads:[~2012-03-12 6:52 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-12 6:51 [PATCH v7 00/12] KVM/ARM Implementation Christoffer Dall
2012-03-12 6:51 ` [PATCH v7 01/12] KVM: Introduce __KVM_HAVE_IRQ_LINE Christoffer Dall
2012-03-23 0:41 ` [PATCH] ARM: KVM: Check the cpuid we're being asked to emulate Rusty Russell
2012-05-14 22:57 ` Christoffer Dall
2012-05-16 23:58 ` Rusty Russell
2012-05-20 18:34 ` Christoffer Dall
2012-05-21 1:13 ` Rusty Russell
2012-03-12 6:52 ` [PATCH v7 02/12] KVM: Guard mmu_notifier specific code with CONFIG_MMU_NOTIFIER Christoffer Dall
2012-03-12 15:50 ` Avi Kivity
2012-03-12 6:52 ` [PATCH v7 03/12] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2012-03-12 6:52 ` [PATCH v7 04/12] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2012-03-12 6:52 ` [PATCH v7 05/12] ARM: KVM: Hypervisor inititalization Christoffer Dall
2012-03-12 6:52 ` [PATCH v7 06/12] ARM: KVM: Memory virtualization setup Christoffer Dall
2012-03-12 6:52 ` [PATCH v7 07/12] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
2012-03-12 6:52 ` [PATCH v7 08/12] ARM: KVM: World-switch implementation Christoffer Dall
2012-03-23 0:23 ` Rusty Russell
2012-03-28 13:05 ` Avi Kivity
2012-03-28 21:57 ` Rusty Russell
2012-03-29 10:49 ` Avi Kivity
2012-05-14 18:08 ` Christoffer Dall
2012-03-12 6:52 ` [PATCH v7 09/12] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
2012-03-12 6:52 ` Christoffer Dall [this message]
2012-03-12 15:31 ` [Android-virt] [PATCH v7 10/12] ARM: KVM: Handle guest faults in KVM Marc Zyngier
2012-03-12 16:23 ` Christoffer Dall
2012-03-12 16:28 ` Marc Zyngier
2012-03-12 6:53 ` [PATCH v7 11/12] ARM: KVM: Handle I/O aborts Christoffer Dall
2012-03-12 6:53 ` [PATCH v7 12/12] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
2012-03-12 17:36 ` [PATCH v7 00/12] KVM/ARM Implementation Avi Kivity
2012-03-23 0:40 ` [PATCH] ARM: KVM: Remove l2ctlr write Rusty Russell
2012-05-14 22:59 ` Christoffer Dall
2012-03-29 5:11 ` [PATCH 0/3] Emulation cleanups Rusty, Russell <rusty.russell
2012-03-29 5:15 ` [PATCH 1/3] ARM: KVM: Remove l2ctlr write Rusty Russell
2012-03-29 5:17 ` [PATCH 2/3] ARM: KVM: Fake up performance counters a little more precisely Rusty Russell
2012-05-14 22:49 ` Christoffer Dall
2012-05-17 0:12 ` Rusty Russell
2012-03-29 5:17 ` [PATCH 3/3] ARM: KVM: Check the cpuid we're being asked to emulate Rusty Russell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120312065253.8074.63510.stgit@ubuntu \
--to=c.dall@virtualopensystems.com \
--cc=android-virt@lists.cs.columbia.edu \
--cc=kvm@vger.kernel.org \
--cc=tech@virtualopensystems.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).