From: Christoffer Dall <c.dall@virtualopensystems.com>
To: android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
Cc: tech@virtualopensystems.com
Subject: [PATCH v6 09/12] ARM: KVM: Handle guest faults in KVM
Date: Thu, 23 Feb 2012 02:33:09 -0500 [thread overview]
Message-ID: <20120223073309.3266.17383.stgit@ubuntu> (raw)
In-Reply-To: <20120223073159.3266.45217.stgit@ubuntu>
From: Christoffer Dall <c.dall@virtualopensystems.com>
Handle guest faults in KVM by mapping the corresponding user pages into
the 2nd stage page tables.
Introduce a new ARM-specific kernel memory type, PAGE_KVM_GUEST, and the
pgprot_guest variable, which are used to map 2nd stage memory for KVM guests.
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
arch/arm/include/asm/pgtable-3level.h | 8 ++
arch/arm/include/asm/pgtable.h | 4 +
arch/arm/kvm/mmu.c | 127 +++++++++++++++++++++++++++++++++
arch/arm/mm/mmu.c | 3 +
4 files changed, 141 insertions(+), 1 deletions(-)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index edc3cb9..6dc5331 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,6 +104,14 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
+/*
+ * 2nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 4b72287..2561a8b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -83,6 +84,9 @@ extern pgprot_t pgprot_kernel;
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+ L_PTE2_WRITE | L_PTE2_NORM_WB | \
+ L_PTE2_INNER_WB)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 94411a6..eb91da2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -169,6 +169,9 @@ out:
* Allocates the 1st level table only of size defined by PGD2_ORDER (can
* support either full 40-bit input addresses or limited to 32-bit input
* addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
@@ -230,6 +233,9 @@ static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
* Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
* underlying level-2 and level-3 tables before freeing the actual level-1 table
* and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
*/
void kvm_free_stage2_pgd(struct kvm *kvm)
{
@@ -265,7 +271,126 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
+/*
+ * Install a 2nd stage mapping for the guest address @addr that points at the
+ * host page frame @pfn, using the page tables rooted at kvm->arch.pgd.
+ * Missing pmd and pte tables are allocated on demand.
+ *
+ * Returns 0 on success.  If a table cannot be allocated, the page behind
+ * @pfn is dropped with put_page() and -ENOMEM is returned.
+ */
+static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
+{
+	pgd_t *pgd = kvm->arch.pgd + pgd_index(addr);
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Level 1: reuse the existing pmd table or hook up a fresh one */
+	if (!pud_none(*pud)) {
+		pmd = pmd_offset(pud, addr);
+	} else {
+		pmd = pmd_alloc_one(NULL, addr);
+		if (!pmd) {
+			put_page(pfn_to_page(pfn));
+			kvm_err("Cannot allocate 2nd stage pmd\n");
+			return -ENOMEM;
+		}
+		pud_populate(NULL, pud, pmd);
+		pmd += pmd_index(addr);
+	}
+
+	/* Level 2: reuse the existing pte table or hook up a fresh one */
+	if (!pmd_none(*pmd)) {
+		pte = pte_offset_kernel(pmd, addr);
+	} else {
+		pte = pte_alloc_one_kernel(NULL, addr);
+		if (!pte) {
+			put_page(pfn_to_page(pfn));
+			kvm_err("Cannot allocate 2nd stage pte\n");
+			return -ENOMEM;
+		}
+		pmd_populate_kernel(NULL, pmd, pte);
+		pte += pte_index(addr);
+	}
+
+	/* Level 3: write the leaf entry with the guest page protection */
+	set_pte_ext(pte, pfn_pte(pfn, PAGE_KVM_GUEST), 0);
+
+	return 0;
+}
+
+/*
+ * Handle a 2nd stage fault on an address that user space has registered as
+ * guest memory: look up the host page frame for @gfn and map it at
+ * @fault_ipa in the 2nd stage page tables, under kvm->arch.pgd_mutex.
+ *
+ * @vcpu:      the VCPU that took the fault
+ * @fault_ipa: the faulting guest address to map
+ * @gfn:       the guest frame number of @fault_ipa
+ * @memslot:   the memory slot containing @gfn (currently unused here)
+ *
+ * Returns 0 on success, -EFAULT if @gfn has no host mapping, or the error
+ * returned by __user_mem_abort().
+ */
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+	pfn_t pfn;
+	int ret;
+
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	if (is_error_pfn(pfn)) {
+		put_page(pfn_to_page(pfn));
+		/*
+		 * Print the address at full width: shifting a gfn that was
+		 * truncated to 32 bits loses the upper bits of large
+		 * addresses.
+		 */
+		kvm_err("Guest gfn %llu (0x%08llx) does not have corresponding host mapping\n",
+			(unsigned long long)gfn,
+			(unsigned long long)gfn << PAGE_SHIFT);
+		return -EFAULT;
+	}
+
+	/* Serialize updates to the 2nd stage page tables */
+	mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+	ret = __user_mem_abort(vcpu->kvm, fault_ipa, pfn);
+	mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+
+	return ret;
+}
+
+#define HSR_ABT_FS (0x3f)
+#define HPFAR_MASK (~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu: the VCPU pointer
+ * @run: the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
-	return -EINVAL;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *memslot;
+	unsigned long fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+	phys_addr_t fault_ipa;
+	gfn_t gfn;
+
+	/* Only 2nd stage translation faults are handled; reject the rest */
+	fault_status &= 0x3c;
+	if (fault_status != 0x4) {
+		kvm_err("Unsupported fault status: %lx\n", fault_status);
+		return -EFAULT;
+	}
+
+	/* Build the faulting IPA from HPFAR, then derive its frame number */
+	fault_ipa = (phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK;
+	fault_ipa <<= 8;
+	gfn = fault_ipa >> PAGE_SHIFT;
+
+	/* Guest-visible memory: map the backing user page */
+	if (kvm_is_visible_gfn(kvm, gfn)) {
+		memslot = gfn_to_memslot(kvm, gfn);
+		if (!memslot->user_alloc) {
+			kvm_err("non user-alloc memslots not supported\n");
+			return -EINVAL;
+		}
+
+		return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
+	}
+
+	/* Otherwise the access is treated as I/O; fetching from it is fatal */
+	if ((vcpu->arch.hsr >> HSR_EC_SHIFT) == HSR_EC_IABT) {
+		kvm_err("Inst. abort on I/O address %08lx\n",
+			(unsigned long)fault_ipa);
+		return -EFAULT;
+	}
+
+	kvm_pr_unimpl("I/O address abort...");
+	return 0;
}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 94c5a0c..1a2df14 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -54,9 +54,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
struct cachepolicy {
const char policy[16];
@@ -504,6 +506,7 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | kern_pgprot);
+ pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
next prev parent reply other threads:[~2012-02-23 7:33 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-23 7:32 [PATCH v6 00/12] KVM/ARM Implementation Christoffer Dall
2012-02-23 7:32 ` [PATCH v6 01/12] KVM: Introduce __KVM_HAVE_IRQ_LINE Christoffer Dall
2012-02-23 7:32 ` [PATCH v6 02/12] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2012-02-24 3:32 ` Rusty Russell
2012-02-24 4:43 ` Christoffer Dall
2012-02-25 3:50 ` Rusty Russell
2012-02-25 15:20 ` Christoffer Dall
2012-03-11 21:41 ` Christoffer Dall
2012-02-23 7:32 ` [PATCH v6 03/12] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2012-02-24 3:33 ` Rusty Russell
2012-02-23 7:32 ` [PATCH v6 04/12] ARM: KVM: Hypervisor inititalization Christoffer Dall
2012-02-24 4:00 ` Rusty Russell
2012-03-11 22:24 ` Christoffer Dall
2012-03-13 3:20 ` Rusty Russell
2012-03-05 1:12 ` Rusty Russell
2012-03-05 2:13 ` Christoffer Dall
2012-02-23 7:32 ` [PATCH v6 05/12] ARM: KVM: Memory virtualization setup Christoffer Dall
2012-02-23 7:32 ` [PATCH v6 06/12] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
2012-02-23 7:32 ` [PATCH v6 07/12] ARM: KVM: World-switch implementation Christoffer Dall
2012-02-23 7:33 ` [PATCH v6 08/12] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
2012-02-23 7:33 ` Christoffer Dall [this message]
2012-02-23 7:33 ` [PATCH v6 10/12] ARM: KVM: Handle I/O aborts Christoffer Dall
2012-02-23 7:33 ` [PATCH v6 11/12] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
2012-02-23 7:33 ` [PATCH v6 12/12] ARM: KVM: Handle CP15 CR9 accesses for L2CTLR emulation Christoffer Dall
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120223073309.3266.17383.stgit@ubuntu \
--to=c.dall@virtualopensystems.com \
--cc=android-virt@lists.cs.columbia.edu \
--cc=kvm@vger.kernel.org \
--cc=tech@virtualopensystems.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.