[PATCH v6 09/12] ARM: KVM: Handle guest faults in KVM

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

From: Christoffer Dall <c.dall@virtualopensystems.com>
To: android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
Cc: tech@virtualopensystems.com
Subject: [PATCH v6 09/12] ARM: KVM: Handle guest faults in KVM
Date: Thu, 23 Feb 2012 02:33:09 -0500	[thread overview]
Message-ID: <20120223073309.3266.17383.stgit@ubuntu> (raw)
In-Reply-To: <20120223073159.3266.45217.stgit@ubuntu>

From: Christoffer Dall <c.dall@virtualopensystems.com>

Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.

Introduces the ARM-specific PAGE_KVM_GUEST page protection and the
pgprot_guest variable, which are used to map 2nd stage memory for KVM guests.

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/pgtable-3level.h |    8 ++
 arch/arm/include/asm/pgtable.h        |    4 +
 arch/arm/kvm/mmu.c                    |  127 +++++++++++++++++++++++++++++++++
 arch/arm/mm/mmu.c                     |    3 +
 4 files changed, 141 insertions(+), 1 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index edc3cb9..6dc5331 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,6 +104,14 @@
  */
 #define L_PGD_SWAPPER		(_AT(pgdval_t, 1) << 55)	/* swapper_pg_dir entry */
 
+/*
+ * 2nd stage PTE attribute bits for LPAE (used by PAGE_KVM_GUEST).
+ */
+#define L_PTE2_READ		(_AT(pteval_t, 1) << 6)	/* HAP[0] */
+#define L_PTE2_WRITE		(_AT(pteval_t, 1) << 7)	/* HAP[1] */
+#define L_PTE2_NORM_WB		(_AT(pteval_t, 3) << 4)	/* MemAttr[3:2] */
+#define L_PTE2_INNER_WB		(_AT(pteval_t, 3) << 2)	/* MemAttr[1:0] */
+
 #ifndef __ASSEMBLY__
 
 #define pud_none(pud)		(!pud_val(pud))
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 4b72287..2561a8b 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 
 extern pgprot_t		pgprot_user;
 extern pgprot_t		pgprot_kernel;
+extern pgprot_t		pgprot_guest;
 
 #define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
 
@@ -83,6 +84,9 @@ extern pgprot_t		pgprot_kernel;
 #define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC	pgprot_kernel
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST		_MOD_PROT(pgprot_guest, L_PTE2_READ | \
+					  L_PTE2_WRITE | L_PTE2_NORM_WB | \
+					  L_PTE2_INNER_WB)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 94411a6..eb91da2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -169,6 +169,9 @@ out:
  * Allocates the 1st level table only of size defined by PGD2_ORDER (can
  * support either full 40-bit input addresses or limited to 32-bit input
  * addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
@@ -230,6 +233,9 @@ static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
  * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
  * underlying level-2 and level-3 tables before freeing the actual level-1 table
  * and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
  */
 void kvm_free_stage2_pgd(struct kvm *kvm)
 {
@@ -265,7 +271,126 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }
 
+static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, new_pte;
+
+	/* Level 1: find the pud for addr, allocating a pmd table if empty */
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud)) {
+		pmd = pmd_alloc_one(NULL, addr);
+		if (!pmd) {
+			put_page(pfn_to_page(pfn));
+			kvm_err("Cannot allocate 2nd stage pmd\n");
+			return -ENOMEM;
+		}
+		pud_populate(NULL, pud, pmd);
+		pmd += pmd_index(addr);	/* entry for addr in the new table */
+	} else
+		pmd = pmd_offset(pud, addr);
+
+	/* Level 2: allocate a pte table if the pmd entry is empty */
+	if (pmd_none(*pmd)) {
+		pte = pte_alloc_one_kernel(NULL, addr);
+		if (!pte) {
+			put_page(pfn_to_page(pfn));
+			kvm_err("Cannot allocate 2nd stage pte\n");
+			return -ENOMEM;
+		}
+		pmd_populate_kernel(NULL, pmd, pte);
+		pte += pte_index(addr);	/* entry for addr in the new table */
+	} else
+		pte = pte_offset_kernel(pmd, addr);
+
+	/* Level 3: install a PAGE_KVM_GUEST mapping of pfn at addr */
+	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+	set_pte_ext(pte, new_pte, 0);
+
+	return 0;
+}
+
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+	pfn_t pfn;
+	int ret;
+
+	/* Resolve the guest frame to a host pfn; @memslot is currently unused */
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	if (is_error_pfn(pfn)) {
+		put_page(pfn_to_page(pfn));
+		/* Print as 64-bit: a 32-bit cast truncates IPAs above 4GB */
+		kvm_err("Guest gfn %llu (0x%08llx) does not have "
+				"corresponding host mapping\n",
+				(unsigned long long)gfn,
+				(unsigned long long)gfn << PAGE_SHIFT);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+	ret = __user_mem_abort(vcpu->kvm, fault_ipa, pfn);
+	mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+	return ret;
+}
+
+#define HSR_ABT_FS	(0x3f)	/* low six bits of the HSR */
+#define HPFAR_MASK	(~0xf)	/* clears bits [3:0] of the HPFAR */
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu:	the VCPU pointer
+ * @run:	the kvm_run structure (not used by this function)
+ *
+ * An abort that reaches the host is almost always caused by a missing 2nd
+ * stage translation entry: either the guest touched RAM that is not mapped
+ * yet and a page must be mapped in, or it accessed I/O memory, which is
+ * emulated by user space. The two are told apart by whether the faulting IPA
+ * lies in a memory region registered by user space. Returns 0 or a negative
+ * errno (-EFAULT, -EINVAL, or whatever user_mem_abort() returns).
+ */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	return -EINVAL;
+	unsigned long hsr_ec;
+	unsigned long fault_status;
+	phys_addr_t fault_ipa;
+	struct kvm_memory_slot *memslot = NULL;
+	bool is_iabt;
+	gfn_t gfn;
+
+	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+	is_iabt = (hsr_ec == HSR_EC_IABT);
+
+	/* Only translation faults (FS[5:2] == 1) are handled here */
+	fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+	if ((fault_status & 0x3c) != 0x4) {
+		kvm_err("Unsupported fault status: %lx\n",
+				fault_status & 0x3c);
+		return -EFAULT;
+	}
+
+	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+	gfn = fault_ipa >> PAGE_SHIFT;
+	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+		if (is_iabt) {
+			kvm_err("Inst. abort on I/O address %08lx\n",
+				(unsigned long)fault_ipa);
+			return -EFAULT;
+		}
+
+		kvm_pr_unimpl("I/O address abort...");
+		return 0;	/* logged only; nothing is emulated here */
+	}
+
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!memslot->user_alloc) {
+		kvm_err("non user-alloc memslots not supported\n");
+		return -EINVAL;
+	}
+
+	return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
 }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 94c5a0c..1a2df14 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -54,9 +54,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
 pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
 
 EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
 
 struct cachepolicy {
 	const char	policy[16];
@@ -504,6 +506,7 @@ static void __init build_mem_type_table(void)
 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
 				 L_PTE_DIRTY | kern_pgprot);
+	pgprot_guest  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
 
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;

next prev parent reply	other threads:[~2012-02-23  7:33 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-23  7:32 [PATCH v6 00/12] KVM/ARM Implementation Christoffer Dall
2012-02-23  7:32 ` [PATCH v6 01/12] KVM: Introduce __KVM_HAVE_IRQ_LINE Christoffer Dall
2012-02-23  7:32 ` [PATCH v6 02/12] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2012-02-24  3:32   ` Rusty Russell
2012-02-24  4:43     ` Christoffer Dall
2012-02-25  3:50       ` Rusty Russell
2012-02-25 15:20         ` Christoffer Dall
2012-03-11 21:41     ` Christoffer Dall
2012-02-23  7:32 ` [PATCH v6 03/12] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2012-02-24  3:33   ` Rusty Russell
2012-02-23  7:32 ` [PATCH v6 04/12] ARM: KVM: Hypervisor inititalization Christoffer Dall
2012-02-24  4:00   ` Rusty Russell
2012-03-11 22:24     ` Christoffer Dall
2012-03-13  3:20       ` Rusty Russell
2012-03-05  1:12   ` Rusty Russell
2012-03-05  2:13     ` Christoffer Dall
2012-02-23  7:32 ` [PATCH v6 05/12] ARM: KVM: Memory virtualization setup Christoffer Dall
2012-02-23  7:32 ` [PATCH v6 06/12] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
2012-02-23  7:32 ` [PATCH v6 07/12] ARM: KVM: World-switch implementation Christoffer Dall
2012-02-23  7:33 ` [PATCH v6 08/12] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
2012-02-23  7:33 ` Christoffer Dall [this message]
2012-02-23  7:33 ` [PATCH v6 10/12] ARM: KVM: Handle I/O aborts Christoffer Dall
2012-02-23  7:33 ` [PATCH v6 11/12] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
2012-02-23  7:33 ` [PATCH v6 12/12] ARM: KVM: Handle CP15 CR9 accesses for L2CTLR emulation Christoffer Dall

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:edc3cb9 dfblob:6dc5331 dfblob:4b72287 dfblob:2561a8b
dfblob:94411a6 dfblob:eb91da2 dfblob:94c5a0c dfblob:1a2df14 )
 OR (
bs:"[PATCH v6 09/12] ARM: KVM: Handle guest faults in KVM" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120223073309.3266.17383.stgit@ubuntu \
    --to=c.dall@virtualopensystems.com \
    --cc=android-virt@lists.cs.columbia.edu \
    --cc=kvm@vger.kernel.org \
    --cc=tech@virtualopensystems.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox