public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Christoffer Dall <c.dall@virtualopensystems.com>
To: android-virt@lists.cs.columbia.edu, kvm@vger.kernel.org
Subject: [PATCH v8 09/15] ARM: KVM: Memory virtualization setup
Date: Fri, 15 Jun 2012 15:08:22 -0400	[thread overview]
Message-ID: <20120615190822.24590.25657.stgit@ubuntu> (raw)
In-Reply-To: <20120615190553.24590.18391.stgit@ubuntu>

From: Christoffer Dall <cdall@cs.columbia.edu>

This commit introduces the framework for guest memory management
through the use of 2nd stage translation. Each VM has a pointer
to a level-1 table (the pgd field in struct kvm_arch) which is
used for the 2nd stage translations. Entries are added when handling
guest faults (later patch) and the table itself can be allocated and
freed through the following functions implemented in
arch/arm/kvm/arm_mmu.c:
 - kvm_alloc_stage2_pgd(struct kvm *kvm);
 - kvm_free_stage2_pgd(struct kvm *kvm);

Further, each entry in TLBs and caches are tagged with a VMID
identifier in addition to ASIDs. The VMIDs are assigned consecutively
to VMs in the order that VMs are executed, and caches and tlbs are
invalidated when the VMID space has been used to allow for more than
255 simultaenously running guests.

The 2nd stage pgd is allocated in kvm_arch_init_vm(). The table is
freed in kvm_arch_destroy_vm(). Both functions are called from the main
KVM code.

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_arm.h |    2 -
 arch/arm/include/asm/kvm_mmu.h |    5 ++
 arch/arm/kvm/arm.c             |   65 ++++++++++++++++++++++---
 arch/arm/kvm/mmu.c             |  103 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 166 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 7f30cbd..257242f 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -62,7 +62,7 @@
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TWI | HCR_VM | HCR_BSU_IS | HCR_FB | \
-			HCR_AMO | HCR_IMO | HCR_FMO | HCR_FMO | HCR_SWIO)
+			HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | HCR_SWIO)
 
 /* Hyp System Control Register (HSCTLR) bits */
 #define HSCTLR_TE	(1 << 30)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 1aa1af4..d95662eb 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -34,4 +34,9 @@ int kvm_hyp_pgd_alloc(void);
 pgd_t *kvm_hyp_pgd_get(void);
 void kvm_hyp_pgd_free(void);
 
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index efe130c..81babe9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -38,6 +38,13 @@
 
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 
+/* The VMID used in the VTTBR */
+#define VMID_BITS               8
+#define VMID_MASK               ((1 << VMID_BITS) - 1)
+#define VMID_FIRST_GENERATION	(1 << VMID_BITS)
+static u64 next_vmid;		/* The next available VMID in the sequence */
+DEFINE_SPINLOCK(kvm_vmid_lock);
+
 int kvm_arch_hardware_enable(void *garbage)
 {
 	return 0;
@@ -70,14 +77,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
 {
 }
 
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
-{
-	if (type)
-		return -EINVAL;
-
-	return 0;
-}
-
 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 {
 	return VM_FAULT_SIGBUS;
@@ -93,10 +92,46 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+/**
+ * kvm_arch_init_vm - initializes a VM data structure
+ * @kvm:	pointer to the KVM struct
+ */
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	int ret = 0;
+
+	if (type)
+		return -EINVAL;
+
+	ret = kvm_alloc_stage2_pgd(kvm);
+	if (ret)
+		goto out_fail_alloc;
+	mutex_init(&kvm->arch.pgd_mutex);
+
+	ret = create_hyp_mappings(kvm, kvm + 1);
+	if (ret)
+		goto out_free_stage2_pgd;
+
+	/* Mark the initial VMID invalid */
+	kvm->arch.vmid = 0;
+
+	return ret;
+out_free_stage2_pgd:
+	kvm_free_stage2_pgd(kvm);
+out_fail_alloc:
+	return ret;
+}
+
+/**
+ * kvm_arch_destroy_vm - destroy the VM data structure
+ * @kvm:	pointer to the KVM struct
+ */
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;
 
+	kvm_free_stage2_pgd(kvm);
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -172,6 +207,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_vcpu;
 
+	err = create_hyp_mappings(vcpu, vcpu + 1);
+	if (err)
+		goto free_vcpu;
+
 	return vcpu;
 free_vcpu:
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
@@ -181,6 +220,7 @@ out:
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -416,6 +456,15 @@ int kvm_arch_init(void *opaque)
 	if (err)
 		goto out_err;
 
+	/*
+	 * The upper 56 bits of VMIDs are used to identify the generation
+	 * counter, so VMIDs initialized to 0, having generation == 0, will
+	 * never be considered valid and therefor a new VMID must always be
+	 * assigned. Whent he VMID generation rolls over, we start from
+	 * VMID_FIRST_GENERATION again.
+	 */
+	next_vmid = VMID_FIRST_GENERATION;
+
 	return 0;
 out_err:
 	return err;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index a320b56a..b256540 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -159,6 +159,109 @@ out:
 	return err;
 }
 
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Allocates the 1st level table only of size defined by PGD2_ORDER (can
+ * support either full 40-bit input addresses or limited to 32-bit input
+ * addresses). Clears the allocated pages.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+	pgd_t *pgd;
+
+	if (kvm->arch.pgd != NULL) {
+		kvm_err("kvm_arch already initialized?\n");
+		return -EINVAL;
+	}
+
+	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);
+	if (!pgd)
+		return -ENOMEM;
+
+	memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
+	kvm->arch.pgd = pgd;
+
+	return 0;
+}
+
+static void free_guest_pages(pte_t *pte, unsigned long addr)
+{
+	unsigned int i;
+	struct page *page;
+
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
+		if (!pte_present(*pte))
+			goto next_page;
+		page = pfn_to_page(pte_pfn(*pte));
+		put_page(page);
+next_page:
+		pte++;
+	}
+}
+
+static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
+{
+	unsigned int i;
+	pte_t *pte;
+	struct page *page;
+
+	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
+		BUG_ON(pmd_sect(*pmd));
+		if (!pmd_none(*pmd) && pmd_table(*pmd)) {
+			pte = pte_offset_kernel(pmd, addr);
+			free_guest_pages(pte, addr);
+			page = virt_to_page((void *)pte);
+			WARN_ON(atomic_read(&page->_count) != 1);
+			pte_free_kernel(NULL, pte);
+		}
+		pmd++;
+	}
+}
+
+/**
+ * kvm_free_stage2_pgd - free all stage-2 tables
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * underlying level-2 and level-3 tables before freeing the actual level-1 table
+ * and setting the struct pointer to NULL.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long long i, addr;
+
+	if (kvm->arch.pgd == NULL)
+		return;
+
+	/*
+	 * We do this slightly different than other places, since we need more
+	 * than 32 bits and for instance pgd_addr_end converts to unsigned long.
+	 */
+	addr = 0;
+	for (i = 0; i < PTRS_PER_PGD2; i++) {
+		addr = i * (unsigned long long)PGDIR_SIZE;
+		pgd = kvm->arch.pgd + i;
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none(*pud))
+			continue;
+
+		BUG_ON(pud_bad(*pud));
+
+		pmd = pmd_offset(pud, addr);
+		free_stage2_ptes(pmd, addr);
+		pmd_free(NULL, pmd);
+	}
+
+	free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER);
+	kvm->arch.pgd = NULL;
+}
+
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	return -EINVAL;


  parent reply	other threads:[~2012-06-15 19:13 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-15 19:06 [PATCH v8 00/15] KVM/ARM Implementation Christoffer Dall
2012-06-15 19:06 ` [PATCH v8 01/15] ARM: add mem_type prot_pte accessor Christoffer Dall
2012-06-15 19:07 ` [PATCH v8 02/15] KVM: use KVM_CAP_IRQ_ROUTING to protect the routing related code Christoffer Dall
2012-06-18 13:06   ` Avi Kivity
2012-06-15 19:07 ` [PATCH v8 03/15] KVM: Introduce __KVM_HAVE_IRQ_LINE Christoffer Dall
2012-06-18 13:07   ` Avi Kivity
2012-06-15 19:07 ` [PATCH v8 04/15] KVM: Guard mmu_notifier specific code with CONFIG_MMU_NOTIFIER Christoffer Dall
2012-06-18 13:08   ` Avi Kivity
2012-06-18 17:47     ` Christoffer Dall
2012-06-19  8:37       ` Avi Kivity
2012-06-28 21:28   ` Marcelo Tosatti
2012-06-15 19:07 ` [PATCH v8 05/15] ARM: KVM: Initial skeleton to compile KVM support Christoffer Dall
2012-06-15 19:07 ` [PATCH v8 06/15] ARM: KVM: Hypervisor identity mapping Christoffer Dall
2012-06-18 13:12   ` Avi Kivity
2012-06-18 17:55     ` Christoffer Dall
2012-06-19  8:38       ` Avi Kivity
2012-06-15 19:07 ` [PATCH v8 07/15] ARM: KVM: Hypervisor inititalization Christoffer Dall
2012-06-28 22:35   ` Marcelo Tosatti
2012-06-28 22:53     ` Christoffer Dall
2012-06-29  1:07       ` Marcelo Tosatti
2012-06-15 19:08 ` [PATCH v8 08/15] ARM: KVM: Module unloading support Christoffer Dall
2012-06-15 19:08 ` Christoffer Dall [this message]
2012-06-21 12:29   ` [PATCH v8 09/15] ARM: KVM: Memory virtualization setup Gleb Natapov
2012-06-21 19:48     ` Christoffer Dall
2012-06-28 22:34   ` Marcelo Tosatti
2012-06-28 22:51     ` Christoffer Dall
2012-06-15 19:08 ` [PATCH v8 10/15] ARM: KVM: Inject IRQs and FIQs from userspace Christoffer Dall
2012-06-18 13:32   ` Avi Kivity
2012-06-18 20:56     ` Christoffer Dall
2012-06-19  8:49       ` Avi Kivity
2012-06-20  3:17         ` Christoffer Dall
2012-06-15 19:08 ` [PATCH v8 11/15] ARM: KVM: World-switch implementation Christoffer Dall
2012-06-18 13:41   ` Avi Kivity
2012-06-18 22:05     ` Christoffer Dall
2012-06-19  9:16       ` Avi Kivity
2012-06-20  3:27         ` Christoffer Dall
2012-06-20  4:40           ` Christoffer Dall
2012-06-21  8:13             ` Avi Kivity
2012-06-21 17:54               ` Christoffer Dall
2012-07-02 13:07                 ` Avi Kivity
2012-06-15 19:08 ` [PATCH v8 12/15] ARM: KVM: Emulation framework and CP15 emulation Christoffer Dall
2012-06-15 19:09 ` [PATCH v8 13/15] ARM: KVM: Handle guest faults in KVM Christoffer Dall
2012-06-18 13:45   ` Avi Kivity
2012-06-18 22:20     ` Christoffer Dall
2012-06-19  9:32       ` Avi Kivity
2012-06-19 10:41         ` Andrea Arcangeli
2012-06-20 15:13           ` Christoffer Dall
2012-06-20 17:49             ` Andrea Arcangeli
2012-06-15 19:09 ` [PATCH v8 14/15] ARM: KVM: Handle I/O aborts Christoffer Dall
2012-06-18 13:48   ` Avi Kivity
2012-06-18 22:28     ` Christoffer Dall
2012-06-15 19:09 ` [PATCH v8 15/15] ARM: KVM: Guest wait-for-interrupts (WFI) support Christoffer Dall
2012-06-28 21:49 ` [PATCH v8 00/15] KVM/ARM Implementation Marcelo Tosatti
2012-06-28 22:44   ` Christoffer Dall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120615190822.24590.25657.stgit@ubuntu \
    --to=c.dall@virtualopensystems.com \
    --cc=android-virt@lists.cs.columbia.edu \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox