linux-arm-kernel.lists.infradead.org archive mirror
* [RFC PATCH 1/2] KVM: ARM: Transparent huge pages and hugetlbfs support
@ 2013-06-18  2:17 Christoffer Dall
  2013-06-18  2:17 ` [RFC PATCH 2/2] arm64: KVM: Fix build errors after 32-bit THP support Christoffer Dall
  0 siblings, 1 reply; 3+ messages in thread
From: Christoffer Dall @ 2013-06-18  2:17 UTC (permalink / raw)
  To: linux-arm-kernel

From: Christoffer Dall <cdall@cs.columbia.edu>

Support transparent huge pages in 32-bit KVM/ARM.  The
transparent_hugepage_adjust logic is far from pretty, but this is how
it is solved on x86, so we duplicate that logic here.  Ideally it
should be shared across architectures (like many other things), but
that can always happen down the road.
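
A minimal user-space sketch of the alignment math that
transparent_hugepage_adjust performs (the names below are made up for
illustration; the real function also transfers the page refcount under
the mmu_lock):

    #include <stdint.h>
    #include <stdio.h>

    #define HPAGE_SHIFT 21                                 /* 2 MiB sections */
    #define HPAGE_MASK  ((1ULL << (HPAGE_SHIFT - 12)) - 1) /* 512 4K pages */

    int main(void)
    {
            uint64_t ipa = 0x40123000ULL;   /* faulting guest IPA */
            uint64_t pfn = 0x88123ULL;      /* host pfn backing it */

            /*
             * gfn and pfn must be congruent modulo the huge page size,
             * otherwise no section mapping is possible.
             */
            if (((ipa >> 12) & HPAGE_MASK) == (pfn & HPAGE_MASK)) {
                    ipa &= ~((1ULL << HPAGE_SHIFT) - 1);  /* align IPA down */
                    pfn &= ~HPAGE_MASK;                   /* align pfn down */
            }
            printf("ipa=%#llx pfn=%#llx\n",
                   (unsigned long long)ipa, (unsigned long long)pfn);
            return 0;
    }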

The pud_huge check on the unmap path may look silly, since pud_huge()
is currently always defined to return false, but the compiler should
be smart enough to eliminate the dead branch (see the sketch below).
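
To illustrate, a stand-alone sketch (the stub name is made up; the
kernel's definition is effectively the same constant):

    /*
     * Stand-in for pud_huge(), which is defined to return 0 here
     * since huge puds do not exist in this configuration.
     */
    static inline int pud_huge_stub(void) { return 0; }

    int unmap_step(void)
    {
            if (pud_huge_stub())
                    return 1;       /* dead: folded away at -O1 and up */
            return 0;
    }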

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h |   7 +-
 arch/arm/include/asm/kvm_mmu.h  |   6 +-
 arch/arm/kvm/mmu.c              | 158 +++++++++++++++++++++++++++++++++-------
 3 files changed, 137 insertions(+), 34 deletions(-)
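
A quick sanity check of the new sizes, assuming the usual 4 KiB base
pages (level 2 is the PMD/section level):

    KVM_HPAGE_GFN_SHIFT(2) = (2 - 1) * 21  = 21
    KVM_HPAGE_SIZE         = 1UL << 21     = 2 MiB
    KVM_PAGES_PER_HPAGE    = 2 MiB / 4 KiB = 512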

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 1f3cee2..45a165e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -33,10 +33,9 @@
 
 #define KVM_VCPU_MAX_FEATURES 1
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
+#define KVM_HPAGE_GFN_SHIFT(_level)	(((_level) - 1) * 21)
+#define KVM_HPAGE_SIZE			(1UL << KVM_HPAGE_GFN_SHIFT(2))
+#define KVM_PAGES_PER_HPAGE		(KVM_HPAGE_SIZE / PAGE_SIZE)
 
 #include <kvm/arm_vgic.h>
 
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac70..9ef71b1 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -105,7 +105,8 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
 
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+					      unsigned long size)
 {
 	/*
 	 * If we are going to insert an instruction page and the icache is
@@ -120,8 +121,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
 	 */
 	if (icache_is_pipt()) {
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+		__cpuc_coherent_user_range(hva, hva + size);
 	} else if (!icache_is_vivt_asid_tagged()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ca6bea4..9170c98 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
+#include <linux/hugetlb.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
@@ -87,19 +88,27 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	pud_clear(pud);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pmd_free(NULL, pmd_table);
+	if (pud_huge(*pud)) {
+		pud_clear(pud);
+	} else {
+		pmd_t *pmd_table = pmd_offset(pud, 0);
+		pud_clear(pud);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		pmd_free(NULL, pmd_table);
+	}
 	put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pte_free_kernel(NULL, pte_table);
+	if (pmd_huge(*pmd)) {
+		pmd_clear(pmd);
+	} else {
+		pte_t *pte_table = pte_offset_kernel(pmd, 0);
+		pmd_clear(pmd);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		pte_free_kernel(NULL, pte_table);
+	}
 	put_page(virt_to_page(pmd));
 }
 
@@ -142,12 +151,34 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			continue;
 		}
 
+		if (pud_huge(*pud)) {
+			/*
+			 * If we are dealing with a huge pud, just clear it and
+			 * move on.
+			 */
+			clear_pud_entry(kvm, pud, addr);
+			addr += PUD_SIZE;
+			continue;
+		}
+
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
 			addr += PMD_SIZE;
 			continue;
 		}
 
+		if (pmd_huge(*pmd)) {
+			/*
+			 * If we are dealing with a huge pmd, just clear it and
+			 * walk back up the ladder.
+			 */
+			clear_pmd_entry(kvm, pmd, addr);
+			if (pmd_empty(pmd))
+				clear_pud_entry(kvm, pud, addr);
+			addr += PMD_SIZE;
+			continue;
+		}
+
 		pte = pte_offset_kernel(pmd, addr);
 		clear_pte_entry(kvm, pte, addr);
 		range = PAGE_SIZE;
@@ -432,7 +463,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
+	pmd_t *pmd, old_pmd;
 	pte_t *pte, old_pte;
 
 	/* Create 2nd stage page table mapping - Level 1 */
@@ -448,7 +479,22 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 
 	pmd = pmd_offset(pud, addr);
 
-	/* Create 2nd stage page table mapping - Level 2 */
+	/* Create 2nd stage section mappings (huge tlb pages) - Level 2 */
+	if (pte_huge(*new_pte) || pmd_huge(*pmd)) {
+		pte_t *huge_pte = (pte_t *)pmd;
+		VM_BUG_ON(pmd_present(*pmd) && !pmd_huge(*pmd));
+
+		old_pmd = *pmd;
+		kvm_set_pte(huge_pte, *new_pte); /* new_pte really new_pmd */
+		if (pmd_present(old_pmd))
+			kvm_tlb_flush_vmid_ipa(kvm, addr);
+		else
+			get_page(virt_to_page(pmd));
+		return 0;
+	}
+
+	/* Create 2nd stage page mappings - Level 2 */
+	BUG_ON(pmd_present(*pmd) && pmd_huge(*pmd));
 	if (pmd_none(*pmd)) {
 		if (!cache)
 			return 0; /* ignore calls from kvm_set_spte_hva */
@@ -514,16 +560,55 @@ out:
 	return ret;
 }
 
+static bool transparent_hugepage_adjust(struct kvm *kvm, pfn_t *pfnp,
+					phys_addr_t *ipap)
+{
+	pfn_t pfn = *pfnp;
+	gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+	if (PageTransCompound(pfn_to_page(pfn))) {
+		unsigned long mask;
+		/*
+		 * mmu_notifier_retry was successful and we hold the
+		 * mmu_lock here, so the pmd can't become splitting
+		 * from under us, and in turn
+		 * __split_huge_page_refcount() can't run from under
+		 * us and we can safely transfer the refcount from
+		 * PG_tail to PG_head as we switch the pfn from tail to
+		 * head.
+		 */
+		mask = KVM_PAGES_PER_HPAGE - 1;
+		VM_BUG_ON((gfn & mask) != (pfn & mask));
+		if (pfn & mask) {
+			gfn &= ~mask;
+			*ipap &= ~(KVM_HPAGE_SIZE - 1);
+			kvm_release_pfn_clean(pfn);
+			pfn &= ~mask;
+			kvm_get_pfn(pfn);
+			*pfnp = pfn;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  gfn_t gfn, struct kvm_memory_slot *memslot,
+			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
 {
-	pte_t new_pte;
-	pfn_t pfn;
 	int ret;
-	bool write_fault, writable;
+	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
+	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
+	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+	struct vm_area_struct *vma;
+	pfn_t pfn;
+	pte_t new_pte;
+	unsigned long psize;
 
 	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -531,6 +616,27 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
+	/* Let's check if we will get back a huge page */
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (is_vm_hugetlb_page(vma)) {
+		hugetlb = true;
+		hva &= PMD_MASK;
+		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+		psize = PMD_SIZE;
+	} else {
+		psize = PAGE_SIZE;
+		if (vma->vm_start & ~PMD_MASK)
+			force_pte = true;
+	}
+	up_read(&current->mm->mmap_sem);
+
+	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
+	if (is_error_pfn(pfn))
+		return -EFAULT;
+
+	coherent_icache_guest_page(kvm, hva, psize);
+
 	/* We need minimum second+third level pages */
 	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
 	if (ret)
@@ -548,26 +654,24 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 */
 	smp_rmb();
 
-	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
-	if (is_error_pfn(pfn))
-		return -EFAULT;
-
-	new_pte = pfn_pte(pfn, PAGE_S2);
-	coherent_icache_guest_page(vcpu->kvm, gfn);
-
-	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+	spin_lock(&kvm->mmu_lock);
+	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
+	if (!hugetlb && !force_pte)
+		hugetlb = transparent_hugepage_adjust(kvm, &pfn, &fault_ipa);
+	new_pte = pfn_pte(pfn, PAGE_S2);
+	if (hugetlb)
+		new_pte = pte_mkhuge(new_pte);
 	if (writable) {
 		kvm_set_s2pte_writable(&new_pte);
 		kvm_set_pfn_dirty(pfn);
 	}
-	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+	ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
 
 out_unlock:
-	spin_unlock(&vcpu->kvm->mmu_lock);
+	spin_unlock(&kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return 0;
+	return ret;
 }
 
 /**
@@ -636,7 +740,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 
-	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
-- 
1.8.1.2

* [RFC PATCH 2/2] arm64: KVM: Fix build errors after 32-bit THP support
  2013-06-18  2:17 [RFC PATCH 1/2] KVM: ARM: Transparent huge pages and hugetlbfs support Christoffer Dall
@ 2013-06-18  2:17 ` Christoffer Dall
  2013-06-18 10:31   ` Catalin Marinas
  0 siblings, 1 reply; 3+ messages in thread
From: Christoffer Dall @ 2013-06-18  2:17 UTC (permalink / raw)
  To: linux-arm-kernel

When 32-bit KVM adds THP support, the KVM/arm64 build will break,
because we rename some definitions to be saner and change the
interface of coherent_icache_guest_page.

Huge pages are not supported on arm64 and the pgtable predicates will
always return false, so this shouldn't change any functionality on the
64-bit side.
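
On the caller side the interface change looks like this (fragment
only; the 32-bit fault handler in the previous patch already makes
the equivalent call):

    /* before: flush a single page, looked up from the gfn */
    coherent_icache_guest_page(kvm, gfn);

    /* after: the caller passes the hva and the mapping size, so a
     * whole 2 MiB section can be made coherent in one call */
    coherent_icache_guest_page(kvm, hva, PMD_SIZE);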

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 4 ++--
 arch/arm64/include/asm/kvm_mmu.h  | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 644d739..f5d73dc 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -38,8 +38,8 @@
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
+#define KVM_HPAGE_SIZE		1
+#define KVM_PAGES_PER_HPAGE	(1UL<<31)
 
 struct kvm_vcpu;
 int kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index efe609c..c86749e 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -118,11 +118,11 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
 
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+					      size_t size)
 {
 	if (!icache_is_aliasing()) {		/* PIPT */
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		flush_icache_range(hva, hva + PAGE_SIZE);
+		flush_icache_range(hva, hva + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
 		/* any kind of VIPT cache */
 		__flush_icache_all();
-- 
1.8.1.2

* [RFC PATCH 2/2] arm64: KVM: Fix build errors after 32-bit THP support
  2013-06-18  2:17 ` [RFC PATCH 2/2] arm64: KVM: Fix build errors after 32-bit THP support Christoffer Dall
@ 2013-06-18 10:31   ` Catalin Marinas
  0 siblings, 0 replies; 3+ messages in thread
From: Catalin Marinas @ 2013-06-18 10:31 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jun 18, 2013 at 03:17:41AM +0100, Christoffer Dall wrote:
> When 32-bit KVM adds THP support, the KVM/arm64 build will break,
> because we rename some definitions to be saner and change the
> interface of coherent_icache_guest_page.
> 
> Huge pages are not supported on arm64 and the pgtable predicates will
> always return false, so this shouldn't change any functionality on the
> 64-bit side.

FYI, huge pages will be supported on arm64 starting with 3.11-rc1. Given
that it's -rc6 now, I would suggest you aim the kvm patches at 3.12.

-- 
Catalin
