All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: benh@kernel.crashing.org, paulus@samba.org
Cc: linuxppc-dev@lists.ozlabs.org,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V3 25/25] powerpc: Handle hugepages in kvm
Date: Fri, 15 Mar 2013 15:10:07 +0530	[thread overview]
Message-ID: <1363340407-22619-26-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We could possibly avoid some of these changes because most of the HUGE PMD bits
map to PTE bits.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   31 ++++++++++++
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |   12 ++++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   75 ++++++++++++++++++++++--------
 3 files changed, 97 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..1c5c799 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -110,6 +110,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	return rb;
 }
 
+/* FIXME !! should we use hpte_actual_psize or hpte decode ? */
 static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 {
 	/* only handle 4k, 64k and 16M pages for now */
@@ -189,6 +190,36 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
 	return pte;
 }
 
+/*
+ * Lock and read a linux hugepage PMD.  If it's present and writable, atomically
+ * set dirty and referenced bits and return the PMD, otherwise return 0.
+ */
+static inline pmd_t kvmppc_read_update_linux_hugepmd(pmd_t *p, int writing)
+{
+	pmd_t pmd, tmp;
+
+	/* wait until _PAGE_BUSY is clear then set it atomically */
+	__asm__ __volatile__ (
+		"1:	ldarx	%0,0,%3\n"
+		"	andi.	%1,%0,%4\n"
+		"	bne-	1b\n"
+		"	ori	%1,%0,%4\n"
+		"	stdcx.	%1,0,%3\n"
+		"	bne-	1b"
+		: "=&r" (pmd), "=&r" (tmp), "=m" (*p)
+		: "r" (p), "i" (PMD_HUGE_BUSY)
+		: "cc");
+
+	if (pmd_large(pmd)) {
+		pmd = pmd_mkyoung(pmd);
+		if (writing && pmd_write(pmd))
+			pmd = pte_mkdirty(pmd);
+	}
+
+	*p = pmd;	/* clears PMD_HUGE_BUSY */
+	return pmd;
+}
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4f2a7dc..da006da 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		/* if the guest wants write access, see if that is OK */
 		if (!writing && hpte_is_writable(r)) {
+			int hugepage;
 			pte_t *ptep, pte;
 
 			/*
@@ -683,11 +684,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 */
 			rcu_read_lock_sched();
 			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, NULL, NULL);
-			if (ptep && pte_present(*ptep)) {
+							 hva, NULL, &hugepage);
+			if (!hugepage && ptep && pte_present(*ptep)) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (pte_write(pte))
 					write_ok = 1;
+			} else if (hugepage && ptep) {
+				pmd_t pmd = *(pmd_t *)ptep;
+				if (pmd_large(pmd)) {
+					pmd = kvmppc_read_update_linux_hugepmd((pmd_t *)ptep, 1);
+					if (pmd_write(pmd))
+						write_ok = 1;
+				}
 			}
 			rcu_read_unlock_sched();
 		}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7c8e1ed..e9d4e3a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -146,24 +146,37 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 }
 
 static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
+			      int writing, unsigned long *pte_sizep,
+			      int *hugepage)
 {
 	pte_t *ptep;
 	unsigned long ps = *pte_sizep;
 	unsigned int shift;
 
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, hugepage);
 	if (!ptep)
 		return __pte(0);
-	if (shift)
-		*pte_sizep = 1ul << shift;
-	else
-		*pte_sizep = PAGE_SIZE;
+	if (*hugepage) {
+		*pte_sizep = 1ul << 24;
+	} else {
+		if (shift)
+			*pte_sizep = 1ul << shift;
+		else
+			*pte_sizep = PAGE_SIZE;
+	}
 	if (ps > *pte_sizep)
 		return __pte(0);
-	if (!pte_present(*ptep))
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing);
+
+	if (*hugepage) {
+		pmd_t *pmdp = (pmd_t *)ptep;
+		if (!pmd_large(*pmdp))
+			return __pmd(0);
+		return kvmppc_read_update_linux_hugepmd(pmdp, writing);
+	} else {
+		if (!pte_present(*ptep))
+			return __pte(0);
+		return kvmppc_read_update_linux_pte(ptep, writing);
+	}
 }
 
 static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -239,18 +252,34 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
 		pa &= PAGE_MASK;
 	} else {
+		int hugepage;
+
 		/* Translate to host virtual address */
 		hva = __gfn_to_hva_memslot(memslot, gfn);
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
-		if (pte_present(pte)) {
-			if (writing && !pte_write(pte))
-				/* make the actual HPTE be read-only */
-				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
-			pa = pte_pfn(pte) << PAGE_SHIFT;
+		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size, &hugepage);
+		if (hugepage) {
+			pmd_t pmd = (pmd_t)pte;
+			if (!pmd_large(pmd)) {
+				if (writing && !pmd_write(pmd))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				/*
+				 * we support hugepage only for RAM
+				 */
+				is_io = 0;
+				pa = pmd_pfn(pmd) << PAGE_SHIFT;
+			}
+		} else {
+			if (pte_present(pte)) {
+				if (writing && !pte_write(pte))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				is_io = hpte_cache_bits(pte_val(pte));
+				pa = pte_pfn(pte) << PAGE_SHIFT;
+			}
 		}
 	}
 
@@ -645,10 +674,18 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
 			if (memslot) {
+				int hugepage;
 				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
+				pte = lookup_linux_pte(pgdir, hva, 1,
+						       &psize, &hugepage);
+				if (hugepage) {
+					pmd_t pmd = (pmd_t)pte;
+					if (pmd_large(pmd) && !pmd_write(pmd))
+						r = hpte_make_readonly(r);
+				} else {
+					if (pte_present(pte) && !pte_write(pte))
+						r = hpte_make_readonly(r);
+				}
 			}
 		}
 	}
-- 
1.7.10

      parent reply	other threads:[~2013-03-15  9:44 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-15  9:39 [PATCH -V3 00/25] THP support for PPC64 Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 01/25] powerpc: Use signed formatting when printing error Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 02/25] powerpc: Save DAR and DSISR in pt_regs on MCE Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 03/25] powerpc: Don't hard code the size of pte page Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 04/25] powerpc: Reduce the PTE_INDEX_SIZE Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 05/25] powerpc: Move the pte free routines from common header Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 06/25] powerpc: Reduce PTE table memory wastage Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 07/25] powerpc: Use encode avpn where we need only avpn values Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 08/25] powerpc: Decode the pte-lp-encoding bits correctly Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 09/25] powerpc: Fix hpte_decode to use the correct decoding for page sizes Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 10/25] powerpc: Return all the valid pte ecndoing in KVM_PPC_GET_SMMU_INFO ioctl Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 11/25] powerpc: Update tlbie/tlbiel as per ISA doc Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 12/25] powerpc: print both base and actual page size on hash failure Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 13/25] powerpc: Print page size info during boot Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 14/25] mm/THP: HPAGE_SHIFT is not a #define on some arch Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 15/25] mm/THP: Add pmd args to pgtable deposit and withdraw APIs Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 16/25] mm/THP: withdraw the pgtable after pmdp related operations Aneesh Kumar K.V
2013-03-15  9:39 ` [PATCH -V3 17/25] powerpc/THP: Implement transparent hugepages for ppc64 Aneesh Kumar K.V
2013-03-15  9:40 ` [PATCH -V3 18/25] powerpc/THP: Double the PMD table size for THP Aneesh Kumar K.V
2013-03-15  9:40 ` [PATCH -V3 19/25] powerpc/THP: Differentiate THP PMD entries from HUGETLB PMD entries Aneesh Kumar K.V
2013-03-15  9:40 ` [PATCH -V3 20/25] powerpc/THP: Add code to handle HPTE faults for large pages Aneesh Kumar K.V
2013-03-15  9:40 ` [PATCH -V3 21/25] powerpc: Handle hugepage in perf callchain Aneesh Kumar K.V
2013-03-15  9:40 ` [PATCH -V3 22/25] powerpc/THP: get_user_pages_fast changes Aneesh Kumar K.V
2013-03-15  9:40 ` [PATCH -V3 23/25] powerpc/THP: Enable THP on PPC64 Aneesh Kumar K.V
2013-03-15  9:40 ` [PATCH -V3 24/25] powerpc: Optimize hugepage invalidate Aneesh Kumar K.V
2013-03-15  9:40 ` Aneesh Kumar K.V [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1363340407-22619-26-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=benh@kernel.crashing.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.