linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: benh@kernel.crashing.org, paulus@samba.org
Cc: linuxppc-dev@lists.ozlabs.org, linux-mm@kvack.org,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V5 25/25] powerpc: Handle hugepages in kvm
Date: Thu,  4 Apr 2013 11:28:03 +0530	[thread overview]
Message-ID: <1365055083-31956-26-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We could possibly avoid some of these changes because most of the HUGE PMD bits
map to PTE bits.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   31 ++++++++++++
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |   12 ++++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   75 ++++++++++++++++++++++--------
 3 files changed, 97 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..1c5c799 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -110,6 +110,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	return rb;
 }
 
+/* FIXME !! should we use hpte_actual_psize or hpte decode ? */
 static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 {
 	/* only handle 4k, 64k and 16M pages for now */
@@ -189,6 +190,36 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
 	return pte;
 }
 
+/*
+ * Lock and read a linux hugepage PMD.  If it's present and writable, atomically
+ * set dirty and referenced bits and return the PMD, otherwise return 0.
+ */
+static inline pmd_t kvmppc_read_update_linux_hugepmd(pmd_t *p, int writing)
+{
+	pmd_t pmd, tmp;
+
+	/* wait until _PAGE_BUSY is clear then set it atomically */
+	__asm__ __volatile__ (
+		"1:	ldarx	%0,0,%3\n"
+		"	andi.	%1,%0,%4\n"
+		"	bne-	1b\n"
+		"	ori	%1,%0,%4\n"
+		"	stdcx.	%1,0,%3\n"
+		"	bne-	1b"
+		: "=&r" (pmd), "=&r" (tmp), "=m" (*p)
+		: "r" (p), "i" (PMD_HUGE_BUSY)
+		: "cc");
+
+	if (pmd_large(pmd)) {
+		pmd = pmd_mkyoung(pmd);
+		if (writing && pmd_write(pmd))
+			pmd = pte_mkdirty(pmd);
+	}
+
+	*p = pmd;	/* clears PMD_HUGE_BUSY */
+	return pmd;
+}
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4f2a7dc..da006da 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		/* if the guest wants write access, see if that is OK */
 		if (!writing && hpte_is_writable(r)) {
+			int hugepage;
 			pte_t *ptep, pte;
 
 			/*
@@ -683,11 +684,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 */
 			rcu_read_lock_sched();
 			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, NULL, NULL);
-			if (ptep && pte_present(*ptep)) {
+							 hva, NULL, &hugepage);
+			if (!hugepage && ptep && pte_present(*ptep)) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (pte_write(pte))
 					write_ok = 1;
+			} else if (hugepage && ptep) {
+				pmd_t pmd = *(pmd_t *)ptep;
+				if (pmd_large(pmd)) {
+					pmd = kvmppc_read_update_linux_hugepmd((pmd_t *)ptep, 1);
+					if (pmd_write(pmd))
+						write_ok = 1;
+				}
 			}
 			rcu_read_unlock_sched();
 		}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7c8e1ed..e9d4e3a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -146,24 +146,37 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 }
 
 static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
+			      int writing, unsigned long *pte_sizep,
+			      int *hugepage)
 {
 	pte_t *ptep;
 	unsigned long ps = *pte_sizep;
 	unsigned int shift;
 
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, hugepage);
 	if (!ptep)
 		return __pte(0);
-	if (shift)
-		*pte_sizep = 1ul << shift;
-	else
-		*pte_sizep = PAGE_SIZE;
+	if (*hugepage) {
+		*pte_sizep = 1ul << 24;
+	} else {
+		if (shift)
+			*pte_sizep = 1ul << shift;
+		else
+			*pte_sizep = PAGE_SIZE;
+	}
 	if (ps > *pte_sizep)
 		return __pte(0);
-	if (!pte_present(*ptep))
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing);
+
+	if (*hugepage) {
+		pmd_t *pmdp = (pmd_t *)ptep;
+		if (!pmd_large(*pmdp))
+			return __pmd(0);
+		return kvmppc_read_update_linux_hugepmd(pmdp, writing);
+	} else {
+		if (!pte_present(*ptep))
+			return __pte(0);
+		return kvmppc_read_update_linux_pte(ptep, writing);
+	}
 }
 
 static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -239,18 +252,34 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
 		pa &= PAGE_MASK;
 	} else {
+		int hugepage;
+
 		/* Translate to host virtual address */
 		hva = __gfn_to_hva_memslot(memslot, gfn);
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
-		if (pte_present(pte)) {
-			if (writing && !pte_write(pte))
-				/* make the actual HPTE be read-only */
-				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
-			pa = pte_pfn(pte) << PAGE_SHIFT;
+		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size, &hugepage);
+		if (hugepage) {
+			pmd_t pmd = (pmd_t)pte;
+			if (!pmd_large(pmd)) {
+				if (writing && !pmd_write(pmd))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				/*
+				 * we support hugepage only for RAM
+				 */
+				is_io = 0;
+				pa = pmd_pfn(pmd) << PAGE_SHIFT;
+			}
+		} else {
+			if (pte_present(pte)) {
+				if (writing && !pte_write(pte))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				is_io = hpte_cache_bits(pte_val(pte));
+				pa = pte_pfn(pte) << PAGE_SHIFT;
+			}
 		}
 	}
 
@@ -645,10 +674,18 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
 			if (memslot) {
+				int hugepage;
 				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
+				pte = lookup_linux_pte(pgdir, hva, 1,
+						       &psize, &hugepage);
+				if (hugepage) {
+					pmd_t pmd = (pmd_t)pte;
+					if (pmd_large(pmd) && !pmd_write(pmd))
+						r = hpte_make_readonly(r);
+				} else {
+					if (pte_present(pte) && !pte_write(pte))
+						r = hpte_make_readonly(r);
+				}
 			}
 		}
 	}
-- 
1.7.10

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-04-04  5:58 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-04  5:57 [PATCH -V5 00/25] THP support for PPC64 Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 01/25] powerpc: Use signed formatting when printing error Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 02/25] powerpc: Save DAR and DSISR in pt_regs on MCE Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 03/25] powerpc: Don't hard code the size of pte page Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 04/25] powerpc: Reduce the PTE_INDEX_SIZE Aneesh Kumar K.V
2013-04-11  7:10   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 05/25] powerpc: Move the pte free routines from common header Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 06/25] powerpc: Reduce PTE table memory wastage Aneesh Kumar K.V
2013-04-10  4:46   ` David Gibson
2013-04-10  6:29     ` Aneesh Kumar K.V
2013-04-10  7:04       ` David Gibson
2013-04-10  7:53         ` Aneesh Kumar K.V
2013-04-10 17:47           ` Aneesh Kumar K.V
2013-04-11  1:20             ` David Gibson
2013-04-11  1:12           ` David Gibson
2013-04-10  7:14   ` Michael Ellerman
2013-04-10  7:54     ` Aneesh Kumar K.V
2013-04-10  8:52       ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 07/25] powerpc: Use encode avpn where we need only avpn values Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 08/25] powerpc: Decode the pte-lp-encoding bits correctly Aneesh Kumar K.V
2013-04-10  7:19   ` David Gibson
2013-04-10  8:11     ` Aneesh Kumar K.V
2013-04-10 17:49       ` Aneesh Kumar K.V
2013-04-11  1:28       ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 09/25] powerpc: Fix hpte_decode to use the correct decoding for page sizes Aneesh Kumar K.V
2013-04-11  3:20   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 10/25] powerpc: print both base and actual page size on hash failure Aneesh Kumar K.V
2013-04-11  3:21   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 11/25] powerpc: Print page size info during boot Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 12/25] powerpc: Return all the valid pte ecndoing in KVM_PPC_GET_SMMU_INFO ioctl Aneesh Kumar K.V
2013-04-11  3:24   ` David Gibson
2013-04-11  5:11     ` Aneesh Kumar K.V
2013-04-11  5:57       ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 13/25] powerpc: Update tlbie/tlbiel as per ISA doc Aneesh Kumar K.V
2013-04-11  3:30   ` David Gibson
2013-04-11  5:20     ` Aneesh Kumar K.V
2013-04-11  6:16       ` David Gibson
2013-04-11  6:36         ` Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 14/25] mm/THP: HPAGE_SHIFT is not a #define on some arch Aneesh Kumar K.V
2013-04-11  3:36   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 15/25] mm/THP: Add pmd args to pgtable deposit and withdraw APIs Aneesh Kumar K.V
2013-04-11  3:40   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 16/25] mm/THP: withdraw the pgtable after pmdp related operations Aneesh Kumar K.V
2013-04-04  5:57 ` [PATCH -V5 17/25] powerpc/THP: Implement transparent hugepages for ppc64 Aneesh Kumar K.V
2013-04-11  5:38   ` David Gibson
2013-04-11  7:40     ` Aneesh Kumar K.V
2013-04-12  0:51       ` David Gibson
2013-04-12  5:06         ` Aneesh Kumar K.V
2013-04-12  5:39           ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 18/25] powerpc/THP: Double the PMD table size for THP Aneesh Kumar K.V
2013-04-11  6:18   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 19/25] powerpc/THP: Differentiate THP PMD entries from HUGETLB PMD entries Aneesh Kumar K.V
2013-04-10  7:21   ` Michael Ellerman
2013-04-10 18:26     ` Aneesh Kumar K.V
2013-04-12  1:28   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 20/25] powerpc/THP: Add code to handle HPTE faults for large pages Aneesh Kumar K.V
2013-04-12  4:01   ` David Gibson
2013-04-04  5:57 ` [PATCH -V5 21/25] powerpc: Handle hugepage in perf callchain Aneesh Kumar K.V
2013-04-12  1:34   ` David Gibson
2013-04-12  5:05     ` Aneesh Kumar K.V
2013-04-04  5:58 ` [PATCH -V5 22/25] powerpc/THP: get_user_pages_fast changes Aneesh Kumar K.V
2013-04-12  1:41   ` David Gibson
2013-04-04  5:58 ` [PATCH -V5 23/25] powerpc/THP: Enable THP on PPC64 Aneesh Kumar K.V
2013-04-04  5:58 ` [PATCH -V5 24/25] powerpc: Optimize hugepage invalidate Aneesh Kumar K.V
2013-04-12  4:21   ` David Gibson
2013-04-14 10:02     ` Aneesh Kumar K.V
2013-04-15  1:18       ` David Gibson
2013-04-04  5:58 ` Aneesh Kumar K.V [this message]
2013-04-04  6:00 ` [PATCH -V5 00/25] THP support for PPC64 Simon Jeons
2013-04-04  6:10   ` Aneesh Kumar K.V
2013-04-04  6:14 ` Simon Jeons
2013-04-04  8:38   ` Aneesh Kumar K.V
2013-04-19  1:55 ` Simon Jeons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1365055083-31956-26-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=benh@kernel.crashing.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).