From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: benh@kernel.crashing.org, paulus@samba.org
Cc: linuxppc-dev@lists.ozlabs.org,
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V3 25/25] powerpc: Handle hugepages in kvm
Date: Fri, 15 Mar 2013 15:10:07 +0530 [thread overview]
Message-ID: <1363340407-22619-26-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
We could possibly avoid some of these changes because most of the HUGE PMD bits
map to PTE bits.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 31 ++++++++++++
arch/powerpc/kvm/book3s_64_mmu_hv.c | 12 ++++-
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 75 ++++++++++++++++++++++--------
3 files changed, 97 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..1c5c799 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -110,6 +110,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
return rb;
}
+/* FIXME !! should we use hpte_actual_psize or hpte decode ? */
static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
/* only handle 4k, 64k and 16M pages for now */
@@ -189,6 +190,36 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
return pte;
}
+/*
+ * Lock and read a linux hugepage PMD. If large, set referenced (and dirty
+ * when writing to a writable PMD). Always unlock and return the PMD.
+ */
+static inline pmd_t kvmppc_read_update_linux_hugepmd(pmd_t *p, int writing)
+{
+ pmd_t pmd, tmp;
+
+ /* wait until PMD_HUGE_BUSY is clear then set it atomically */
+ __asm__ __volatile__ (
+ "1: ldarx %0,0,%3\n"
+ " andi. %1,%0,%4\n"
+ " bne- 1b\n"
+ " ori %1,%0,%4\n"
+ " stdcx. %1,0,%3\n"
+ " bne- 1b"
+ : "=&r" (pmd), "=&r" (tmp), "=m" (*p)
+ : "r" (p), "i" (PMD_HUGE_BUSY)
+ : "cc");
+
+ if (pmd_large(pmd)) {
+ pmd = pmd_mkyoung(pmd);
+ if (writing && pmd_write(pmd))
+ pmd = pmd_mkdirty(pmd);
+ }
+
+ *p = pmd; /* clears PMD_HUGE_BUSY */
+ return pmd;
+}
+
/* Return HPTE cache control bits corresponding to Linux pte bits */
static inline unsigned long hpte_cache_bits(unsigned long pte_val)
{
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4f2a7dc..da006da 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/* if the guest wants write access, see if that is OK */
if (!writing && hpte_is_writable(r)) {
+ int hugepage;
pte_t *ptep, pte;
/*
@@ -683,11 +684,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
rcu_read_lock_sched();
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL, NULL);
- if (ptep && pte_present(*ptep)) {
+ hva, NULL, &hugepage);
+ if (!hugepage && ptep && pte_present(*ptep)) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (pte_write(pte))
write_ok = 1;
+ } else if (hugepage && ptep) {
+ pmd_t pmd = *(pmd_t *)ptep;
+ if (pmd_large(pmd)) {
+ pmd = kvmppc_read_update_linux_hugepmd((pmd_t *)ptep, 1);
+ if (pmd_write(pmd))
+ write_ok = 1;
+ }
}
rcu_read_unlock_sched();
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7c8e1ed..e9d4e3a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -146,24 +146,37 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
}
static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
- int writing, unsigned long *pte_sizep)
+ int writing, unsigned long *pte_sizep,
+ int *hugepage)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, hugepage);
if (!ptep)
return __pte(0);
- if (shift)
- *pte_sizep = 1ul << shift;
- else
- *pte_sizep = PAGE_SIZE;
+ if (*hugepage) {
+ *pte_sizep = 1ul << 24; /* 16M */
+ } else {
+ if (shift)
+ *pte_sizep = 1ul << shift;
+ else
+ *pte_sizep = PAGE_SIZE;
+ }
if (ps > *pte_sizep)
return __pte(0);
- if (!pte_present(*ptep))
- return __pte(0);
- return kvmppc_read_update_linux_pte(ptep, writing);
+
+ if (*hugepage) {
+ pmd_t *pmdp = (pmd_t *)ptep;
+ if (!pmd_large(*pmdp))
+ return __pte(0);
+ return kvmppc_read_update_linux_hugepmd(pmdp, writing);
+ } else {
+ if (!pte_present(*ptep))
+ return __pte(0);
+ return kvmppc_read_update_linux_pte(ptep, writing);
+ }
}
static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -239,18 +252,34 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
} else {
+ int hugepage;
+
/* Translate to host virtual address */
hva = __gfn_to_hva_memslot(memslot, gfn);
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
- if (pte_present(pte)) {
- if (writing && !pte_write(pte))
- /* make the actual HPTE be read-only */
- ptel = hpte_make_readonly(ptel);
- is_io = hpte_cache_bits(pte_val(pte));
- pa = pte_pfn(pte) << PAGE_SHIFT;
+ pte = lookup_linux_pte(pgdir, hva, writing, &pte_size, &hugepage);
+ if (hugepage) {
+ pmd_t pmd = (pmd_t)pte;
+ if (pmd_large(pmd)) {
+ if (writing && !pmd_write(pmd))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ /*
+ * we support hugepage only for RAM
+ */
+ is_io = 0;
+ pa = pmd_pfn(pmd) << PAGE_SHIFT;
+ }
+ } else {
+ if (pte_present(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ is_io = hpte_cache_bits(pte_val(pte));
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ }
}
}
@@ -645,10 +674,18 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
if (memslot) {
+ int hugepage;
hva = __gfn_to_hva_memslot(memslot, gfn);
- pte = lookup_linux_pte(pgdir, hva, 1, &psize);
- if (pte_present(pte) && !pte_write(pte))
- r = hpte_make_readonly(r);
+ pte = lookup_linux_pte(pgdir, hva, 1,
+ &psize, &hugepage);
+ if (hugepage) {
+ pmd_t pmd = (pmd_t)pte;
+ if (pmd_large(pmd) && !pmd_write(pmd))
+ r = hpte_make_readonly(r);
+ } else {
+ if (pte_present(pte) && !pte_write(pte))
+ r = hpte_make_readonly(r);
+ }
}
}
}
--
1.7.10
prev parent reply other threads:[~2013-03-15 9:44 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-15 9:39 [PATCH -V3 00/25] THP support for PPC64 Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 01/25] powerpc: Use signed formatting when printing error Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 02/25] powerpc: Save DAR and DSISR in pt_regs on MCE Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 03/25] powerpc: Don't hard code the size of pte page Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 04/25] powerpc: Reduce the PTE_INDEX_SIZE Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 05/25] powerpc: Move the pte free routines from common header Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 06/25] powerpc: Reduce PTE table memory wastage Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 07/25] powerpc: Use encode avpn where we need only avpn values Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 08/25] powerpc: Decode the pte-lp-encoding bits correctly Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 09/25] powerpc: Fix hpte_decode to use the correct decoding for page sizes Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 10/25] powerpc: Return all the valid pte ecndoing in KVM_PPC_GET_SMMU_INFO ioctl Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 11/25] powerpc: Update tlbie/tlbiel as per ISA doc Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 12/25] powerpc: print both base and actual page size on hash failure Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 13/25] powerpc: Print page size info during boot Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 14/25] mm/THP: HPAGE_SHIFT is not a #define on some arch Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 15/25] mm/THP: Add pmd args to pgtable deposit and withdraw APIs Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 16/25] mm/THP: withdraw the pgtable after pmdp related operations Aneesh Kumar K.V
2013-03-15 9:39 ` [PATCH -V3 17/25] powerpc/THP: Implement transparent hugepages for ppc64 Aneesh Kumar K.V
2013-03-15 9:40 ` [PATCH -V3 18/25] powerpc/THP: Double the PMD table size for THP Aneesh Kumar K.V
2013-03-15 9:40 ` [PATCH -V3 19/25] powerpc/THP: Differentiate THP PMD entries from HUGETLB PMD entries Aneesh Kumar K.V
2013-03-15 9:40 ` [PATCH -V3 20/25] powerpc/THP: Add code to handle HPTE faults for large pages Aneesh Kumar K.V
2013-03-15 9:40 ` [PATCH -V3 21/25] powerpc: Handle hugepage in perf callchain Aneesh Kumar K.V
2013-03-15 9:40 ` [PATCH -V3 22/25] powerpc/THP: get_user_pages_fast changes Aneesh Kumar K.V
2013-03-15 9:40 ` [PATCH -V3 23/25] powerpc/THP: Enable THP on PPC64 Aneesh Kumar K.V
2013-03-15 9:40 ` [PATCH -V3 24/25] powerpc: Optimize hugepage invalidate Aneesh Kumar K.V
2013-03-15 9:40 ` Aneesh Kumar K.V [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1363340407-22619-26-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
--to=aneesh.kumar@linux.vnet.ibm.com \
--cc=benh@kernel.crashing.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).