All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: benh@kernel.crashing.org, paulus@samba.org, mpe@ellerman.id.au
Cc: linuxppc-dev@lists.ozlabs.org,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH V2 63/68] powerpc/mm: THP is only available on hash64 as of now
Date: Sat,  9 Apr 2016 11:43:59 +0530	[thread overview]
Message-ID: <1460182444-2468-64-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1460182444-2468-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

Only code movement in this patch. No functionality change.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  24 +-
 arch/powerpc/mm/pgtable-hash64.c             | 363 ++++++++++++++++++++++++++-
 arch/powerpc/mm/pgtable_64.c                 | 360 --------------------------
 3 files changed, 371 insertions(+), 376 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index be8f46853cc9..e162122ee533 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -777,18 +777,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
 #endif
 struct page *realmode_pfn_to_page(unsigned long pfn);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
-extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		       pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-				 pmd_t *pmd);
-extern int has_transparent_hugepage(void);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-
 static inline pte_t pmd_pte(pmd_t pmd)
 {
 	return __pte(pmd_val(pmd));
@@ -803,7 +791,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 {
 	return (pte_t *)pmd;
 }
-
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
@@ -830,6 +817,16 @@ static inline int pmd_protnone(pmd_t pmd)
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+extern int has_transparent_hugepage(void);
+
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
 	return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
@@ -878,5 +875,6 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
 	 */
 	return true;
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index b926b0a4ae0c..a11fde8ac274 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -12,15 +12,15 @@
  *
  */
 
-/*
- * PPC64 THP Support for hash based MMUs
- */
 #include <linux/sched.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
 #include "mmu_decl.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * On hash-based CPUs, the vmemmap is bolted in the hash table.
@@ -99,3 +99,360 @@ int hlmap_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
 	smp_wmb();
 	return 0;
 }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp, pmd_t entry, int dirty)
+{
+	int changed;
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+	changed = !pmd_same(*(pmdp), entry);
+	if (changed) {
+		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+		/*
+		 * Since we are not supporting SW TLB systems, we don't
+		 * have any thing similar to flush_tlb_page_nohash()
+		 */
+	}
+	return changed;
+}
+
+unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+				  pmd_t *pmdp, unsigned long clr,
+				  unsigned long set)
+{
+
+	unsigned long old, tmp;
+	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&mm->page_table_lock);
+#endif
+
+	clr = cpu_to_be64(clr);
+	set = cpu_to_be64(set);
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3\n\
+		and.	%1,%0,%6\n\
+		bne-	1b \n\
+		andc	%1,%0,%4 \n\
+		or	%1,%1,%7\n\
+		stdcx.	%1,0,%3 \n\
+		bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+	: "r" (pmdp), "r" (clr), "m" (*pmdp), "r" (busy), "r" (set)
+	: "cc" );
+
+	old = be64_to_cpu(old);
+
+	trace_hugepage_update(addr, old, clr, set);
+	if (old & H_PAGE_HASHPTE)
+		hpte_do_hugepage_flush(mm, addr, pmdp, old);
+	return old;
+}
+
+pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	pmd_t pmd;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(pmd_trans_huge(*pmdp));
+
+	pmd = *pmdp;
+	pmd_clear(pmdp);
+	/*
+	 * Wait for all pending hash_page to finish. This is needed
+	 * in case of subpage collapse. When we collapse normal pages
+	 * to hugepage, we first clear the pmd, then invalidate all
+	 * the PTE entries. The assumption here is that any low level
+	 * page fault will see a none pmd and take the slow path that
+	 * will wait on mmap_sem. But we could very well be in a
+	 * hash_page with local ptep pointer value. Such a hash page
+	 * can result in adding new HPTE entries for normal subpages.
+	 * That means we could be modifying the page content as we
+	 * copy them to a huge page. So wait for parallel hash_page
+	 * to finish before invalidating HPTE entries. We can do this
+	 * by sending an IPI to all the cpus and executing a dummy
+	 * function there.
+	 */
+	kick_all_cpus_sync();
+	/*
+	 * Now invalidate the hpte entries in the range
+	 * covered by pmd. This make sure we take a
+	 * fault and will find the pmd as none, which will
+	 * result in a major fault which takes mmap_sem and
+	 * hence wait for collapse to complete. Without this
+	 * the __collapse_huge_page_copy can result in copying
+	 * the old content.
+	 */
+	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
+	return pmd;
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long address, pmd_t *pmdp)
+{
+	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We want to put the pgtable in pmd and use pgtable for tracking
+ * the base page size hptes
+ */
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	pgtable_t *pgtable_slot;
+	assert_spin_locked(&mm->page_table_lock);
+	/*
+	 * we store the pgtable in the second half of PMD
+	 */
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	*pgtable_slot = pgtable;
+	/*
+	 * expose the deposited pgtable to other cpus.
+	 * before we set the hugepage PTE at pmd level
+	 * hash fault code looks at the deposted pgtable
+	 * to store hash index values.
+	 */
+	smp_wmb();
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	pgtable_t pgtable;
+	pgtable_t *pgtable_slot;
+
+	assert_spin_locked(&mm->page_table_lock);
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	pgtable = *pgtable_slot;
+	/*
+	 * Once we withdraw, mark the entry NULL.
+	 */
+	*pgtable_slot = NULL;
+	/*
+	 * We store HPTE information in the deposited PTE fragment.
+	 * zero out the content on withdraw.
+	 */
+	memset(pgtable, 0, PTE_FRAG_SIZE);
+	return pgtable;
+}
+
+void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+			     unsigned long address, pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+	/*
+	 * We can't mark the pmd none here, because that will cause a race
+	 * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
+	 * we spilt, but at the same time we wan't rest of the ppc64 code
+	 * not to insert hash pte on this, because we will be modifying
+	 * the deposited pgtable in the caller of this function. Hence
+	 * clear the _PAGE_USER so that we move the fault handling to
+	 * higher level function and that will serialize against ptl.
+	 * We need to flush existing hash pte entries here even though,
+	 * the translation is still valid, because we will withdraw
+	 * pgtable_t after this.
+	 */
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
+}
+
+
+/*
+ * set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+	assert_spin_locked(&mm->page_table_lock);
+	WARN_ON(!pmd_trans_huge(pmd));
+#endif
+	trace_hugepage_set_pmd(addr, pmd_val(pmd));
+	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
+}
+
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
+void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+		     pmd_t *pmdp)
+{
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+	/*
+	 * This ensures that generic code that rely on IRQ disabling
+	 * to prevent a parallel THP split work as expected.
+	 */
+	kick_all_cpus_sync();
+}
+
+/*
+ * A linux hugepage PMD was changed and the corresponding hash table entries
+ * neesd to be flushed.
+ */
+void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+			    pmd_t *pmdp, unsigned long old_pmd)
+{
+	int ssize;
+	unsigned int psize;
+	unsigned long vsid;
+	unsigned long flags = 0;
+	const struct cpumask *tmp;
+
+	/* get the base page size,vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
+	psize = get_slice_psize(mm, addr);
+	BUG_ON(psize == MMU_PAGE_16M);
+#endif
+	if (old_pmd & H_PAGE_COMBO)
+		psize = MMU_PAGE_4K;
+	else
+		psize = MMU_PAGE_64K;
+
+	if (!is_kernel_addr(addr)) {
+		ssize = user_segment_size(addr);
+		vsid = get_vsid(mm->context.id, addr, ssize);
+		WARN_ON(vsid == 0);
+	} else {
+		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+		ssize = mmu_kernel_ssize;
+	}
+
+	tmp = cpumask_of(smp_processor_id());
+	if (cpumask_equal(mm_cpumask(mm), tmp))
+		flags |= HPTE_LOCAL_UPDATE;
+
+	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
+}
+
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	unsigned long pmdv;
+
+	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
+	return pmd_set_protbits(__pmd(pmdv), pgprot);
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+	return pfn_pmd(page_to_pfn(page), pgprot);
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	unsigned long pmdv;
+
+	pmdv = pmd_val(pmd);
+	pmdv &= _HPAGE_CHG_MASK;
+	return pmd_set_protbits(__pmd(pmdv), newprot);
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a HUGE PMD entry in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux HUGE PMD entry.
+ */
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+			  pmd_t *pmd)
+{
+	return;
+}
+
+pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old_pmd;
+	pgtable_t pgtable;
+	unsigned long old;
+	pgtable_t *pgtable_slot;
+
+	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+	old_pmd = __pmd(old);
+	/*
+	 * We have pmd == none and we are holding page_table_lock.
+	 * So we can safely go and clear the pgtable hash
+	 * index info.
+	 */
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	pgtable = *pgtable_slot;
+	/*
+	 * Let's zero out old valid and hash index details
+	 * hash fault look at them.
+	 */
+	memset(pgtable, 0, PTE_FRAG_SIZE);
+	/*
+	 * Serialize against find_linux_pte_or_hugepte which does lock-less
+	 * lookup in page tables with local interrupts disabled. For huge pages
+	 * it casts pmd_t to pte_t. Since format of pte_t is different from
+	 * pmd_t we want to prevent transit from pmd pointing to page table
+	 * to pmd pointing to huge page (and back) while interrupts are disabled.
+	 * We clear pmd to possibly replace it with page table pointer in
+	 * different code paths. So make sure we wait for the parallel
+	 * find_linux_pte_or_hugepage to finish.
+	 */
+	kick_all_cpus_sync();
+	return old_pmd;
+}
+
+int has_transparent_hugepage(void)
+{
+
+	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+		return 0;
+	/*
+	 * We support THP only if PMD_SIZE is 16MB.
+	 */
+	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
+		return 0;
+	/*
+	 * We need to make sure that we support 16MB hugepage in a segement
+	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
+	 * of 64K.
+	 */
+	/*
+	 * If we have 64K HPTE, we will be using that by default
+	 */
+	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
+		return 0;
+	/*
+	 * Ok we only have 4K HPTE
+	 */
+	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
+		return 0;
+
+	return 1;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 092c07115612..2ab2fa2aa32a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -55,9 +55,6 @@
 
 #include "mmu_decl.h"
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/thp.h>
-
 #ifdef CONFIG_PPC_STD_MMU_64
 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
 #error TASK_SIZE_USER64 exceeds user VSID range
@@ -433,360 +430,3 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 	}
 }
 #endif
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
-/*
- * This is called when relaxing access to a hugepage. It's also called in the page
- * fault path when we don't hit any of the major fault cases, ie, a minor
- * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
- * handled those two for us, we additionally deal with missing execute
- * permission here on some processors
- */
-int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
-			  pmd_t *pmdp, pmd_t entry, int dirty)
-{
-	int changed;
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(!pmd_trans_huge(*pmdp));
-	assert_spin_locked(&vma->vm_mm->page_table_lock);
-#endif
-	changed = !pmd_same(*(pmdp), entry);
-	if (changed) {
-		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
-		/*
-		 * Since we are not supporting SW TLB systems, we don't
-		 * have any thing similar to flush_tlb_page_nohash()
-		 */
-	}
-	return changed;
-}
-
-unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
-				  pmd_t *pmdp, unsigned long clr,
-				  unsigned long set)
-{
-
-	unsigned long old, tmp;
-	unsigned long busy = cpu_to_be64(H_PAGE_BUSY);
-
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(!pmd_trans_huge(*pmdp));
-	assert_spin_locked(&mm->page_table_lock);
-#endif
-
-	clr = cpu_to_be64(clr);
-	set = cpu_to_be64(set);
-	__asm__ __volatile__(
-	"1:	ldarx	%0,0,%3\n\
-		and.	%1,%0,%6\n\
-		bne-	1b \n\
-		andc	%1,%0,%4 \n\
-		or	%1,%1,%7\n\
-		stdcx.	%1,0,%3 \n\
-		bne-	1b"
-	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
-	: "r" (pmdp), "r" (clr), "m" (*pmdp), "r" (busy), "r" (set)
-	: "cc" );
-
-	old = be64_to_cpu(old);
-
-	trace_hugepage_update(addr, old, clr, set);
-	if (old & H_PAGE_HASHPTE)
-		hpte_do_hugepage_flush(mm, addr, pmdp, old);
-	return old;
-}
-
-pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
-			  pmd_t *pmdp)
-{
-	pmd_t pmd;
-
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	VM_BUG_ON(pmd_trans_huge(*pmdp));
-
-	pmd = *pmdp;
-	pmd_clear(pmdp);
-	/*
-	 * Wait for all pending hash_page to finish. This is needed
-	 * in case of subpage collapse. When we collapse normal pages
-	 * to hugepage, we first clear the pmd, then invalidate all
-	 * the PTE entries. The assumption here is that any low level
-	 * page fault will see a none pmd and take the slow path that
-	 * will wait on mmap_sem. But we could very well be in a
-	 * hash_page with local ptep pointer value. Such a hash page
-	 * can result in adding new HPTE entries for normal subpages.
-	 * That means we could be modifying the page content as we
-	 * copy them to a huge page. So wait for parallel hash_page
-	 * to finish before invalidating HPTE entries. We can do this
-	 * by sending an IPI to all the cpus and executing a dummy
-	 * function there.
-	 */
-	kick_all_cpus_sync();
-	/*
-	 * Now invalidate the hpte entries in the range
-	 * covered by pmd. This make sure we take a
-	 * fault and will find the pmd as none, which will
-	 * result in a major fault which takes mmap_sem and
-	 * hence wait for collapse to complete. Without this
-	 * the __collapse_huge_page_copy can result in copying
-	 * the old content.
-	 */
-	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
-	return pmd;
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-			      unsigned long address, pmd_t *pmdp)
-{
-	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
-/*
- * We want to put the pgtable in pmd and use pgtable for tracking
- * the base page size hptes
- */
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
-{
-	pgtable_t *pgtable_slot;
-	assert_spin_locked(&mm->page_table_lock);
-	/*
-	 * we store the pgtable in the second half of PMD
-	 */
-	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
-	*pgtable_slot = pgtable;
-	/*
-	 * expose the deposited pgtable to other cpus.
-	 * before we set the hugepage PTE at pmd level
-	 * hash fault code looks at the deposted pgtable
-	 * to store hash index values.
-	 */
-	smp_wmb();
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
-	pgtable_t pgtable;
-	pgtable_t *pgtable_slot;
-
-	assert_spin_locked(&mm->page_table_lock);
-	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
-	pgtable = *pgtable_slot;
-	/*
-	 * Once we withdraw, mark the entry NULL.
-	 */
-	*pgtable_slot = NULL;
-	/*
-	 * We store HPTE information in the deposited PTE fragment.
-	 * zero out the content on withdraw.
-	 */
-	memset(pgtable, 0, PTE_FRAG_SIZE);
-	return pgtable;
-}
-
-void pmdp_huge_split_prepare(struct vm_area_struct *vma,
-			     unsigned long address, pmd_t *pmdp)
-{
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
-
-	/*
-	 * We can't mark the pmd none here, because that will cause a race
-	 * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
-	 * we spilt, but at the same time we wan't rest of the ppc64 code
-	 * not to insert hash pte on this, because we will be modifying
-	 * the deposited pgtable in the caller of this function. Hence
-	 * clear the _PAGE_USER so that we move the fault handling to
-	 * higher level function and that will serialize against ptl.
-	 * We need to flush existing hash pte entries here even though,
-	 * the translation is still valid, because we will withdraw
-	 * pgtable_t after this.
-	 */
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
-}
-
-
-/*
- * set a new huge pmd. We should not be called for updating
- * an existing pmd entry. That should go via pmd_hugepage_update.
- */
-void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		pmd_t *pmdp, pmd_t pmd)
-{
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
-	assert_spin_locked(&mm->page_table_lock);
-	WARN_ON(!pmd_trans_huge(pmd));
-#endif
-	trace_hugepage_set_pmd(addr, pmd_val(pmd));
-	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
-}
-
-/*
- * We use this to invalidate a pmdp entry before switching from a
- * hugepte to regular pmd entry.
- */
-void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
-		     pmd_t *pmdp)
-{
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
-
-	/*
-	 * This ensures that generic code that rely on IRQ disabling
-	 * to prevent a parallel THP split work as expected.
-	 */
-	kick_all_cpus_sync();
-}
-
-/*
- * A linux hugepage PMD was changed and the corresponding hash table entries
- * neesd to be flushed.
- */
-void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-			    pmd_t *pmdp, unsigned long old_pmd)
-{
-	int ssize;
-	unsigned int psize;
-	unsigned long vsid;
-	unsigned long flags = 0;
-	const struct cpumask *tmp;
-
-	/* get the base page size,vsid and segment size */
-#ifdef CONFIG_DEBUG_VM
-	psize = get_slice_psize(mm, addr);
-	BUG_ON(psize == MMU_PAGE_16M);
-#endif
-	if (old_pmd & H_PAGE_COMBO)
-		psize = MMU_PAGE_4K;
-	else
-		psize = MMU_PAGE_64K;
-
-	if (!is_kernel_addr(addr)) {
-		ssize = user_segment_size(addr);
-		vsid = get_vsid(mm->context.id, addr, ssize);
-		WARN_ON(vsid == 0);
-	} else {
-		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
-		ssize = mmu_kernel_ssize;
-	}
-
-	tmp = cpumask_of(smp_processor_id());
-	if (cpumask_equal(mm_cpumask(mm), tmp))
-		flags |= HPTE_LOCAL_UPDATE;
-
-	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
-}
-
-static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
-{
-	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
-}
-
-pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
-{
-	unsigned long pmdv;
-
-	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
-	return pmd_set_protbits(__pmd(pmdv), pgprot);
-}
-
-pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
-{
-	return pfn_pmd(page_to_pfn(page), pgprot);
-}
-
-pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
-	unsigned long pmdv;
-
-	pmdv = pmd_val(pmd);
-	pmdv &= _HPAGE_CHG_MASK;
-	return pmd_set_protbits(__pmd(pmdv), newprot);
-}
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a HUGE PMD entry in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux HUGE PMD entry.
- */
-void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-			  pmd_t *pmd)
-{
-	return;
-}
-
-pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-			      unsigned long addr, pmd_t *pmdp)
-{
-	pmd_t old_pmd;
-	pgtable_t pgtable;
-	unsigned long old;
-	pgtable_t *pgtable_slot;
-
-	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
-	old_pmd = __pmd(old);
-	/*
-	 * We have pmd == none and we are holding page_table_lock.
-	 * So we can safely go and clear the pgtable hash
-	 * index info.
-	 */
-	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
-	pgtable = *pgtable_slot;
-	/*
-	 * Let's zero out old valid and hash index details
-	 * hash fault look at them.
-	 */
-	memset(pgtable, 0, PTE_FRAG_SIZE);
-	/*
-	 * Serialize against find_linux_pte_or_hugepte which does lock-less
-	 * lookup in page tables with local interrupts disabled. For huge pages
-	 * it casts pmd_t to pte_t. Since format of pte_t is different from
-	 * pmd_t we want to prevent transit from pmd pointing to page table
-	 * to pmd pointing to huge page (and back) while interrupts are disabled.
-	 * We clear pmd to possibly replace it with page table pointer in
-	 * different code paths. So make sure we wait for the parallel
-	 * find_linux_pte_or_hugepage to finish.
-	 */
-	kick_all_cpus_sync();
-	return old_pmd;
-}
-
-int has_transparent_hugepage(void)
-{
-
-	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
-		return 0;
-	/*
-	 * We support THP only if PMD_SIZE is 16MB.
-	 */
-	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
-		return 0;
-	/*
-	 * We need to make sure that we support 16MB hugepage in a segement
-	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
-	 * of 64K.
-	 */
-	/*
-	 * If we have 64K HPTE, we will be using that by default
-	 */
-	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
-	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
-		return 0;
-	/*
-	 * Ok we only have 4K HPTE
-	 */
-	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
-		return 0;
-
-	return 1;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-- 
2.5.0

  parent reply	other threads:[~2016-04-09  6:17 UTC|newest]

Thread overview: 139+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-09  6:12 [PATCH V2 00/68] PowerISA 3.0 Radix page table support Aneesh Kumar K.V
2016-04-09  6:12 ` [PATCH V2 01/68] powerpc/cxl: Use REGION_ID instead of opencoding Aneesh Kumar K.V
2016-04-11  0:37   ` Andrew Donnellan
2016-04-13  2:42   ` Aneesh Kumar K.V
2016-04-20  3:03     ` Michael Ellerman
2016-04-20  7:53       ` Aneesh Kumar K.V
2016-04-09  6:12 ` [PATCH V2 02/68] powerpc/mm/nohash: Return correctly from flush_tlb_page Aneesh Kumar K.V
2016-04-20  4:08   ` [V2, " Michael Ellerman
2016-04-20  7:49     ` Aneesh Kumar K.V
2016-04-09  6:12 ` [PATCH V2 03/68] powerpc/mm/nohash: Update non SMP version of flush_tlb_page to handle hugetlb address Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 04/68] powerpc/mm: Use big endian page table for book3s 64 Aneesh Kumar K.V
2016-04-22 11:01   ` Michael Ellerman
2016-04-24 22:29     ` Aneesh Kumar K.V
2016-05-29 11:03   ` Anton Blanchard
2016-05-29 21:27     ` Benjamin Herrenschmidt
2016-05-29 23:08       ` Anton Blanchard
2016-05-30  3:42         ` Michael Ellerman
2016-05-30  5:31         ` Anton Blanchard
2016-05-30  8:42         ` Aneesh Kumar K.V
2016-05-30 11:00           ` Benjamin Herrenschmidt
2016-05-30 14:48             ` Aneesh Kumar K.V
2016-05-30 14:59       ` Segher Boessenkool
2016-04-09  6:13 ` [PATCH V2 05/68] powerpc/mm: use _PAGE_READ to indicate Read access Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 06/68] powerpc/mm/subpage: Clear RWX bit to indicate no access Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 07/68] powerpc/mm: Use pte_user instead of opencoding Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 08/68] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 09/68] powerpc/mm: Remove RPN_SHIFT and RPN_SIZE Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 10/68] powerpc/mm: Update _PAGE_KERNEL_RO Aneesh Kumar K.V
2016-11-20  0:43   ` [V2,10/68] " Geoff Levand
2016-11-20 18:03     ` Aneesh Kumar K.V
2016-11-21  0:33       ` Geoff Levand
2016-11-23 10:41     ` Aneesh Kumar K.V
2016-11-24  4:04       ` Geoff Levand
2016-04-09  6:13 ` [PATCH V2 11/68] powerpc/mm: Use helper for finding pte bits mapping I/O area Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 12/68] powerpc/mm: Drop WIMG in favour of new constants Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 13/68] powerpc/mm: Use generic version of pmdp_clear_flush_young Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 14/68] powerpc/mm: Use generic version of ptep_clear_flush_young Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 15/68] powerpc/mm: Move common data structure between radix and hash to book3s 64 generic headers Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 16/68] powerpc/mm/power9: Add partition table format Aneesh Kumar K.V
2016-04-11  0:59   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 17/68] powerpc/mm/hash: Add support for POWER9 hash Aneesh Kumar K.V
2016-04-11  4:55   ` Balbir Singh
2016-04-16 19:06     ` Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 18/68] powerpc/mm: Move hash and no hash code to separate files Aneesh Kumar K.V
2016-04-11  5:14   ` Balbir Singh
2016-04-17 10:20     ` Aneesh Kumar K.V
2016-04-20  6:17       ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 19/68] powerpc/mm/book3s: Rename hash specific PTE bits to carry H_ prefix Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 20/68] powerpc/mm: Handle _PTE_NONE_MASK Aneesh Kumar K.V
2016-04-11  6:09   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 21/68] powerpc/mm: Move common pte bits and accessors to book3s/64/pgtable.h Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 22/68] powerpc/mm: Move pte accessors that operate on common pte bits to pgtable.h Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 23/68] powerpc/mm: Make page table size a variable Aneesh Kumar K.V
2016-04-12  1:49   ` Balbir Singh
2016-04-17 10:27     ` Aneesh Kumar K.V
2016-04-20  6:21       ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 24/68] powerpc/mm: Move page table index and and vaddr to pgtable.h Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 25/68] powerpc/mm: Move pte related function together Aneesh Kumar K.V
2016-04-14  6:50   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 26/68] powerpc/mm/radix: Add radix pte defines Aneesh Kumar K.V
2016-04-21  4:12   ` Balbir Singh
2016-04-23  8:30     ` Benjamin Herrenschmidt
2016-04-26  1:40       ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 27/68] powerpc/mm/radix: Dummy radix_enabled() Aneesh Kumar K.V
2016-04-21  4:27   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 28/68] powerpc/mm: Add radix callbacks to pte accessors Aneesh Kumar K.V
2016-04-21  4:30   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 29/68] powerpc/mm: Move hugetlb and THP related pmd accessors to pgtable.h Aneesh Kumar K.V
2016-04-21  9:53   ` Balbir Singh
2016-04-21  9:59     ` Michael Ellerman
2016-04-21 11:42       ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 30/68] powerpc/mm/radix: Add radix callback for pmd accessors Aneesh Kumar K.V
2016-04-21 11:39   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 31/68] powerpc/mm: Abstraction for early init routines Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 32/68] powerpc/mm/radix: Add radix callback " Aneesh Kumar K.V
2016-04-21 12:22   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 33/68] powerpc/mm: Abstraction for vmemmap and map_kernel_page Aneesh Kumar K.V
2016-04-21 12:59   ` Balbir Singh
2016-04-28  6:17   ` Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 34/68] powerpc/mm/radix: Add radix callback for vmemmap and map_kernel page Aneesh Kumar K.V
2016-04-21 13:46   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 35/68] powerpc/mm: Abstraction for switch_mmu_context Aneesh Kumar K.V
2016-04-21 14:12   ` Balbir Singh
2016-04-28  6:13     ` Aneesh Kumar K.V
2016-04-28  6:13   ` Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 36/68] powerpc/mm/radix: Add mmu context handling callback for radix Aneesh Kumar K.V
2016-04-22  6:19   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 37/68] powerpc/mm: Rename mmu_context_hash64.c to mmu_context_book3s64.c Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 38/68] powerpc/mm: Hash linux abstraction for tlbflush routines Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 39/68] powerpc/mm/radix: Add " Aneesh Kumar K.V
2016-04-22  7:20   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 40/68] powerpc/mm/radix: Add MMU_FTR_RADIX Aneesh Kumar K.V
2016-04-22  7:23   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 41/68] powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code Aneesh Kumar K.V
2016-04-22  7:32   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 42/68] powerpc/mm/radix: Isolate hash table function from pseries guest code Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 43/68] powerpc/mm/radix: Add checks in slice code to catch radix usage Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 44/68] powerpc/mm/radix: Limit paca allocation in radix Aneesh Kumar K.V
2016-04-22  8:07   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 45/68] powerpc/mm/radix: Pick the address layout for radix config Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 46/68] powerpc/mm/radix: Update secondary PTCR Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 47/68] powerpc/mm: Make a copy of pgalloc.h for 32 and 64 book3s Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 48/68] powerpc/mm: Copy pgalloc (part 2) Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 49/68] powerpc/mm: Revert changes made to nohash pgalloc-64.h Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 50/68] powerpc/mm: Simplify the code dropping 4 level table #ifdef Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 51/68] powerpc/mm: Rename function to indicate we are allocating fragments Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 52/68] powerpc/mm: make 4k and 64k use pte_t for pgtable_t Aneesh Kumar K.V
2016-04-26  2:58   ` Balbir Singh
2016-04-28  6:03   ` Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 53/68] powerpc/mm: Add radix pgalloc details Aneesh Kumar K.V
2016-04-26  3:05   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 54/68] powerpc/mm: Update pte filter for radix Aneesh Kumar K.V
2016-04-26  3:06   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 55/68] powerpc/mm: VMALLOC abstraction Aneesh Kumar K.V
2016-04-22  6:52   ` Michael Neuling
2016-04-23  3:29     ` Aneesh Kumar K.V
2016-04-26  6:20       ` Balbir Singh
2016-04-26  4:47   ` Balbir Singh
2016-04-28  6:09   ` Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 56/68] powerpc/radix: update mmu cache Aneesh Kumar K.V
2016-04-26  6:23   ` Balbir Singh
2016-04-09  6:13 ` [PATCH V2 57/68] powerpc/mm: pte_frag abstraction Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 58/68] powerpc/mm: Fix vma_mmu_pagesize for radix Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 59/68] powerpc/mm: Add radix support for hugetlb Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 60/68] powerpc/mm/radix: Make sure swapper pgdir is properly aligned Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 61/68] powerpc/mm/radix: Add hugetlb support 4K page size Aneesh Kumar K.V
2016-04-09  6:13 ` [PATCH V2 62/68] powerpc/mm: Drop PTE_ATOMIC_UPDATES from pmd_hugepage_update Aneesh Kumar K.V
2016-04-09  6:13 ` Aneesh Kumar K.V [this message]
2016-04-09  6:14 ` [PATCH V2 64/68] powerpc/mm/thp: Abstraction for THP functions Aneesh Kumar K.V
2016-04-09  6:14 ` [PATCH V2 65/68] powerpc/mm/radix: Add radix THP callbacks Aneesh Kumar K.V
2016-04-09  6:14 ` [PATCH V2 66/68] powerpc/mm/radix: Add THP support for 4k linux page size Aneesh Kumar K.V
2016-04-28  4:56   ` [V2, " Michael Ellerman
2016-04-28  6:28     ` Aneesh Kumar K.V
2016-04-09  6:14 ` [PATCH V2 67/68] powerpc/mm/radix: Cputable update for radix Aneesh Kumar K.V
2016-04-28 14:10   ` [V2,67/68] " Michael Ellerman
2016-04-09  6:14 ` [PATCH V2 68/68] powerpc/mm/radix: Use firmware feature to disable radix Aneesh Kumar K.V
2016-04-20  2:59   ` [V2, " Michael Ellerman
2016-04-20  8:21     ` Aneesh Kumar K.V
2016-04-20 11:25     ` Michael Neuling

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1460182444-2468-64-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=benh@kernel.crashing.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mpe@ellerman.id.au \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.