From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org, David Gibson <dwg@au1.ibm.com>
Subject: Re: [PATCH -V6 05/27] powerpc: New hugepage directory format
Date: Thu, 25 Apr 2013 11:30:37 +0530 [thread overview]
Message-ID: <87wqrr175m.fsf@linux.vnet.ibm.com> (raw)
In-Reply-To: <20130424054734.GA2073@drongo>
Paul Mackerras <paulus@samba.org> writes:
> On Mon, Apr 22, 2013 at 03:30:39PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>
> [snip]
>
>> /*
>> - * Use the top bit of the higher-level page table entries to indicate whether
>> - * the entries we point to contain hugepages. This works because we know that
>> - * the page tables live in kernel space. If we ever decide to support having
>> - * page tables at arbitrary addresses, this breaks and will have to change.
>> - */
>> -#ifdef CONFIG_PPC64
>> -#define PD_HUGE 0x8000000000000000
>> -#else
>> -#define PD_HUGE 0x80000000
>> -#endif
>
> I think this is a good thing to do ultimately, but if you do this you
> also need to fix arch/powerpc/kernel/head_fsl_booke.S:
>
> #ifdef CONFIG_PTE_64BIT
> #ifdef CONFIG_HUGETLB_PAGE
> #define FIND_PTE \
> rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
> lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
> rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
> blt 1000f; /* Normal non-huge page */ \
> beq 2f; /* Bail if no table */ \
> oris r11, r11, PD_HUGE@h; /* Put back address bit */ \
> andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \
> xor r12, r10, r11; /* drop size bits from pointer */ \
> b 1001f; \
>
that should be easy, but
> and this, from arch/powerpc/mm/tlb_low_64e.S:
>
> cmpdi cr0,r14,0
> bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */
>
> (of which there are several similar instances in that file).
>
> If you want to avoid fixing these bits of assembly code (and any
> others I missed in my quick scan), you'll need to keep the definition
> of PD_HUGE, at least on anything not 64-bit Book3S.
I am not sure we can find all such usages easily. I looked at the commit
d1b9b12811ef079c37fe464f51953746d8b78e2a and am worried we might be
breaking assumptions like that. Considering that I won't be able to test
FSL, I am inclined to go with the second option you listed, even though
that adds a few more #ifdefs. How about the following?
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 62e11a3..4daf7e6 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -6,6 +6,33 @@
extern struct kmem_cache *hugepte_cache;
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * This should work for other subarchs too. But right now we use the
+ * new format only for 64bit book3s
+ */
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ BUG_ON(!hugepd_ok(hpd));
+ /*
+ * We have only four bits to encode the MMU page size
+ */
+ BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
+ return (pte_t *)(hpd.pd & ~HUGEPD_SHIFT_MASK);
+}
+
+static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
+{
+ return (hpd.pd & HUGEPD_SHIFT_MASK) >> 2;
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
+}
+
+#else
+
static inline pte_t *hugepd_page(hugepd_t hpd)
{
BUG_ON(!hugepd_ok(hpd));
@@ -17,6 +44,9 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
return hpd.pd & HUGEPD_SHIFT_MASK;
}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
unsigned pdshift)
{
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index b59e06f..05895cf 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -21,6 +21,7 @@
* complete pgtable.h but only a portion of it.
*/
#include <asm/pgtable-ppc64.h>
+#include <asm/bug.h>
/*
* Segment table
@@ -159,6 +160,24 @@ struct mmu_psize_def
unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+ int psize;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
+ if (mmu_psize_defs[psize].shift == shift)
+ return psize;
+ return -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+ if (mmu_psize_defs[mmu_psize].shift)
+ return mmu_psize_defs[mmu_psize].shift;
+ BUG();
+}
#endif /* __ASSEMBLY__ */
@@ -193,7 +212,6 @@ static inline int segment_shift(int ssize)
/*
* The current system page and segment sizes
*/
-extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
extern int mmu_linear_psize;
extern int mmu_virtual_psize;
extern int mmu_vmalloc_psize;
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index f072e97..652719c 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -249,6 +249,7 @@ extern long long virt_phys_offset;
#define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
#endif
+#ifndef CONFIG_PPC_BOOK3S_64
/*
* Use the top bit of the higher-level page table entries to indicate whether
* the entries we point to contain hugepages. This works because we know that
@@ -260,6 +261,7 @@ extern long long virt_phys_offset;
#else
#define PD_HUGE 0x80000000
#endif
+#endif /* CONFIG_PPC_BOOK3S_64 */
/*
* Some number of bits at the level of the page table that points to
@@ -354,10 +356,21 @@ typedef unsigned long pgprot_t;
typedef struct { signed long pd; } hugepd_t;
#ifdef CONFIG_HUGETLB_PAGE
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline int hugepd_ok(hugepd_t hpd)
+{
+ /*
+ * hugepd pointer, bottom two bits == 00 and next 4 bits
+ * indicate size of table
+ */
+ return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
+}
+#else
static inline int hugepd_ok(hugepd_t hpd)
{
return (hpd.pd > 0);
}
+#endif
#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
#else /* CONFIG_HUGETLB_PAGE */
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 292725c..69e352a 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -35,7 +35,10 @@ struct vmemmap_backing {
#define MAX_PGTABLE_INDEX_SIZE 0xf
extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) (pgtable_cache[(shift)-1])
+#define PGT_CACHE(shift) ({ \
+ BUG_ON(!(shift)); \
+ pgtable_cache[(shift) - 1]; \
+ })
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a..b5f4a5f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -48,23 +48,6 @@ static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif
-static inline int shift_to_mmu_psize(unsigned int shift)
-{
- int psize;
-
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
- if (mmu_psize_defs[psize].shift == shift)
- return psize;
- return -1;
-}
-
-static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
-{
- if (mmu_psize_defs[mmu_psize].shift)
- return mmu_psize_defs[mmu_psize].shift;
- BUG();
-}
-
#define hugepd_none(hpd) ((hpd).pd == 0)
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
@@ -145,6 +128,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (unlikely(!hugepd_none(*hpdp)))
break;
else
+ /* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
}
/* If we bailed from the for loop early, an error occurred, clean up */
@@ -156,9 +140,15 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#else
if (!hugepd_none(*hpdp))
kmem_cache_free(cachep, new);
- else
+ else {
+#ifdef CONFIG_PPC_BOOK3S_64
+ hpdp->pd = (unsigned long)new |
+ (shift_to_mmu_psize(pshift) << 2);
+#else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
+ }
+#endif
spin_unlock(&mm->page_table_lock);
return 0;
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 7e2246f..a56de85 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -129,8 +129,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
new = kmem_cache_create(name, table_size, align, 0, ctor);
- PGT_CACHE(shift) = new;
-
+ pgtable_cache[shift - 1] = new;
pr_debug("Allocated pgtable cache for order %d\n", shift);
}
next prev parent reply other threads:[~2013-04-25 6:00 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-22 10:00 [PATCH -V6 00/27] THP support for PPC64 Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 01/27] powerpc: Use signed formatting when printing error Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 02/27] powerpc: Save DAR and DSISR in pt_regs on MCE Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 03/27] powerpc: Don't hard code the size of pte page Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 04/27] powerpc: Don't truncate pgd_index wrongly Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 05/27] powerpc: New hugepage directory format Aneesh Kumar K.V
2013-04-23 7:01 ` Paul Mackerras
2013-04-23 8:42 ` Aneesh Kumar K.V
2013-04-24 5:47 ` Paul Mackerras
2013-04-25 6:00 ` Aneesh Kumar K.V [this message]
2013-04-22 10:00 ` [PATCH -V6 06/27] powerpc: Switch 16GB and 16MB explicit hugepages to a different page table format Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 07/27] powerpc: Reduce the PTE_INDEX_SIZE Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 08/27] powerpc: Move the pte free routines from common header Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 09/27] powerpc: Reduce PTE table memory wastage Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 10/27] powerpc: Use encode avpn where we need only avpn values Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 11/27] powerpc: Decode the pte-lp-encoding bits correctly Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 12/27] powerpc: Fix hpte_decode to use the correct decoding for page sizes Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 13/27] powerpc: print both base and actual page size on hash failure Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 14/27] powerpc: Print page size info during boot Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 15/27] powerpc: Update tlbie/tlbiel as per ISA doc Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 16/27] mm/THP: HPAGE_SHIFT is not a #define on some arch Aneesh Kumar K.V
2013-04-22 15:43 ` Andrea Arcangeli
2013-04-22 10:00 ` [PATCH -V6 17/27] mm/THP: Add pmd args to pgtable deposit and withdraw APIs Aneesh Kumar K.V
2013-04-22 15:46 ` Andrea Arcangeli
2013-04-22 10:00 ` [PATCH -V6 18/27] mm/THP: withdraw the pgtable after pmdp related operations Aneesh Kumar K.V
2013-04-22 15:49 ` Andrea Arcangeli
2013-04-24 9:08 ` Aneesh Kumar K.V
2013-04-24 15:14 ` Andrea Arcangeli
2013-04-25 6:11 ` Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 19/27] powerpc/THP: Double the PMD table size for THP Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 20/27] powerpc/THP: Implement transparent hugepages for ppc64 Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 21/27] powerpc: move find_linux_pte_or_hugepte and gup_hugepte to common code Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 22/27] powerpc: Update find_linux_pte_or_hugepte to handle transparent hugepages Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 23/27] powerpc: Replace find_linux_pte with find_linux_pte_or_hugepte Aneesh Kumar K.V
2013-04-24 6:29 ` Paul Mackerras
2013-04-22 10:00 ` [PATCH -V6 24/27] powerpc: Update gup_pmd_range to handle transparent hugepages Aneesh Kumar K.V
2013-04-22 10:00 ` [PATCH -V6 25/27] powerpc/THP: Add code to handle HPTE faults for large pages Aneesh Kumar K.V
2013-04-22 10:01 ` [PATCH -V6 26/27] powerpc/THP: Enable THP on PPC64 Aneesh Kumar K.V
2013-04-22 10:01 ` [PATCH -V6 27/27] powerpc: Optimize hugepage invalidate Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87wqrr175m.fsf@linux.vnet.ibm.com \
--to=aneesh.kumar@linux.vnet.ibm.com \
--cc=dwg@au1.ibm.com \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.