linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2/2] powerpc: make 64K huge pages more reliable
@ 2007-11-28  5:03 Jon Tollefson
  2007-12-03  2:06 ` David Gibson
  0 siblings, 1 reply; 5+ messages in thread
From: Jon Tollefson @ 2007-11-28  5:03 UTC (permalink / raw)
  To: linuxppc-dev, Linux Memory Management List

This patch adds reliability to the 64K huge page option by making use of
the PMD for 64K huge pages when base pages are 4K.  So instead of a
12-bit pte, there would be a 7-bit pmd and a 5-bit pte.  The pgd and pud
offsets would continue as 9 bits and 7 bits respectively.  This will
allow the pgtable to fit in one base page.  This patch has to be applied
after part 1.

Signed-off-by: Jon Tollefson <kniht@linux.vnet.ibm.com>
---

 hugetlbpage.c |   53 ++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index f4632ad..c6df45b 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -30,15 +30,11 @@
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
-#ifdef CONFIG_PPC_64K_PAGES
-#define HUGEPTE_INDEX_SIZE	(PMD_SHIFT-HPAGE_SHIFT)
-#else
-#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT-HPAGE_SHIFT)
-#endif
-#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
-#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
+unsigned int hugepte_shift;
+#define PTRS_PER_HUGEPTE	(1 << hugepte_shift)
+#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << hugepte_shift)
 
-#define HUGEPD_SHIFT		(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
+#define HUGEPD_SHIFT		(HPAGE_SHIFT + hugepte_shift)
 #define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
 #define HUGEPD_MASK		(~(HUGEPD_SIZE-1))
 
@@ -105,7 +101,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 			if (!pmd_none(*pm))
 				return hugepte_offset((hugepd_t *)pm, addr);
 #else
-			return hugepte_offset((hugepd_t *)pu, addr);
+			if (HPAGE_SHIFT == HPAGE_SHIFT_64K) {
+				pmd_t *pm;
+				pm = pmd_offset(pu, addr);
+				if (!pmd_none(*pm))
+					return hugepte_offset((hugepd_t *)pm, addr);
+			} else {
+				return hugepte_offset((hugepd_t *)pu, addr);
+			}
 #endif
 		}
 	}
@@ -133,7 +136,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 		if (pm)
 			hpdp = (hugepd_t *)pm;
 #else
-		hpdp = (hugepd_t *)pu;
+		if (HPAGE_SHIFT == HPAGE_SHIFT_64K) {
+			pmd_t *pm;
+			pm = pmd_alloc(mm, pu, addr);
+			if (pm)
+				hpdp = (hugepd_t *)pm;
+		} else {
+			hpdp = (hugepd_t *)pu;
+		}
 #endif
 	}
 
@@ -161,7 +171,6 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
 						 PGF_CACHENUM_MASK));
 }
 
-#ifdef CONFIG_PPC_64K_PAGES
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
 				   unsigned long floor, unsigned long ceiling)
@@ -194,7 +203,6 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	pud_clear(pud);
 	pmd_free_tlb(tlb, pmd);
 }
-#endif
 
 static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 				   unsigned long addr, unsigned long end,
@@ -213,9 +221,15 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 			continue;
 		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
 #else
-		if (pud_none(*pud))
-			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pud);
+		if (HPAGE_SHIFT == HPAGE_SHIFT_64K) {
+			if (pud_none_or_clear_bad(pud))
+				continue;
+			hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+		} else {
+			if (pud_none(*pud))
+				continue;
+			free_hugepte_range(tlb, (hugepd_t *)pud);
+		}
 #endif
 	} while (pud++, addr = next, addr != end);
 
@@ -538,6 +552,15 @@ void set_huge_psize(int psize)
 			mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) {
 		HPAGE_SHIFT = mmu_psize_defs[psize].shift;
 		mmu_huge_psize = psize;
+#ifdef CONFIG_PPC_64K_PAGES
+		hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
+#else
+		if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+			hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
+		else
+			hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT);
+#endif
+
 	} else
 		HPAGE_SHIFT = 0;
 }

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] powerpc: make 64K huge pages more reliable
  2007-11-28  5:03 [PATCH 2/2] powerpc: make 64K huge pages more reliable Jon Tollefson
@ 2007-12-03  2:06 ` David Gibson
  2007-12-03 21:33   ` Jon Tollefson
  2007-12-03 21:51   ` Chris Snook
  0 siblings, 2 replies; 5+ messages in thread
From: David Gibson @ 2007-12-03  2:06 UTC (permalink / raw)
  To: kniht; +Cc: linuxppc-dev, Linux Memory Management List

On Tue, Nov 27, 2007 at 11:03:16PM -0600, Jon Tollefson wrote:
> This patch adds reliability to the 64K huge page option by making use of 
> the PMD for 64K huge pages when base pages are 4k.  So instead of a 12 
> bit pte it would be 7 bit pmd and a 5 bit pte. The pgd and pud offsets 
> would continue as 9 bits and 7 bits respectively.  This will allow the 
> pgtable to fit in one base page.  This patch would have to be applied 
> after part 1.

Hrm.. shouldn't we just ban 64K hugepages on a 64K base page size
setup?  There's not a whole lot of point to it, after all...

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] powerpc: make 64K huge pages more reliable
  2007-12-03  2:06 ` David Gibson
@ 2007-12-03 21:33   ` Jon Tollefson
  2007-12-03 21:51   ` Chris Snook
  1 sibling, 0 replies; 5+ messages in thread
From: Jon Tollefson @ 2007-12-03 21:33 UTC (permalink / raw)
  To: David Gibson; +Cc: linuxppc-dev, Linux Memory Management List

David Gibson wrote:
> On Tue, Nov 27, 2007 at 11:03:16PM -0600, Jon Tollefson wrote:
>   
>> This patch adds reliability to the 64K huge page option by making use of 
>> the PMD for 64K huge pages when base pages are 4k.  So instead of a 12 
>> bit pte it would be 7 bit pmd and a 5 bit pte. The pgd and pud offsets 
>> would continue as 9 bits and 7 bits respectively.  This will allow the 
>> pgtable to fit in one base page.  This patch would have to be applied 
>> after part 1.
>>     
>
> Hrm.. shouldn't we just ban 64K hugepages on a 64K base page size
> setup?  There's not a whole lot of point to it, after all...
>   

Banning the base and huge page size from being the same size feels like
an artificial barrier.  It is probably not the most massively useful
combination, but it shouldn't hurt performance. 

Jon

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] powerpc: make 64K huge pages more reliable
  2007-12-03  2:06 ` David Gibson
  2007-12-03 21:33   ` Jon Tollefson
@ 2007-12-03 21:51   ` Chris Snook
  2007-12-04  0:28     ` David Gibson
  1 sibling, 1 reply; 5+ messages in thread
From: Chris Snook @ 2007-12-03 21:51 UTC (permalink / raw)
  To: kniht, linuxppc-dev, Linux Memory Management List

David Gibson wrote:
> On Tue, Nov 27, 2007 at 11:03:16PM -0600, Jon Tollefson wrote:
>> This patch adds reliability to the 64K huge page option by making use of 
>> the PMD for 64K huge pages when base pages are 4k.  So instead of a 12 
>> bit pte it would be 7 bit pmd and a 5 bit pte. The pgd and pud offsets 
>> would continue as 9 bits and 7 bits respectively.  This will allow the 
>> pgtable to fit in one base page.  This patch would have to be applied 
>> after part 1.
> 
> Hrm.. shouldn't we just ban 64K hugepages on a 64K base page size
> setup?  There's not a whole lot of point to it, after all...
> 

Actually, it sounds to me like an ideal way to benchmark the efficiency of the 
hugepage implementation and VM effects, without the TLB performance obscuring 
the results.

I agree that it's not something people will want to do very often, but the same 
can be said about quite a lot of strange things that we allow just because 
there's no fundamental reason why they cannot be.

	-- Chris

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] powerpc: make 64K huge pages more reliable
  2007-12-03 21:51   ` Chris Snook
@ 2007-12-04  0:28     ` David Gibson
  0 siblings, 0 replies; 5+ messages in thread
From: David Gibson @ 2007-12-04  0:28 UTC (permalink / raw)
  To: Chris Snook; +Cc: linuxppc-dev, kniht, Linux Memory Management List

On Mon, Dec 03, 2007 at 04:51:53PM -0500, Chris Snook wrote:
> David Gibson wrote:
> > On Tue, Nov 27, 2007 at 11:03:16PM -0600, Jon Tollefson wrote:
> >> This patch adds reliability to the 64K huge page option by making use of 
> >> the PMD for 64K huge pages when base pages are 4k.  So instead of a 12 
> >> bit pte it would be 7 bit pmd and a 5 bit pte. The pgd and pud offsets 
> >> would continue as 9 bits and 7 bits respectively.  This will allow the 
> >> pgtable to fit in one base page.  This patch would have to be applied 
> >> after part 1.
> > 
> > Hrm.. shouldn't we just ban 64K hugepages on a 64K base page size
> > setup?  There's not a whole lot of point to it, after all...
> > 
> 
> Actually, it sounds to me like an ideal way to benchmark the
> efficiency of the hugepage implementation and VM effects, without
> the TLB performance obscuring the results.

Well... maybe.  The pagetable format, and therefore the hugepage size,
will affect the size of that overhead, so even with this it's not clear
how meaningful test results would be.

> I agree that it's not something people will want to do very often,
> but the same can be said about quite a lot of strange things that we
> allow just because there's no fundamental reason why they cannot be.

Hrm, yeah I guess, since this was a fairly simple fix.  I still don't
think it's worth jumping through too many hoops to make this possible,
though.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-12-04  0:50 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-28  5:03 [PATCH 2/2] powerpc: make 64K huge pages more reliable Jon Tollefson
2007-12-03  2:06 ` David Gibson
2007-12-03 21:33   ` Jon Tollefson
2007-12-03 21:51   ` Chris Snook
2007-12-04  0:28     ` David Gibson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).