* Long Format VHPT patches
@ 2004-08-25 2:34 Ian Wienand
From: Ian Wienand @ 2004-08-25 2:34 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1.1: Type: text/plain, Size: 2037 bytes --]
Hi,
Support for the IA64 long format VHPT is becoming an increasingly
important foundation for ongoing work such as superpages and TLB sharing
schemes, so we would like to get the following patches into a
development tree to start getting feedback and to facilitate these other
projects. Our testing shows them to be reliable.
The first two patches abstract ivt.S a little, creating a new header
ivt.h
01-sf-VHPT-macro-def-ivt.h.patch
02-sf-VHPT-macro-calls-ivt.S.patch
The next series of patches adds the guts of the long format VHPT support
03-VHPT-hugetlb-support.patch
04-lf-VHPT-macro-def-ivt.h.patch
05-lf-VHPT-macro-calls-ivt.S.patch
The final patches add the support code and are fairly self-explanatory
06-lf-VHPT-declarations.patch
07-lf-VHPT-initialise.patch
08-lf-VHPT-tlb-flush-dec.patch
09-lf-VHPT-tlb-flush.patch
10-lf-VHPT-mmu_context-dec.patch
11-lf-VHPT-kconfig.patch
The idea and original patches were by Matthew Chapman, and stewardship
passed to Darren Williams who got them to this stage.
I've attached all the patches (against 2.6.9-rc1) to this email as
they're not that big, but if you actually want to try them out it might
be easier to grab them from CVS as described in the footnote.
While the kernel is building you might also like to read the following
relevant paper :)
Matthew Chapman, Ian Wienand, Gernot Heiser
Itanium Page Tables and TLB
Technical Report UNSW-CSE-TR-0307, May 2003.
ftp://ftp.cse.unsw.edu.au/pub/doc/papers/UNSW/0307.pdf
Thanks,
-i
ianw@gelato.unsw.edu.au
http://www.gelato.unsw.edu.au
--- A note on our patches in CVS ---
If you would like to try them out quickly, we would encourage you to
always grab the latest versions from our CVS and use quilt to apply
them, i.e. in a fresh kernel directory do
cvs -d:pserver:anoncvs@gelato.unsw.edu.au:/gelato login
[anoncvs]
cvs -d:pserver:anoncvs@gelato.unsw.edu.au:/gelato co -d patches lvhpt
then use 'quilt push -a' to push all patches, build and run as usual.
[-- Attachment #1.2: 01-sf-VHPT-macro-def-ivt.h.patch --]
[-- Type: text/plain, Size: 3270 bytes --]
#
# This patch creates macros for the common code that walks the 3-level page
# table in ivt.S
#
diff -Nru a/include/asm-ia64/ivt.h b/include/asm-ia64/ivt.h
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/include/asm-ia64/ivt.h 2004-08-12 19:19:37 +10:00
@@ -0,0 +1,66 @@
+#ifndef _ASM_IA64_IVT_H
+#define _ASM_IA64_IVT_H
+/*
+ * Copyright (C) 2004 Gelato@UNSW
+ * Matthew Chapman <matthewc@cse.unsw.edu.au>
+ * Darren Williams <dsw@gelato.unsw.edu.au>
+ */
+
+#define temp r24
+#define rgn r25 /* region register */
+#define tir r26 /* translation register */
+#define mbz r27 /* "must be zero" else signifies non valid region */
+#define scratch r28
+
+/* va=r16 ppte=r19 fail=p6 ok=p7 */
+#define FIND_PTE(va, ppte, fail, ok) \
+ rsm psr.dt; /* switch to using physical data addressing */ \
+ mov ppte=IA64_KR(PT_BASE); /* get the page table base address */ \
+ shl mbz=va,3; /* shift bit 60 into sign bit */ \
+ shr.u rgn=va,61; /* get the region number into 'rgn' */ \
+ ;; \
+ cmp.eq p6,p7=5,rgn; /* is faulting address in region 5? */ \
+ shr.u temp=va,PGDIR_SHIFT; /* get bits 33-63 of faulting address */ \
+ ;; \
+(p7) dep ppte=rgn,ppte,(PAGE_SHIFT-3),3; /* put region number bits in place */ \
+ srlz.d; \
+ LOAD_PHYSICAL(p6, ppte, swapper_pg_dir);/* region 5 is rooted at swapper_pg_dir */ \
+ \
+ .pred.rel "mutex", p6, p7; \
+(p6) shr.u mbz=mbz,PGDIR_SHIFT+PAGE_SHIFT; \
+(p7) shr.u mbz=mbz,PGDIR_SHIFT+PAGE_SHIFT-3; \
+ ;; \
+(p6) dep ppte=temp,ppte,3,(PAGE_SHIFT-3); /* ppte=PTA + IFA(33,42)*8 */ \
+(p7) dep ppte=temp,ppte,3,(PAGE_SHIFT-6); /* ppte=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) */ \
+ cmp.eq ok,fail=0,mbz; /* unused address bits all zeroes? */ \
+ shr.u temp=va,PMD_SHIFT; /* shift L2 index into position */ \
+ ;; \
+ ld8 ppte=[ppte]; /* fetch the L1 entry (may be 0)*/ \
+ ;; \
+(ok) cmp.eq fail,ok=ppte,r0; /* was L1 entry NULL?*/ \
+ dep ppte=temp,ppte,3,(PAGE_SHIFT-3); /* compute address of L2 page table entry*/ \
+ ;; \
+(ok) ld8 ppte=[ppte]; /* fetch the L2 entry (may be 0)*/ \
+ shr.u temp=va,PAGE_SHIFT; /* shift L3 index into position*/ \
+ ;; \
+(ok) cmp.eq.or.andcm fail,ok=ppte,r0; /* was L2 entry NULL?*/ \
+ dep ppte=temp,ppte,3,(PAGE_SHIFT-3); /* compute address of L3 page table entry*/
+
+#define LOAD_PTE_MISS(va, ppte, pte, hpte, failfn) \
+ mov ppte=cr.iha; /* get virtual address of L3 PTE */ \
+ movl r30=1f; /* load continuation point */ \
+ ;; \
+1: ld8 pte=[ppte]; \
+ mov b0=r29; /* restore possibly destroyed b0 */ \
+ ;; \
+ tbit.z p6,p0=pte,_PAGE_P_BIT; /* page present bit cleared? */ \
+(p6) br.cond.spnt failfn;
+
+#define LOAD_PTE_FAULT(va, ppte, pte, hpte, failfn) \
+ thash ppte=va; /* get virtual address of L3 PTE */ \
+ movl r30=1f; /* load continuation point */ \
+ ;; \
+1: ld8 pte=[ppte]; \
+ mov b0=r29; /* restore possibly destroyed b0 */
+
+#endif /* _ASM_IA64_IVT_H */
[-- Attachment #1.3: 02-sf-VHPT-macro-calls-ivt.S.patch --]
[-- Type: text/plain, Size: 6168 bytes --]
# This patch abstracts code into ivt.h
diff -Nru a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
--- a/arch/ia64/kernel/ivt.S 2004-08-12 19:21:11 +10:00
+++ b/arch/ia64/kernel/ivt.S 2004-08-12 19:21:11 +10:00
@@ -40,6 +40,7 @@
#include <linux/config.h>
+#include <asm/ivt.h>
#include <asm/asmmacro.h>
#include <asm/break.h>
#include <asm/ia32.h>
@@ -223,14 +224,7 @@
mov r29=b0 // save b0
mov r31=pr // save predicates
.itlb_fault:
- mov r17=cr.iha // get virtual address of L3 PTE
- movl r30=1f // load nested fault continuation point
- ;;
-1: ld8 r18=[r17] // read L3 PTE
- ;;
- mov b0=r29
- tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
-(p6) br.cond.spnt page_fault
+ LOAD_PTE_MISS(r16,r17,r18,r22,page_fault) // find PTE and check present bit
;;
itc.i r18
;;
@@ -266,15 +260,8 @@
mov r16=cr.ifa // get virtual address
mov r29=b0 // save b0
mov r31=pr // save predicates
-dtlb_fault:
- mov r17=cr.iha // get virtual address of L3 PTE
- movl r30=1f // load nested fault continuation point
- ;;
-1: ld8 r18=[r17] // read L3 PTE
- ;;
- mov b0=r29
- tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
-(p6) br.cond.spnt page_fault
+.dtlb_fault:
+ LOAD_PTE_MISS(r16,r17,r18,r22,page_fault) // find PTE and check present bit
;;
itc.d r18
;;
@@ -406,39 +393,7 @@
*
* Clobbered: b0, r18, r19, r21, psr.dt (cleared)
*/
- rsm psr.dt // switch to using physical data addressing
- mov r19=IA64_KR(PT_BASE) // get the page table base address
- shl r21=r16,3 // shift bit 60 into sign bit
- ;;
- shr.u r17=r16,61 // get the region number into r17
- ;;
- cmp.eq p6,p7=5,r17 // is faulting address in region 5?
- shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
- ;;
-(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
-
- srlz.d
- LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
-
- .pred.rel "mutex", p6, p7
-(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
-(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
- ;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
- cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
- shr.u r18=r16,PMD_SHIFT // shift L2 index into position
- ;;
- ld8 r17=[r17] // fetch the L1 entry (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
- ;;
-(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ FIND_PTE(r16,r17,p6,p7)
(p6) br.cond.spnt page_fault
mov b0=r30
br.sptk.many b0 // return to continuation point
@@ -501,16 +456,13 @@
* page table TLB entry isn't present, we take a nested TLB miss hit where we look
* up the physical address of the L3 PTE and then continue at label 1 below.
*/
- mov r16=cr.ifa // get the address that caused the fault
- movl r30=1f // load continuation point in case of nested fault
- ;;
- thash r17=r16 // compute virtual address of L3 PTE
mov r29=b0 // save b0 in case of nested fault
mov r31=pr // save pr
+ ;;
+ LOAD_PTE_FAULT(r16,r17,r18,r22,.dtlb_fault)
+
#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
;; // avoid RAW on r18
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
@@ -533,14 +485,10 @@
cmp.eq p6,p7=r18,r25 // is it same as the newly installed
;;
(p7) ptc.l r16,r24
- mov b0=r29 // restore b0
mov ar.ccv=r28
#else
- ;;
-1: ld8 r18=[r17]
;; // avoid RAW on r18
or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
- mov b0=r29 // restore b0
;;
st8 [r17]=r18 // store back updated PTE
itc.d r18 // install updated PTE
@@ -556,7 +504,7 @@
DBG_FAULT(9)
// Like Entry 8, except for instruction access
mov r16=cr.ifa // get the address that caused the fault
- movl r30=1f // load continuation point in case of nested fault
+ mov r29=b0 // save b0
mov r31=pr // save predicates
#ifdef CONFIG_ITANIUM
/*
@@ -570,13 +518,10 @@
(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa
#endif /* CONFIG_ITANIUM */
;;
- thash r17=r16 // compute virtual address of L3 PTE
- mov r29=b0 // save b0 in case of nested fault)
+ LOAD_PTE_FAULT(r16,r17,r18,r22,.itlb_fault)
#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
;;
-1: ld8 r18=[r17]
- ;;
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_A,r18 // set the accessed bit
;;
@@ -598,14 +543,10 @@
cmp.eq p6,p7=r18,r25 // is it same as the newly installed
;;
(p7) ptc.l r16,r24
- mov b0=r29 // restore b0
mov ar.ccv=r28
#else /* !CONFIG_SMP */
;;
-1: ld8 r18=[r17]
- ;;
or r18=_PAGE_A,r18 // set the accessed bit
- mov b0=r29 // restore b0
;;
st8 [r17]=r18 // store back updated PTE
itc.i r18 // install updated PTE
@@ -621,15 +562,12 @@
DBG_FAULT(10)
// Like Entry 8, except for data access
mov r16=cr.ifa // get the address that caused the fault
- movl r30=1f // load continuation point in case of nested fault
- ;;
- thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0
mov r31=pr
- mov r29=b0 // save b0 in case of nested fault)
+ ;;
+ LOAD_PTE_FAULT(r16,r17,r18,r22,.dtlb_fault)
#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
;; // avoid RAW on r18
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_A,r18 // set the dirty bit
@@ -661,7 +599,6 @@
st8 [r17]=r18 // store back updated PTE
itc.d r18 // install updated PTE
#endif
- mov b0=r29 // restore b0
mov pr=r31,-1
rfi
END(daccess_bit)
[-- Attachment #1.4: 03-VHPT-hugetlb-support.patch --]
[-- Type: text/plain, Size: 3195 bytes --]
# add hugetlb support for the page table walker
diff -Nru a/include/asm-ia64/ivt.h b/include/asm-ia64/ivt.h
--- a/include/asm-ia64/ivt.h 2004-08-12 19:22:57 +10:00
+++ b/include/asm-ia64/ivt.h 2004-08-12 19:22:57 +10:00
@@ -5,22 +5,42 @@
* Matthew Chapman <matthewc@cse.unsw.edu.au>
* Darren Williams <dsw@gelato.unsw.edu.au>
*/
-
+
+#define eva r22 /* effective va after accounting for hugepages */
#define temp r24
#define rgn r25 /* region register */
#define tir r26 /* translation register */
#define mbz r27 /* "must be zero" else signifies non valid region */
#define scratch r28
+#ifdef CONFIG_HUGETLB_PAGE
+#define HUGETLB_GET_ITIR(tir) mov tir=cr.itir
+#define HUGETLB_GET_EVA(tir, temp, va, eva) \
+ movl scratch=HPAGE_SHIFT; \
+ extr.u temp=tir,2,6 /* extract the default page size bits */ \
+ ;; \
+ cmp.eq p6,p7=scratch,temp; /* default page size is huge page size ? */ \
+ movl scratch=HPAGE_SHIFT-PAGE_SHIFT \
+ ;; \
+(p6) shr eva=va,scratch; \
+(p7) mov eva=va \
+ ;;
+#else /* !CONFIG_HUGETLB_PAGE */
+#define HUGETLB_GET_ITIR(tir)
+#define HUGETLB_GET_EVA(tir, temp, va, eva) mov eva=va;;
+#endif /* !CONFIG_HUGETLB_PAGE */
+
/* va=r16 ppte=r19 fail=p6 ok=p7 */
#define FIND_PTE(va, ppte, fail, ok) \
+ HUGETLB_GET_ITIR(tir); \
rsm psr.dt; /* switch to using physical data addressing */ \
mov ppte=IA64_KR(PT_BASE); /* get the page table base address */ \
shl mbz=va,3; /* shift bit 60 into sign bit */ \
shr.u rgn=va,61; /* get the region number into 'rgn' */ \
;; \
+ HUGETLB_GET_EVA(tir, temp, va, eva) \
cmp.eq p6,p7=5,rgn; /* is faulting address in region 5? */ \
- shr.u temp=va,PGDIR_SHIFT; /* get bits 33-63 of faulting address */ \
+ shr.u temp=eva,PGDIR_SHIFT; /* get bits 33-63 of faulting address */ \
;; \
(p7) dep ppte=rgn,ppte,(PAGE_SHIFT-3),3; /* put region number bits in place */ \
srlz.d; \
@@ -33,7 +53,7 @@
(p6) dep ppte=temp,ppte,3,(PAGE_SHIFT-3); /* ppte=PTA + IFA(33,42)*8 */ \
(p7) dep ppte=temp,ppte,3,(PAGE_SHIFT-6); /* ppte=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) */ \
cmp.eq ok,fail=0,mbz; /* unused address bits all zeroes? */ \
- shr.u temp=va,PMD_SHIFT; /* shift L2 index into position */ \
+ shr.u temp=eva,PMD_SHIFT; /* shift L2 index into position */ \
;; \
ld8 ppte=[ppte]; /* fetch the L1 entry (may be 0)*/ \
;; \
@@ -41,7 +61,7 @@
dep ppte=temp,ppte,3,(PAGE_SHIFT-3); /* compute address of L2 page table entry*/ \
;; \
(ok) ld8 ppte=[ppte]; /* fetch the L2 entry (may be 0)*/ \
- shr.u temp=va,PAGE_SHIFT; /* shift L3 index into position*/ \
+ shr.u temp=eva,PAGE_SHIFT; /* shift L3 index into position*/ \
;; \
(ok) cmp.eq.or.andcm fail,ok=ppte,r0; /* was L2 entry NULL?*/ \
dep ppte=temp,ppte,3,(PAGE_SHIFT-3); /* compute address of L3 page table entry*/
[-- Attachment #1.5: 04-lf-VHPT-macro-def-ivt.h.patch --]
[-- Type: text/plain, Size: 4767 bytes --]
# abstract from arch/ia64/kernel/ivt.S
Index: fixup/include/asm-ia64/ivt.h
===================================================================
--- fixup.orig/include/asm-ia64/ivt.h 2004-08-25 11:06:35.832453532 +1000
+++ fixup/include/asm-ia64/ivt.h 2004-08-25 11:06:52.054133021 +1000
@@ -30,6 +30,14 @@
#define HUGETLB_GET_EVA(tir, temp, va, eva) mov eva=va;;
#endif /* !CONFIG_HUGETLB_PAGE */
+/*
+ * FIND_PTE - given the virtual address find the page table entry.
+ * @va, virtual address that caused the look up
+ * @ppte, pointer to the page table entry at L1,L2 and L3
+ * @fail,@ok, predicate registers p6,p7
+ *
+ * Walks the ia64 3 level page table and sets the pointer 'ppte' for 'va'.
+ */
/* va=r16 ppte=r19 fail=p6 ok=p7 */
#define FIND_PTE(va, ppte, fail, ok) \
HUGETLB_GET_ITIR(tir); \
@@ -66,6 +74,104 @@
(ok) cmp.eq.or.andcm fail,ok=ppte,r0; /* was L2 entry NULL?*/ \
dep ppte=temp,ppte,3,(PAGE_SHIFT-3); /* compute address of L3 page table entry*/
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+/*
+ * LOAD_PTE_MISS - load pte into tlb and VHPT
+ * @va, virtual address
+ * @ppte, pointer to the page table entry
+ * @pte, actual pte
+ * @hpte, hash page table entry
+ *
+ * Given a va get the ppte and load its value into pte
+ */
+#define LOAD_PTE_MISS(va, ppte, pte, hpte, failfn) \
+ ;; \
+ FIND_PTE(va, ppte, p6, p7) \
+ ;; \
+(p7) ld8 pte=[ppte]; \
+ ;; \
+(p7) tbit.z p6,p0=pte,_PAGE_P_BIT; /* page present bit cleared? */ \
+(p6) br.cond.spnt failfn;
+
+/* Since we access the page table physically, we access the long VHPT physically as well
+ * to avoid switching back and forth */
+
+/*
+ * LOAD_PTE_FAULT - get the pte entry from the VHPT for va
+ * @va, virtual address to resolve
+ * @ppte, pointer to the page table entry
+ * @pte, page table entry
+ * @hpte, store pte in this hash page table entry
+ * @failfn, function called if fault not resolved
+ *
+ * Retrieve the pte via the hashed page table and store it in pte=r18
+ */
+#define tag r25
+#define htag r26
+#define LOAD_PTE_FAULT(va, ppte, pte, hpte, failfn) \
+ thash hpte=va; \
+ rsm psr.dt; \
+ ;; \
+ tpa hpte=hpte; /* make hash address physical */ \
+ ttag tag=va; \
+ ;; \
+ srlz.d; \
+ add temp=16,hpte; \
+ add ppte=24,hpte; \
+ ;; \
+ ld8 htag=[temp]; \
+ ld8 ppte=[ppte]; \
+ ;; \
+ cmp.ne p6,p7=htag, tag; /* verify tag */ \
+ ;; \
+(p7) ld8 pte=[ppte]; \
+(p6) br.cond.spnt failfn;
+
+/*
+ * VHPT_INSERT -
+ * @va, virtual address to be inserted
+ * @ppte, pointer to the page table entry
+ * @pte, page table entry to be inserted
+ * @hpte, insert pte into this hash page table entry
+ *
+ * Insert the va into the VHPT and tlb, the tlb insert
+ * happens in ivt.S for the appropriate fault instruction or data.
+ */
+#define tir r26
+#define VHPT_INSERT(va, ppte, pte, hpte) \
+ mov hpte=cr.iha; \
+ mov tir=cr.itir; \
+ ;; \
+ tpa hpte=hpte; /* make hash address physical */ \
+ ttag tag=va; \
+ ;; \
+ add temp=16,hpte; \
+ ;; \
+ st8 [hpte]=pte,8; /* fill out VHPT entry */ \
+ st8 [temp]=tag,8; \
+ ;; \
+ st8 [hpte]=tir,8; \
+ st8 [temp]=ppte;
+
+/*
+ * Update the VHPT with pte value obtained from LOAD_PTE_FAULT
+ */
+#define VHPT_UPDATE(cond, pte, hpte) \
+(cond) st8 [hpte]=pte,16;
+
+/*
+ * Invalidate the tlb for the VHPT pointing to hpte, this is achieved by
+ * setting the invalid tag bit(63) in the VHPT tag field. A VHPT entry with
+ * ti bit set to one will never be inserted into a processor's TLBs.
+ *
+ */
+#define VHPT_PURGE(cond, hpte) \
+(cond) dep tag=-1,r0,63,1; /* set tag-invalid bit */ \
+ ;; \
+(cond) st8 [hpte]=tag; /* hpte already points to tag (see above) */
+
+#else /* !CONFIG_IA64_LONG_FORMAT_VHPT */
+
#define LOAD_PTE_MISS(va, ppte, pte, hpte, failfn) \
mov ppte=cr.iha; /* get virtual address of L3 PTE */ \
movl r30=1f; /* load continuation point */ \
@@ -83,4 +189,9 @@
1: ld8 pte=[ppte]; \
mov b0=r29; /* restore possibly destroyed b0 */
+#define VHPT_INSERT(va, ppte, pte, hpte) /* nothing */
+#define VHPT_UPDATE(cond, pte, hpte) /* nothing */
+#define VHPT_PURGE(cond, hpte) /* nothing */
+#endif /* !CONFIG_IA64_LONG_FORMAT_VHPT */
+
#endif /* _ASM_IA64_IVT_H */
[-- Attachment #1.6: 05-lf-VHPT-macro-calls-ivt.S.patch --]
[-- Type: text/plain, Size: 4525 bytes --]
# Add support for long format vhpt in abstracted macros
diff -Nru a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
--- a/arch/ia64/kernel/ivt.S 2004-08-12 19:24:47 +10:00
+++ b/arch/ia64/kernel/ivt.S 2004-08-12 19:24:47 +10:00
@@ -102,7 +102,13 @@
* do_page_fault gets invoked in the following cases:
* - the faulting virtual address uses unimplemented address bits
* - the faulting virtual address has no L1, L2, or L3 mapping
+ *
+ * This fault should not occur with the long format VHPT since we keep it
+ * permanently mapped.
*/
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ FAULT(0)
+#else
mov r16=cr.ifa // get address that caused the TLB miss
#ifdef CONFIG_HUGETLB_PAGE
movl r18=PAGE_SHIFT
@@ -207,6 +213,7 @@
mov pr=r31,-1 // restore predicate registers
rfi
+#endif /* !CONFIG_IA64_LONG_FORMAT_VHPT */
END(vhpt_miss)
.org ia64_ivt+0x400
@@ -226,13 +233,14 @@
.itlb_fault:
LOAD_PTE_MISS(r16,r17,r18,r22,page_fault) // find PTE and check present bit
;;
+ VHPT_INSERT(r16,r17,r18,r22)
itc.i r18
- ;;
#ifdef CONFIG_SMP
/*
* Tell the assemblers dependency-violation checker that the above "itc" instructions
* cannot possibly affect the following loads:
*/
+ ;;
dv_serialize_data
ld8 r19=[r17] // read L3 PTE again and see if same
@@ -240,6 +248,7 @@
;;
cmp.ne p7,p0=r18,r19
;;
+ VHPT_PURGE(p7,r22)
(p7) ptc.l r16,r20
#endif
mov pr=r31,-1
@@ -263,13 +272,14 @@
.dtlb_fault:
LOAD_PTE_MISS(r16,r17,r18,r22,page_fault) // find PTE and check present bit
;;
+ VHPT_INSERT(r16,r17,r18,r22)
itc.d r18
- ;;
#ifdef CONFIG_SMP
/*
* Tell the assemblers dependency-violation checker that the above "itc" instructions
* cannot possibly affect the following loads:
*/
+ ;;
dv_serialize_data
ld8 r19=[r17] // read L3 PTE again and see if same
@@ -277,6 +287,7 @@
;;
cmp.ne p7,p0=r18,r19
;;
+ VHPT_PURGE(p7,r22)
(p7) ptc.l r16,r20
#endif
mov pr=r31,-1
@@ -381,6 +392,9 @@
* continuation point passed in register r30 (or call page_fault if the address is
* not mapped).
*
+ * This fault should not occur with the long format VHPT since we keep it
+ * permanently mapped.
+ *
* Input: r16: faulting address
* r29: saved b0
* r30: continuation address
@@ -391,12 +405,17 @@
* r30: continuation address
* r31: saved pr
*
- * Clobbered: b0, r18, r19, r21, psr.dt (cleared)
+ * Clobbered: b0, psr.dt (cleared), r24-r26 (see FIND_PTE)
*/
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ DBG_FAULT(5)
+ FAULT(5)
+#else
FIND_PTE(r16,r17,p6,p7)
(p6) br.cond.spnt page_fault
mov b0=r30
br.sptk.many b0 // return to continuation point
+#endif
END(nested_dtlb_miss)
.org ia64_ivt+0x1800
@@ -472,6 +491,7 @@
;;
cmp.eq p6,p7=r26,r18
;;
+ VHPT_UPDATE(p6,r18,r22)
(p6) itc.d r25 // install updated PTE
;;
/*
@@ -484,6 +504,7 @@
;;
cmp.eq p6,p7=r18,r25 // is it same as the newly installed
;;
+ VHPT_PURGE(p7,r22)
(p7) ptc.l r16,r24
mov ar.ccv=r28
#else
@@ -491,6 +512,7 @@
or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
;;
st8 [r17]=r18 // store back updated PTE
+ VHPT_UPDATE(p0,r18,r22)
itc.d r18 // install updated PTE
#endif
mov pr=r31,-1 // restore pr
@@ -530,6 +552,7 @@
;;
cmp.eq p6,p7=r26,r18
;;
+ VHPT_UPDATE(p6,r18,r22)
(p6) itc.i r25 // install updated PTE
;;
/*
@@ -542,6 +565,7 @@
;;
cmp.eq p6,p7=r18,r25 // is it same as the newly installed
;;
+ VHPT_PURGE(p7,r22)
(p7) ptc.l r16,r24
mov ar.ccv=r28
#else /* !CONFIG_SMP */
@@ -549,6 +573,7 @@
or r18=_PAGE_A,r18 // set the accessed bit
;;
st8 [r17]=r18 // store back updated PTE
+ VHPT_UPDATE(p0,r18,r22)
itc.i r18 // install updated PTE
#endif /* !CONFIG_SMP */
mov pr=r31,-1
@@ -577,6 +602,7 @@
;;
cmp.eq p6,p7=r26,r18
;;
+ VHPT_UPDATE(p6,r18,r22)
(p6) itc.d r25 // install updated PTE
/*
* Tell the assemblers dependency-violation checker that the above "itc" instructions
@@ -588,6 +614,7 @@
;;
cmp.eq p6,p7=r18,r25 // is it same as the newly installed
;;
+ VHPT_PURGE(p7,r22)
(p7) ptc.l r16,r24
mov ar.ccv=r28
#else
@@ -597,6 +624,7 @@
or r18=_PAGE_A,r18 // set the accessed bit
;;
st8 [r17]=r18 // store back updated PTE
+ VHPT_UPDATE(p0,r18,r22)
itc.d r18 // install updated PTE
#endif
mov pr=r31,-1
[-- Attachment #1.7: 06-lf-VHPT-declarations.patch --]
[-- Type: text/plain, Size: 4579 bytes --]
# setup the long format VHPT
Index: linux-2.6.9.rc1/arch/ia64/kernel/setup.c
===================================================================
--- linux-2.6.9.rc1.orig/arch/ia64/kernel/setup.c 2004-08-17 12:03:33.000000000 +1000
+++ linux-2.6.9.rc1/arch/ia64/kernel/setup.c 2004-08-25 12:15:37.147958127 +1000
@@ -226,6 +226,19 @@
#endif
}
+static void __init parse_cmdline_early (char ** cmdline_p)
+{
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ char *p;
+ extern int lvhpt_adj_setup(char *s);
+
+ strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+ if ((p = strstr(*cmdline_p, "lvhpt_adjust="))) {
+ lvhpt_adj_setup(p + 13);
+ }
+#endif
+}
+
static void __init
io_port_init (void)
{
Index: linux-2.6.9.rc1/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.9.rc1.orig/arch/ia64/mm/init.c 2004-08-25 12:12:40.157725920 +1000
+++ linux-2.6.9.rc1/arch/ia64/mm/init.c 2004-08-25 12:15:37.148934689 +1000
@@ -39,6 +39,11 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+unsigned long vhpt_base[NR_CPUS];
+unsigned long long_vhpt_bits, long_vhpt_size;
+#endif
+
extern void ia64_tlb_init (void);
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
@@ -274,6 +279,50 @@
ia64_patch_gate();
}
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+/*
+ * This code must be called on a CPU which has it's MMU
+ * initialized. The page allocator seems to depend on it.
+ *
+ * Returns 0 on success.
+ */
+unsigned int
+alloc_vhpt(int cpu)
+{
+#ifdef CONFIG_NUMA
+ struct page *page;
+
+ page = alloc_pages_node(cpu_to_node(cpu), __GFP_HIGHMEM|GFP_ATOMIC, long_vhpt_bits - PAGE_SHIFT);
+ if (!page)
+ return -1;
+ vhpt_base[cpu] = (unsigned long) page_address(page);
+#else
+ vhpt_base[cpu] = (unsigned long)__get_free_pages(__GFP_HIGHMEM|GFP_ATOMIC, long_vhpt_bits - PAGE_SHIFT);
+#endif
+ return (vhpt_base[cpu] == 0);
+}
+
+static int lvhpt_adjust;
+
+int __init
+lvhpt_adj_setup(char *s)
+{
+ if (sscanf(s, "%d", &lvhpt_adjust) <= 0)
+ lvhpt_adjust = 0;
+ return 1;
+}
+
+__setup("lvhpt_adjust=", lvhpt_adj_setup);
+
+void __init
+compute_vhpt_size(void)
+{
+ long_vhpt_bits = 22 + lvhpt_adjust;
+ long_vhpt_size = 1 << long_vhpt_bits;
+}
+
+#endif /* CONFIG_IA64_LONG_FORMAT_VHPT */
+
void __devinit
ia64_mmu_init (void *my_cpu_data)
{
Index: linux-2.6.9.rc1/include/asm-ia64/kregs.h
===================================================================
--- linux-2.6.9.rc1.orig/include/asm-ia64/kregs.h 2004-08-17 12:03:33.000000000 +1000
+++ linux-2.6.9.rc1/include/asm-ia64/kregs.h 2004-08-25 12:15:37.149911252 +1000
@@ -30,6 +30,7 @@
#define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */
#define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */
#define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */
+#define IA64_TR_LONG_VHPT 3 /* dtr3: maps long format VHPT */
/* Processor status register bits: */
#define IA64_PSR_BE_BIT 1
Index: linux-2.6.9.rc1/include/asm-ia64/page.h
===================================================================
--- linux-2.6.9.rc1.orig/include/asm-ia64/page.h 2004-08-17 12:03:33.000000000 +1000
+++ linux-2.6.9.rc1/include/asm-ia64/page.h 2004-08-25 12:15:37.149911252 +1000
@@ -139,6 +139,11 @@
return order;
}
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+/* Long format VHPT entry */
+typedef struct { unsigned long pte, itir, tag, ig; } long_pte_t;
+#endif
+
# endif /* __KERNEL__ */
#endif /* !__ASSEMBLY__ */
Index: linux-2.6.9.rc1/include/asm-ia64/pgtable.h
===================================================================
--- linux-2.6.9.rc1.orig/include/asm-ia64/pgtable.h 2004-08-25 12:12:42.332530581 +1000
+++ linux-2.6.9.rc1/include/asm-ia64/pgtable.h 2004-08-25 12:15:37.150887814 +1000
@@ -523,6 +523,12 @@
extern void memmap_init (unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn);
# endif /* CONFIG_VIRTUAL_MEM_MAP */
+
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+extern unsigned long vhpt_base[NR_CPUS];
+extern unsigned long long_vhpt_bits, long_vhpt_size;
+#endif
+
# endif /* !__ASSEMBLY__ */
/*
@@ -543,6 +549,11 @@
#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
/*
+ * Long format VHPT
+ */
+#define LONG_VHPT_BASE (0xc000000000000000 - long_vhpt_size)
+
+/*
* No page table caches to initialise
*/
#define pgtable_cache_init() do { } while (0)
[-- Attachment #1.8: 07-lf-VHPT-initialise.patch --]
[-- Type: text/plain, Size: 5485 bytes --]
# initialise the long format VHPT
Index: linux-2.6.9.rc1/arch/ia64/kernel/setup.c
===================================================================
--- linux-2.6.9.rc1.orig/arch/ia64/kernel/setup.c 2004-08-25 12:15:37.147958127 +1000
+++ linux-2.6.9.rc1/arch/ia64/kernel/setup.c 2004-08-25 12:16:50.598152540 +1000
@@ -321,9 +321,15 @@
ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
*cmdline_p = __va(ia64_boot_param->command_line);
- strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+ parse_cmdline_early(cmdline_p);
efi_init();
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ {
+ extern void compute_vhpt_size(void);
+ compute_vhpt_size();
+ }
+#endif
io_port_init();
#ifdef CONFIG_IA64_GENERIC
Index: linux-2.6.9.rc1/arch/ia64/kernel/smpboot.c
===================================================================
--- linux-2.6.9.rc1.orig/arch/ia64/kernel/smpboot.c 2004-08-25 12:12:40.151866545 +1000
+++ linux-2.6.9.rc1/arch/ia64/kernel/smpboot.c 2004-08-25 12:16:50.599129102 +1000
@@ -376,6 +376,11 @@
complete(&c_idle->done);
}
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+/* required for do_boot_cpu, defined in init.c */
+extern unsigned int alloc_vhpt(int cpu);
+#endif
+
static int __devinit
do_boot_cpu (int sapicid, int cpu)
{
@@ -399,6 +404,13 @@
panic("failed fork for CPU %d", cpu);
task_for_booting_cpu = c_idle.idle;
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ if (alloc_vhpt(cpu)) {
+ panic("Couldn't allocate VHPT on CPU %d, size: 0x%lx\n", cpu, long_vhpt_size);
+ }
+ printk(KERN_INFO "Allocated long format VHPT for CPU %d at: 0x%lx, size: 0x%lx\n", cpu, vhpt_base[cpu], long_vhpt_size);
+#endif /* CONFIG_IA64_LONG_FORMAT_VHPT */
+
Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
Index: linux-2.6.9.rc1/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.9.rc1.orig/arch/ia64/mm/init.c 2004-08-25 12:15:37.148934689 +1000
+++ linux-2.6.9.rc1/arch/ia64/mm/init.c 2004-08-25 12:16:50.600105665 +1000
@@ -326,7 +326,7 @@
void __devinit
ia64_mmu_init (void *my_cpu_data)
{
- unsigned long psr, pta, impl_va_bits;
+ unsigned long psr, pta;
extern void __devinit tlb_init (void);
int cpu;
@@ -336,16 +336,44 @@
# define VHPT_ENABLE_BIT 1
#endif
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ cpu = smp_processor_id();
+ if (cpu == 0)
+ {
+ vhpt_base[cpu] = (unsigned long)__alloc_bootmem(long_vhpt_size, long_vhpt_size,
+ __pa(MAX_DMA_ADDRESS));
+ if (vhpt_base[cpu] == 0) {
+ panic("Couldn't allocate VHPT on CPU %d, size: 0x%lx\n",
+ cpu, long_vhpt_size);
+ }
+ printk(KERN_INFO "Allocated long format VHPT for CPU %d at: 0x%lx, size: 0x%lx\n",
+ cpu, vhpt_base[cpu], long_vhpt_size);
+ }
+#endif
+
/* Pin mapping for percpu area into TLB */
psr = ia64_clear_ic();
ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
PERCPU_PAGE_SHIFT);
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ /* Insert the permanent translation for the VHPT */
+ ia64_itr(0x2, IA64_TR_LONG_VHPT, LONG_VHPT_BASE,
+ pte_val(pfn_pte(__pa(vhpt_base[cpu]) >> PAGE_SHIFT, PAGE_KERNEL)), long_vhpt_bits);
+#endif
+
ia64_set_psr(psr);
ia64_srlz_i();
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+# define VHPT_FORMAT_BIT 1
+# define vhpt_bits long_vhpt_bits
+ pta = LONG_VHPT_BASE;
+#else
/*
+ * SHORT FORMAT VHPT (virtually mapped linear pagetable)
+ *
* Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
* address space. The IA-64 architecture guarantees that at least 50 bits of
* virtual address space are implemented but if we pick a large enough page size
@@ -356,6 +384,7 @@
* address space to not permit mappings that would overlap with the VMLPT.
* --davidm 00/12/06
*/
+# define VHPT_FORMAT_BIT 0
# define pte_bits 3
# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
/*
@@ -365,27 +394,30 @@
* non-speculative accesses to the virtual page table, so the address range of the
* virtual page table itself needs to be covered by virtual page table.
*/
-# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+# define vhpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
# define POW2(n) (1ULL << (n))
+ unsigned long impl_va_bits;
impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
if (impl_va_bits < 51 || impl_va_bits > 61)
panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
/* place the VMLPT at the end of each page-table mapped region: */
- pta = POW2(61) - POW2(vmlpt_bits);
+ pta = POW2(61) - POW2(vhpt_bits);
if (POW2(mapped_space_bits) >= pta)
panic("mm/init: overlap between virtually mapped linear page table and "
"mapped kernel space!");
+#endif
+
/*
* Set the (virtually mapped linear) page table address. Bit
* 8 selects between the short and long format, bits 2-7 the
* size of the table, and bit 0 whether the VHPT walker is
* enabled.
*/
- ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
+ ia64_set_pta(pta | (VHPT_FORMAT_BIT << 8) | (vhpt_bits << 2) | VHPT_ENABLE_BIT);
ia64_tlb_init();
[-- Attachment #1.9: 08-lf-VHPT-tlb-flush-dec.patch --]
[-- Type: text/plain, Size: 784 bytes --]
# flushing of the long format VHPT
diff -Nru a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
--- a/include/asm-ia64/tlbflush.h 2004-08-12 19:29:25 +10:00
+++ b/include/asm-ia64/tlbflush.h 2004-08-12 19:29:25 +10:00
@@ -19,6 +19,21 @@
* can be very expensive, so try to avoid them whenever possible.
*/
+/* Flushing a translation from the long format VHPT */
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+# define INVALID_TAG (1UL << 63)
+
+static inline void
+flush_vhpt_page(unsigned long addr)
+{
+ long_pte_t *hpte;
+ hpte = (long_pte_t *)ia64_thash(addr);
+ hpte->tag = INVALID_TAG;
+}
+#else
+# define flush_vhpt_page(addr) do { } while (0)
+#endif
+
/*
* Flush everything (kernel mapping may also have changed due to
* vmalloc/vfree).
[-- Attachment #1.10: 09-lf-VHPT-tlb-flush.patch --]
[-- Type: text/plain, Size: 3113 bytes --]
# long format VHPT support for TLB flushing
diff -Nru a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
--- a/arch/ia64/mm/tlb.c 2004-08-12 19:29:42 +10:00
+++ b/arch/ia64/mm/tlb.c 2004-08-12 19:29:42 +10:00
@@ -110,6 +110,14 @@
{
unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ unsigned long page;
+
+ /* Admittedly 0 is a valid tag, but in that rare case the present bit will save us */
+ for (page = LONG_VHPT_BASE; page < LONG_VHPT_BASE+long_vhpt_size; page += PAGE_SIZE)
+ clear_page((void *)page);
+#endif
+
addr = local_cpu_data->ptce_base;
count0 = local_cpu_data->ptce_count[0];
count1 = local_cpu_data->ptce_count[1];
@@ -129,6 +137,37 @@
}
EXPORT_SYMBOL(local_flush_tlb_all);
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+static void
+flush_vhpt_range (struct mm_struct *mm, unsigned long from, unsigned long to)
+{
+ unsigned long addr;
+
+ for (addr = from; addr < to; addr += PAGE_SIZE)
+ flush_vhpt_page(addr);
+
+#ifdef CONFIG_SMP
+ {
+ /* Urgh... flush VHPTs of any other CPUs that have run this mm */
+ extern unsigned long vhpt_base[];
+ unsigned long offset;
+ long_pte_t *hpte;
+ int cpu;
+
+ for_each_cpu_mask(cpu, mm->cpu_vm_mask)
+ {
+ for (addr = from; addr < to; addr += PAGE_SIZE)
+ {
+ offset = ia64_thash(addr) & (long_vhpt_size-1);
+ hpte = (long_pte_t *)(vhpt_base[cpu] + offset);
+ hpte->tag = INVALID_TAG;
+ }
+ }
+ }
+#endif
+}
+#endif /* CONFIG_IA64_LONG_FORMAT_VHPT */
+
void
flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
@@ -145,6 +184,10 @@
#endif
return;
}
+
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ flush_vhpt_range(mm, start, end);
+#endif
nbits = ia64_fls(size + 0xfff);
while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
diff -Nru a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h
--- a/include/asm-ia64/tlb.h 2004-08-12 19:29:42 +10:00
+++ b/include/asm-ia64/tlb.h 2004-08-12 19:29:42 +10:00
@@ -107,8 +107,10 @@
vma.vm_mm = tlb->mm;
/* flush the address range from the tlb: */
flush_tlb_range(&vma, start, end);
+#ifndef CONFIG_IA64_LONG_FORMAT_VHPT
/* now flush the virt. page-table area mapping the address range: */
flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
+#endif
}
/* lastly, release the freed pages */
diff -Nru a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
--- a/include/asm-ia64/tlbflush.h 2004-08-12 19:29:42 +10:00
+++ b/include/asm-ia64/tlbflush.h 2004-08-12 19:29:42 +10:00
@@ -68,6 +68,7 @@
goto out;
mm->context = 0;
+ cpu_clear(smp_processor_id(),mm->cpu_vm_mask);
if (atomic_read(&mm->mm_users) == 0)
goto out; /* happens as a result of exit_mmap() */
@@ -93,7 +94,10 @@
flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE);
#else
if (vma->vm_mm == current->active_mm)
+ {
+ flush_vhpt_page(addr);
ia64_ptcl(addr, (PAGE_SHIFT << 2));
+ }
else
vma->vm_mm->context = 0;
#endif
[-- Attachment #1.11: 10-lf-VHPT-mmu_context-dec.patch --]
[-- Type: text/plain, Size: 1954 bytes --]
# optimisation: redistribute RIDs so sequential allocation doesn't cause VHPT hash collisions
diff -Nru a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
--- a/include/asm-ia64/mmu_context.h 2004-08-12 19:30:43 +10:00
+++ b/include/asm-ia64/mmu_context.h 2004-08-12 19:30:43 +10:00
@@ -17,6 +17,20 @@
#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+/*
+ * The long format VHPT walker hash function suffers a high number of collisions
+ * when processes have similar RIDs and similar address space layouts (e.g. after
+ * fork()). The following is used to space out the RIDs we present to the hardware
+ * without messing with Linux's sequential allocation scheme.
+ * Refer to 'Intel Itanium Processor Reference Manual for Software Development'
+ * http://www.intel.com/design/itanium/manuals.htm
+ */
+#define redistribute_rid(rid) (((rid) & ~0xffff) | (((rid) << 8) & 0xff00) | (((rid) >> 8) & 0xff))
+#else
+#define redistribute_rid(rid) (rid)
+#endif
+
#define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61))
# ifndef __ASSEMBLY__
diff -Nru a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
--- a/include/asm-ia64/mmu_context.h 2004-08-12 19:31:10 +10:00
+++ b/include/asm-ia64/mmu_context.h 2004-08-12 19:31:10 +10:00
@@ -31,7 +31,7 @@
#define redistribute_rid(rid) (rid)
#endif
-#define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61))
+#define ia64_rid(ctx,addr) redistribute_rid(((ctx) << 3) | (addr >> 61))
# ifndef __ASSEMBLY__
@@ -158,7 +158,12 @@
old_rr4 = ia64_get_rr(0x8000000000000000);
rid = context << 3; /* make space for encoding the region number */
+#ifdef CONFIG_IA64_LONG_FORMAT_VHPT
+ rid = redistribute_rid(rid);
+ rid_incr = 1 << 16;
+#else
rid_incr = 1 << 8;
+#endif
/* encode the region id, preferred page size, and VHPT enable bit: */
rr0 = (rid << 8) | (PAGE_SHIFT << 2) | 1;
[-- Attachment #1.12: 11-lf-VHPT-kconfig.patch --]
[-- Type: text/plain, Size: 2012 bytes --]
# Add Kconfig option for lfvhpt
Index: linux-2.6.9.rc1/arch/ia64/Kconfig
===================================================================
--- linux-2.6.9.rc1.orig/arch/ia64/Kconfig 2004-08-25 12:12:40.135264983 +1000
+++ linux-2.6.9.rc1/arch/ia64/Kconfig 2004-08-25 12:31:17.349118484 +1000
@@ -178,6 +178,16 @@
or have huge holes in the physical address space for other reasons.
See <file:Documentation/vm/numa> for more.
+config IA64_LONG_FORMAT_VHPT
+ bool "Long format VHPT"
+ depends on !DISABLE_VHPT
+ help
+ The long format VHPT is an alternative hashed page table. Its main
+ advantage is lower memory usage when there is a large number of
+ processes in the system.
+ The short format page table walker is currently the Linux default.
+ If you're unsure, answer N.
+
config IA64_CYCLONE
bool "Cyclone (EXA) Time Source support"
help
Index: linux-2.6.9.rc1/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.9.rc1.orig/Documentation/kernel-parameters.txt 2004-08-25 12:17:14.643074120 +1000
+++ linux-2.6.9.rc1/Documentation/kernel-parameters.txt 2004-08-25 12:17:14.645027245 +1000
@@ -41,6 +41,7 @@
ISDN Appropriate ISDN support is enabled.
JOY Appropriate joystick support is enabled.
LP Printer support is enabled.
+ LONG_FORMAT_VHPT Long Format VHPT is enabled
LOOP Loopback device support is enabled.
M68k M68k architecture is enabled.
These options have more detailed description inside of
@@ -596,6 +597,12 @@
ltpc= [NET]
Format: <io>,<irq>,<dma>
+ lvhpt_adjust= [IA64,LONG_FORMAT_VHPT]
+ Format: <1-39>
+ Increase size of long format vhpt. Base size is 2^22 bytes, maximum
+ is 2^61 bytes. The lvhpt_adjust value is added to the exponent of the base
+ size (i.e. lvhpt_adjust=1 means size 2^23 bytes)
+
mac5380= [HW,SCSI]
Format: <can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
* Re: Long Format VHPT patches
From: Jesse Barnes @ 2004-08-25 17:01 UTC (permalink / raw)
To: linux-ia64
On Tuesday, August 24, 2004 7:34 pm, Ian Wienand wrote:
> 11-lf-VHPT-kconfig.patch
Given your conclusions in the paper, would it make sense to unconditionally
switch to long format VHPT? It would simplify things a little and maybe
avoid some trouble in the future.
Jesse
* RE: Long Format VHPT patches
From: Chen, Kenneth W @ 2004-08-25 17:12 UTC (permalink / raw)
To: linux-ia64
On Tuesday, August 24, 2004 7:34 pm, Ian Wienand wrote:
> 11-lf-VHPT-kconfig.patch
Jesse Barnes wrote on Wednesday, August 25, 2004 10:02 AM
> Given your conclusions in the paper, would it make sense to unconditionally
> switch to long format VHPT? It would simplify things a little and maybe
> avoid some trouble in the future.
The benchmarks done in the paper are very limited in my opinion; I don't
think they cover the whole spectrum of usage models. Our internal evaluation
shows otherwise (i.e., the LVHPT is slower in some industry benchmarks). My
point is that switching unconditionally this early is probably not a wise idea.
- Ken
* Re: Long Format VHPT patches
From: Arun Sharma @ 2004-08-25 20:17 UTC (permalink / raw)
To: linux-ia64
On 8/25/2004 10:01 AM, Jesse Barnes wrote:
> On Tuesday, August 24, 2004 7:34 pm, Ian Wienand wrote:
>> 11-lf-VHPT-kconfig.patch
>
> Given your conclusions in the paper, would it make sense to unconditionally
> switch to long format VHPT? It would simplify things a little and maybe
> avoid some trouble in the future.
>
When the super page optimizations are implemented, long format might be a more attractive option. Without them, the increased cache footprint of the long format PTE is not compensated for by the benefits on the workloads we tried.
-Arun
* Re: Long Format VHPT patches
From: Peter Chubb @ 2004-08-25 22:46 UTC (permalink / raw)
To: linux-ia64
>>>>> "Arun" = Arun Sharma <arun.sharma@intel.com> writes:
Arun> On 8/25/2004 10:01 AM, Jesse Barnes wrote:
>> On Tuesday, August 24, 2004 7:34 pm, Ian Wienand wrote:
>>> 11-lf-VHPT-kconfig.patch
>> Given your conclusions in the paper, would it make sense to
>> unconditionally switch to long format VHPT? It would simplify
>> things a little and maybe avoid some trouble in the future.
>>
Arun> When the super page optimizations are implemented, long format
Arun> might be a more attractive option. Without it, the increased
Arun> cache footprint of the long format pte is not compensated by the
Arun> benefits on the workloads we tried.
I suggest that we do as we did for the large block device patch ---
enable it unconditionally in the -mm and other test series, then flip
back to a config option in mainline. That way, anyone using 2.6 for a
production kernel gets a choice, and while we're in the test phase the
patch gets a good workout.
--
Dr Peter Chubb http://www.gelato.unsw.edu.au peterc AT gelato.unsw.edu.au
The technical we do immediately, the political takes *forever*
* Re: Long Format VHPT patches
From: Christoph Lameter @ 2004-08-26 20:19 UTC (permalink / raw)
To: linux-ia64
One issue that bothers me is that it is no longer possible to swap pte,
pmd and pgd entries via cmpxchg. My page fault scalability patches are based
on that ability. The Itanium can only do a cmpxchg with a 64 bit value.
The long VHPT entries are 32 bytes long.
IMHO the cmpxchg is unavoidable if we want to increase the scalability of
page fault handling, because it offers the shortest and most efficient way
to synchronize updates to the page tables.
The Intel developer's manual documents a way to invalidate a long VHPT
entry while updating it. A special pte_cmpxchg would be needed that
invalidates the VHPT entry via an atomic operation setting the ti bit, then
does the cmpxchg and then revalidates the entry. The page fault handler
would have to be modified to redo the fault if the ti bit is set. However,
this may be significantly slower than a single cmpxchg.
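To make the idea concrete, here is a rough (untested) sketch of such a
pte_cmpxchg -- long_pte_t, INVALID_TAG and ia64_thash()/ia64_ttag() are
as used in patches 06 and 08, everything else is just a guess at how it
might look, not code from the posted patches:

static inline int
lvhpt_pte_cmpxchg(unsigned long addr, pte_t *ptep, pte_t oldval, pte_t newval)
{
	long_pte_t *hpte = (long_pte_t *)ia64_thash(addr);	/* matching long format VHPT entry */
	unsigned long tag = ia64_ttag(addr);
	int ret;

	hpte->tag = INVALID_TAG;	/* one 8-byte store sets the ti bit (63) */
	ia64_mf();			/* make the invalidation visible before the update */
	ret = (cmpxchg(&pte_val(*ptep), pte_val(oldval), pte_val(newval)) == pte_val(oldval));
	hpte->tag = tag;		/* revalidate the VHPT entry */
	return ret;
}

This only touches the local CPU's copy; since the VHPTs are per CPU, the
other CPUs' entries would still have to go through the flush path of patch 09.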
* Re: Long Format VHPT patches
From: Peter Chubb @ 2004-08-26 21:20 UTC (permalink / raw)
To: linux-ia64
>>>>> "Christoph" = Christoph Lameter <christoph@lameter.com> writes:
Christoph> One issue that bothers me is that it is no longer possible
Christoph> to swap pte, pmd and pgd entries via cmpxchg. My page fault
Christoph> scalability patches are based on that ability. The Itanium
Christoph> can only do a cmpxchg with a 64 bit value. The long VHPT
Christoph> entries are 32 bytes long.
I don't think that that's an issue at present: in our patch, the
hardware-walked long format VHPT is used as a cache of the existing
Linux 3-level table, kind of like a software-loaded TLB. Thus your
cmpxchg stuff on the page tables will still work.
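As a purely illustrative C rendering of how I read the refill path in
patches 04 and 05 (the real code is the FIND_PTE/VHPT_INSERT assembly
macros in ivt.h; lvhpt_refill() and lookup_linux_pte() below are made-up
names, not functions from the patches):

static void lvhpt_refill(unsigned long va)
{
	pte_t *ptep = lookup_linux_pte(va);		/* stand-in for the FIND_PTE 3-level walk */
	long_pte_t *hpte = (long_pte_t *)ia64_thash(va);	/* per-CPU long format VHPT slot */

	if (!ptep || !pte_present(*ptep))
		return;					/* no mapping: take the normal page_fault path */

	hpte->pte  = pte_val(*ptep);			/* VHPT_INSERT fills out the 32-byte entry */
	hpte->itir = PAGE_SHIFT << 2;			/* default page size in the itir word */
	hpte->tag  = ia64_ttag(va);			/* valid tag: ti bit (63) clear */
	/* the TLB insert proper (itc.i/itc.d) stays in ivt.S */
}

The Linux 3-level table stays the master copy, so a VHPT entry can
always be thrown away and regenerated from it.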
And if/when we add new page table structures (inevitable eventually for the
superpage work, I think) they can be designed from the start with
scalability in mind.
--
Dr Peter Chubb http://www.gelato.unsw.edu.au peterc AT gelato.unsw.edu.au
The technical we do immediately, the political takes *forever*
* Re: Long Format VHPT patches
From: Arun Sharma @ 2004-08-27 17:15 UTC (permalink / raw)
To: linux-ia64
On 8/26/2004 2:20 PM, Peter Chubb wrote:
>>>>>> "Christoph" = Christoph Lameter <christoph@lameter.com> writes:
>
> Christoph> One issue that bothers me is that it is no longer possible
> Christoph> to swap pte, pmd and pgd entries via cmpxchg. My page fault
> Christoph> scalability patches are based on that ability. The Itanium
> Christoph> can only do a cmpxchg with a 64 bit value. The long VHPT
> Christoph> entries are 32 bytes long.
>
> I don't think that that's an issue at present: in our patch, the
> hardware-walked long format VHPT is used as a cache of the existing
> linux 3-level table, kind of like a software loaded TLB. Thus your
> cmpxchg stuff on the pagetables will still work.
>
And long format VHPTs are per CPU. So no cmpxchg should be necessary.
-Arun
* Re: Long Format VHPT patches
From: Ian Wienand @ 2004-08-31 5:53 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 1179 bytes --]
On Thu, Aug 26, 2004 at 01:19:49PM -0700, Christoph Lameter wrote:
> One issue that bothers me is that it is no longer possible to swap pte,
> pmd and pgd entries via cmpxchg. My page fault scalability patches are based
> on that ability. The Itanium can only do a cmpxchg with a 64 bit value.
> The long VHPT entries are 32 bytes long.
As others have said, for the initial patch this shouldn't be an issue
(we have started making sure that the lvhpt patches will apply on top
of your patches too).
However, in the longer term the length of a pte is an issue. To do
anything interesting with the long format VHPT we are going to need more
bits in a PTE for things like protection keys and page sizes. We have
some preliminary patches at
http://www.gelato.unsw.edu.au/cgi-bin/viewcvs.cgi/cvs/kernel/pteextension/
which do this, and we are building on top of this approach. The
essence of the change is
-typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pte; unsigned long itir; } pte_t;
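As a rough illustration only (the macro names are hypothetical, and the
bit positions are simply the architectural itir layout, ps in bits 2-7
and key in bits 8-31), per-pte page sizes and protection keys could then
be encoded along these lines:

/* sketch only, not from the posted pteextension patches */
#define pte_itir(pte)		((pte).itir)
#define pte_page_shift(pte)	((pte_itir(pte) >> 2) & 0x3fUL)		/* itir.ps */
#define pte_prot_key(pte)	((pte_itir(pte) >> 8) & 0xffffffUL)	/* itir.key */
#define pte_set_page_shift(ptep, ps) \
	((ptep)->itir = ((ptep)->itir & ~(0x3fUL << 2)) | ((unsigned long)(ps) << 2))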
I would like to know from the experienced hackers what the chances of
getting a change like this in are, or if anyone has any other ideas.
-i
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]