From: Robin Holt <holt@sgi.com>
To: linux-ia64@vger.kernel.org
Subject: [RFC] 4-level page table directories.
Date: Thu, 27 Oct 2005 04:17:09 +0000 [thread overview]
Message-ID: <20051027041709.GA13193@attica.americas.sgi.com> (raw)
I have started to work on 4-level page tables. This boots. I
make no further claims than that.
At one point, I discussed 4-level page tables on the ia64 mailing
list, but I could not find that discussion in a quick search of the
MARC archive.
David, I think it was you who expressed concern with introducing
the fourth level. I have done some quick benchmarking and found
little difference (well within noise). How had you envisioned
introducing 3- or 4-level page tables? Were you envisioning
a compile-time or run-time selection?
Thanks,
Robin
Index: linux-2.6/include/asm-ia64/pgtable.h
===================================================================
--- linux-2.6.orig/include/asm-ia64/pgtable.h 2005-10-26 18:59:21.253268550 -0500
+++ linux-2.6/include/asm-ia64/pgtable.h 2005-10-26 23:01:34.572838463 -0500
@@ -84,32 +84,48 @@
#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED
/*
- * Definitions for first level:
- *
- * PGDIR_SHIFT determines what a first-level page table entry can map.
+ * How many pointers will a page table level hold expressed in shift
*/
-#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3))
-#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3))
-#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */
-#define FIRST_USER_ADDRESS 0
+#define PTRS_PER_PTD_SHIFT (PAGE_SHIFT-3)
/*
- * Definitions for second level:
+ * Definitions for fourth level:
+ */
+#define PTRS_PER_PTE (__IA64_UL(1) << (PTRS_PER_PTD_SHIFT))
+
+/*
+ * Definitions for third level:
*
- * PMD_SHIFT determines the size of the area a second-level page table
+ * PMD_SHIFT determines the size of the area a third-level page table
* can map.
*/
-#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
+#define PMD_SHIFT (PAGE_SHIFT + (PTRS_PER_PTD_SHIFT))
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3))
+#define PTRS_PER_PMD (1UL << (PTRS_PER_PTD_SHIFT))
/*
- * Definitions for third level:
+ * Definitions for second level:
+ *
+ * PUD_SHIFT determines the size of the area a second-level page table
+ * can map.
+ */
+#define PUD_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PTRS_PER_PUD (1UL << (PTRS_PER_PTD_SHIFT))
+
+/*
+ * Definitions for first level:
+ *
+ * PGDIR_SHIFT determines what a first-level page table entry can map.
*/
-#define PTRS_PER_PTE (__IA64_UL(1) << (PAGE_SHIFT-3))
+#define PGDIR_SHIFT (PUD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD (1UL << (PTRS_PER_PTD_SHIFT))
+#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */
+#define FIRST_USER_ADDRESS 0
/*
* All the normal masks have the "page accessed" bits on, as any time
@@ -160,6 +176,7 @@
#define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
+#define pud_ERROR(e) printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -256,9 +273,14 @@ ia64_phys_addr_valid (unsigned long addr
#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud)))
#define pud_present(pud) (pud_val(pud) != 0UL)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
-
#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
+#define pgd_none(pgd) (!pgd_val(pgd))
+#define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd)))
+#define pgd_present(pgd) (pgd_val(pgd) != 0UL)
+#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
+#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
+
/*
* The following have defined behavior only work if pte_present() is true.
*/
@@ -327,6 +349,10 @@ pgd_offset (struct mm_struct *mm, unsign
#define pgd_offset_gate(mm, addr) pgd_offset_k(addr)
/* Find an entry in the second-level page table.. */
+#define pud_offset(dir,addr) \
+ ((pud_t *) pgd_page(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+
+/* Find an entry in the third-level page table.. */
#define pmd_offset(dir,addr) \
((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
@@ -559,7 +585,6 @@ do { \
#define __HAVE_ARCH_PGD_OFFSET_GATE
#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
-#include <asm-generic/pgtable-nopud.h>
#include <asm-generic/pgtable.h>
#endif /* _ASM_IA64_PGTABLE_H */
Index: linux-2.6/include/asm-ia64/pgalloc.h
===================================================================
--- linux-2.6.orig/include/asm-ia64/pgalloc.h 2005-10-26 18:59:21.254245014 -0500
+++ linux-2.6/include/asm-ia64/pgalloc.h 2005-10-26 19:08:46.598882737 -0500
@@ -87,6 +87,23 @@ static inline void pgd_free(pgd_t * pgd)
}
static inline void
+pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
+{
+ pgd_val(*pgd_entry) = __pa(pud);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return pgtable_quicklist_alloc();
+}
+
+static inline void pud_free(pud_t * pud)
+{
+ pgtable_quicklist_free(pud);
+}
+#define __pud_free_tlb(tlb, pud) pud_free(pud)
+
+static inline void
pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
{
pud_val(*pud_entry) = __pa(pmd);
Index: linux-2.6/include/asm-ia64/page.h
===================================================================
--- linux-2.6.orig/include/asm-ia64/page.h 2005-10-26 18:59:21.254245014 -0500
+++ linux-2.6/include/asm-ia64/page.h 2005-10-26 19:08:46.604741525 -0500
@@ -174,11 +174,13 @@ get_order (unsigned long size)
*/
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
+ typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
# define pte_val(x) ((x).pte)
# define pmd_val(x) ((x).pmd)
+# define pud_val(x) ((x).pud)
# define pgd_val(x) ((x).pgd)
# define pgprot_val(x) ((x).pgprot)
Index: linux-2.6/arch/ia64/kernel/ivt.S
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/ivt.S 2005-10-26 18:59:21.278656627 -0500
+++ linux-2.6/arch/ia64/kernel/ivt.S 2005-10-26 22:36:41.939866135 -0500
@@ -140,20 +140,26 @@ ENTRY(vhpt_miss)
(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
- shr.u r18=r22,PMD_SHIFT // shift L2 index into position
+ shr.u r19=r22,PUD_SHIFT // shift L2 index into position
;;
ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ shr.u r18=r22,PMD_SHIFT // shift L3 index into position
;;
(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ dep r28=r19,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
;;
-(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
+(p7) ld8 r29=[r28] // fetch the L2 entry (may be 0)
+ shr.u r19=r22,PAGE_SHIFT // shift L4 index into position
;;
-(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
- dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+(p7) cmp.eq p6,p7=r29,r0 // was L2 entry NULL?
+ dep r17=r18,r29,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
;;
-(p7) ld8 r18=[r21] // read the L3 PTE
+(p7) ld8 r20=[r17] // fetch the L3 entry (may be 0)
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L3 entry NULL?
+ dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L4 page table entry
+ ;;
+(p7) ld8 r18=[r21] // read the L4 PTE
mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss
;;
(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
@@ -192,12 +198,15 @@ ENTRY(vhpt_miss)
* between reading the pagetable and the "itc". If so, flush the entry we
* inserted and retry.
*/
- ld8 r25=[r21] // read L3 PTE again
- ld8 r26=[r17] // read L2 entry again
+ ld8 r25=[r21] // read L4 PTE again
+ ld8 r26=[r17] // read L3 entry again
+ ld8 r30=[r28] // read L2 entry again
;;
- cmp.ne p6,p7=r26,r20 // did L2 entry change
+ cmp.ne p6,p7=r26,r20 // did L3 entry change
mov r27=PAGE_SHIFT<<2
;;
+(p7) cmp.ne.or.andcm p6,p7=r30,r29 // did L2 entry change
+ ;;
(p6) ptc.l r22,r27 // purge PTE page translation
(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change
;;
@@ -432,18 +441,24 @@ ENTRY(nested_dtlb_miss)
(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
- shr.u r18=r22,PMD_SHIFT // shift L2 index into position
+ shr.u r19=r22,PUD_SHIFT // shift L2 index into position
;;
ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ shr.u r18=r22,PMD_SHIFT // shift L3 index into position
;;
(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
;;
(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
+ shr.u r19=r22,PAGE_SHIFT // shift L4 index into position
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L3 entry (may be 0)
;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L3 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L4 page table entry
(p6) br.cond.spnt page_fault
mov b0=r30
br.sptk.many b0 // return to continuation point
next reply other threads:[~2005-10-27 4:17 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-10-27 4:17 Robin Holt [this message]
2005-10-28 5:19 ` [RFC] 4-level page table directories Ian Wienand
2005-10-28 11:19 ` Robin Holt
2005-10-28 23:23 ` Luck, Tony
2005-10-28 23:55 ` Chen, Kenneth W
2005-10-29 0:49 ` Grant Grundler
2005-10-29 2:18 ` David Mosberger-Tang
2005-11-01 12:13 ` Robin Holt
2005-11-01 15:41 ` David Mosberger-Tang
2005-11-02 10:35 ` Robin Holt
2005-11-02 13:26 ` Robin Holt
2005-11-02 16:11 ` Luck, Tony
2005-11-02 16:23 ` Robin Holt
2005-11-02 16:30 ` Luck, Tony
2005-11-02 17:16 ` Robin Holt
2005-11-02 18:59 ` David Mosberger-Tang
2005-11-02 22:26 ` Ian Wienand
2005-11-03 1:36 ` Gerald Pfeifer
2005-11-03 1:53 ` Chen, Kenneth W
2005-11-03 3:55 ` Jack Steiner
2005-11-03 16:36 ` Robin Holt
2005-11-03 19:59 ` Chen, Kenneth W
2005-11-04 17:58 ` Luck, Tony
2005-11-04 21:37 ` Robin Holt
2005-11-04 21:42 ` Chen, Kenneth W
2005-11-04 22:50 ` Chen, Kenneth W
2005-11-07 21:18 ` Luck, Tony
2005-11-08 0:22 ` Rohit Seth
2005-11-08 12:43 ` Robin Holt
2005-11-08 18:23 ` Boehm, Hans
2005-11-08 18:52 ` Magenheimer, Dan (HP Labs Fort Collins)
2005-11-08 18:56 ` Rohit Seth
2005-11-08 19:36 ` Robin Holt
2005-11-08 20:07 ` Chen, Kenneth W
2005-11-08 20:27 ` Chen, Kenneth W
2005-11-08 22:09 ` Ian Wienand
2005-11-08 23:58 ` Gerald Pfeifer
2005-11-09 0:08 ` David Mosberger-Tang
2005-11-09 0:22 ` Rohit Seth
2005-11-09 0:46 ` Magenheimer, Dan (HP Labs Fort Collins)
2005-11-09 1:18 ` Chen, Kenneth W
2005-11-09 12:11 ` Robin Holt
2005-11-09 14:29 ` Robin Holt
2005-11-09 18:22 ` Chen, Kenneth W
2005-11-09 18:39 ` Luck, Tony
2005-11-10 0:03 ` Gerald Pfeifer
2005-11-10 0:23 ` Jack Steiner
2005-11-10 0:27 ` Luck, Tony
2005-11-10 2:54 ` Jack Steiner
2005-11-10 9:13 ` Robin Holt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051027041709.GA13193@attica.americas.sgi.com \
--to=holt@sgi.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox