public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* [Linux-ia64] [PATCH] Performance Patch for TLB misses
@ 2002-11-07 14:00 Christian Hildner
  2002-11-07 16:38 ` David Mosberger
                   ` (8 more replies)
  0 siblings, 9 replies; 10+ messages in thread
From: Christian Hildner @ 2002-11-07 14:00 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 977 bytes --]

Hi,

I created a patch that will (hopefully) improve the performance of TLB
miss handling.

- enlarge the distance between rsm psr.dt and srlz.d (the movl
instructions do not need data access)

- let the processor check for the page-not-present case (on a usable
system the common case is that the page is present, so optimize for
that and drop the conditional branch)

- let the processor also check the privilege level, because a
privilege violation is very rare

The second modification will (rarely) result in the insertion of an
unused TLB entry. But this entry will be overwritten with the usable
entry when the page is swapped in. The time needed for the insertion of
the TLB entry and the additional interruption will be negligible
compared to the time needed for swapping in.

Since I am still waiting for an Itanium 2 (Itanium 1 does not have any
relevance nowadays), I have done no performance testing. All Itanium 2
owners are invited to test the patch.

Please send me your comments.

Christian

[-- Attachment #2: ivt.S_patch_2.4.18 --]
[-- Type: text/plain, Size: 3361 bytes --]

--- ivt.S_orig	Tue Mar 19 11:22:28 2002
+++ ivt.S	Thu Nov  7 13:53:02 2002
@@ -114,8 +114,8 @@
 	 *	- the faulting virtual address has no L1, L2, or L3 mapping
 	 */
 	mov r16=cr.ifa				// get address that caused the TLB miss
-	;;
 	rsm psr.dt				// use physical addressing for data
+	;;
 	mov r31=pr				// save the predicate registers
 	mov r19=IA64_KR(PT_BASE)		// get page table base address
 	shl r21=r16,3				// shift bit 60 into sign bit
@@ -125,7 +125,6 @@
 	shr.u r18=r16,PGDIR_SHIFT		// get bits 33-63 of the faulting address
 	;;
 (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
-	srlz.d					// ensure "rsm psr.dt" has taken effect
 (p6)	movl r19=__pa(SWAPPER_PGD_ADDR)		// region 5 is rooted at swapper_pg_dir
 (p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
 (p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
@@ -135,6 +134,7 @@
 	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
 	shr.u r18=r16,PMD_SHIFT			// shift L2 index into position
 	;;
+	srlz.d					// ensure "rsm psr.dt" has taken effect
 	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
 	;;
 (p7)	cmp.eq p6,p7=r17,r0			// was L1 entry NULL?
@@ -212,9 +212,6 @@
 1:	ld8 r18=[r17]				// read L3 PTE
 	;;
 	mov b0=r29
-	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
-(p6)	br.cond.spnt page_fault
-	;;
 	itc.i r18
 	;;
 #ifdef CONFIG_SMP
@@ -250,9 +247,6 @@
 1:	ld8 r18=[r17]				// read L3 PTE
 	;;
 	mov b0=r29
-	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
-(p6)	br.cond.spnt page_fault
-	;;
 	itc.d r18
 	;;
 #ifdef CONFIG_SMP
@@ -274,12 +268,11 @@
 	DBG_FAULT(3)
 	mov r16=cr.ifa		// get address that caused the TLB miss
 	movl r17=PAGE_KERNEL
-	mov r21=cr.ipsr
 	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
 	mov r31=pr
 	;;
 #ifdef CONFIG_DISABLE_VHPT
-	shr.u r22=r16,61			// get the region number into r21
+	shr.u r22=r16,61			// get the region number
 	;;
 	cmp.gt p8,p0=6,r22			// user mode
 	;;
@@ -289,16 +282,13 @@
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk itlb_fault
 #endif
-	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
 	and r19=r19,r16		// clear ed, reserved bits, and PTE control bits
 	shr.u r18=r16,57	// move address bit 61 to bit 4
 	;;
 	andcm r18=0x10,r18	// bit 4=~address-bit(61)
-	cmp.ne p8,p0=r0,r23	// psr.cpl != 0?
 	or r19=r17,r19		// insert PTE control bits into r19
 	;;
 	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
-(p8)	br.cond.spnt page_fault
 	;;
 	itc.i r19		// insert the TLB entry
 	mov pr=r31,-1
@@ -318,7 +308,7 @@
 	mov r31=pr
 	;;
 #ifdef CONFIG_DISABLE_VHPT
-	shr.u r22=r16,61			// get the region number into r21
+	shr.u r22=r16,61			// get the region number
 	;;
 	cmp.gt p8,p0=6,r22			// access to region 0-5
 	;;
@@ -328,15 +318,11 @@
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk dtlb_fault
 #endif
-	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
 	tbit.nz p6,p7=r20,IA64_ISR_SP_BIT	// is speculation bit on?
 	and r19=r19,r16		// clear ed, reserved bits, and PTE control bits
 	shr.u r18=r16,57	// move address bit 61 to bit 4
 	;;
 	andcm r18=0x10,r18	// bit 4=~address-bit(61)
-	cmp.ne p8,p0=r0,r23
-(p8)	br.cond.spnt page_fault
-
 	dep r21=-1,r21,IA64_PSR_ED_BIT,1
 	or r19=r19,r17		// insert PTE control bits into r19
 	;;
@@ -1412,3 +1398,5 @@
 // 0x7f00 Entry 67 (size 16 bundles) Reserved
 	DBG_FAULT(67)
 	FAULT(67)
+
+

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2002-11-12 18:03 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-07 14:00 [Linux-ia64] [PATCH] Performance Patch for TLB misses Christian Hildner
2002-11-07 16:38 ` David Mosberger
2002-11-07 17:14 ` Mario Smarduch
2002-11-11  8:38 ` Christian Hildner
2002-11-11  8:49 ` Christian Hildner
2002-11-11 12:07 ` John Marvin
2002-11-11 18:48 ` David Mosberger
2002-11-12  8:43 ` Christian Hildner
2002-11-12 17:00 ` David Mosberger
2002-11-12 18:03 ` David Mosberger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox