All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
To: linux-ia64@vger.kernel.org
Subject: accessed/dirty bit handler tuning
Date: Wed, 15 Mar 2006 13:29:30 +0000	[thread overview]
Message-ID: <441816BA.6050606@bull.net> (raw)
In-Reply-To: <44157CF1.5060902@bull.net>

[-- Attachment #1: Type: text/plain, Size: 273 bytes --]

This patch is based on Christoph's patch entitled "Fix race in the
accessed/dirty bit handlers".

- It adds the missing "srlz.d"
- It uses some "nta" and "bias" cache hints
- It slightly reorganizes the routines for some minor performance improvements

Thanks,

Zoltan



[-- Attachment #2: srlz.d.diff3 --]
[-- Type: text/plain, Size: 6512 bytes --]

Signed-off-by: Zoltan Menyhart <Zoltan.Menyhart@bull.net>

Index: linux-2.6.16-rc5-mm3/arch/ia64/kernel/ivt.S
===================================================================
--- old/arch/ia64/kernel/ivt.S	2006-03-15 12:01:23.000000000 +0100
+++ new/arch/ia64/kernel/ivt.S	2006-03-15 14:11:46.000000000 +0100
@@ -557,29 +557,59 @@ ENTRY(dirty_bit)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+	/*
+	 * The atomic instructions are handled exclusively by the L2 (L2D) cache.
+	 * "bias" is a hint to acquire exclusive ownership.
+	 * "nta": allocate the cache line only in L2 and to bias it to be replaced.
+	 */
+1:	ld8.bias.nta r18 = [r17]
+	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
 	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
-	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only update if page is present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
-	;;
-(p6)	itc.d r25				// install updated PTE
+	mov r24 = PAGE_SHIFT << 2
 	;;
 	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
+	 * "nta" is a hint not to allocate the cache line elsewhere than in L2,
+	 * to bias it to be replaced and not to write it back into L3.
+	 *
+	 * We do not care about the result of "cmpxchg". It only makes sure we do not
+	 * overwrite a PTE that has been modified by someone else in the meantime.
+	 * We'll read back the in-memory PTE later.
 	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
+(p6)	cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv	// Only update if page is present
+	/*
+	 * We load the new translation independently of the success of "cmpxchg".
+	 * Should "cmpxchg" have failed, we'll purge the new translation later.
+	 */
+(p6)	itc.d r25				// Install updated PTE if page is present
+	;;					// "itc" must be the last in the group
+	/*
+	 * We make sure that the "itc" is visible to generated purges (like "ptc.ga")
+	 * before we re-read the PTE.
+	 * (No, we are not going to use the freshly inserted translation for the next
+	 * "ld".)
+	 * A simple ";;" does not make sure that the purges / invalidations go all the
+	 * way down. E.g. in case of page size of 64 K, up to 16 L1 DTLB entries may be
+	 * purged and all the L1D cache lines brought in via these translations need to
+	 * be invalidated.
+	 */
+(p6)	srlz.d
+	/*
+	 * No need for ";;": the following "ld" can be in the same group as "srlz.d".
+	 */
+(p6)	ld8.nta r18 = [r17]			// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0, p7 = r18, r25		// Is it same as we wanted to install?
 	;;
+	/*
+	 * The new translation (or the old one if "p6" is off) gets purged if:
+	 * - the page is not present
+	 * - the in-memory PTE is not what we wanted to write out because:
+	 *   + someone else has modified it after our successful "cmpxchg"
+	 *   + "cmpxchg" has failed (with the exception when someone else has set the
+	 *     very same dirty bit as we wanted to => our new translation is correct)
+	 */
 (p7)	ptc.l r16,r24
 	mov b0=r29				// restore b0
 	mov ar.ccv=r28
@@ -602,7 +632,10 @@ END(dirty_bit)
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(iaccess_bit)
 	DBG_FAULT(9)
-	// Like Entry 8, except for instruction access
+	/*
+	 * Like Entry 8, except for instruction access.
+	 * For the remarks on cache hints and synchronization issues see there.
+	 */
 	mov r16=cr.ifa				// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	mov r31=pr				// save predicates
@@ -623,28 +656,20 @@ ENTRY(iaccess_bit)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
+1:	ld8.bias.nta r18 = [r17]
 	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_A,r18			// set the accessed bit
 	tbit.z p7,p6 = r18,_PAGE_P_BIT	 	// Check present bit
+	mov r24 = PAGE_SHIFT << 2
 	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page present
-	;;
-(p6)	itc.i r25				// install updated PTE
+(p6)	cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv	// Only update if page is present
+(p6)	itc.i r25				// Install updated PTE if page is present
 	;;
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
+(p6)	srlz.d
+(p6)	ld8.nta r18 = [r17]			// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0, p7 = r18, r25		// Is it same as we wanted to install?
 	;;
 (p7)	ptc.l r16,r24
 	mov b0=r29				// restore b0
@@ -668,7 +693,10 @@ END(iaccess_bit)
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(daccess_bit)
 	DBG_FAULT(10)
-	// Like Entry 8, except for data access
+	/*
+	 * Like Entry 8, except for data access.
+	 * For the remarks on cache hints and synchronization issues see there.
+	 */
 	mov r16=cr.ifa				// get the address that caused the fault
 	movl r30=1f				// load continuation point in case of nested fault
 	;;
@@ -678,27 +706,20 @@ ENTRY(daccess_bit)
 #ifdef CONFIG_SMP
 	mov r28=ar.ccv				// save ar.ccv
 	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
+1:	ld8.bias.nta r18 = [r17]
+	;;
 	mov ar.ccv=r18				// set compare value for cmpxchg
 	or r25=_PAGE_A,r18			// set the dirty bit
 	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
+	mov r24 = PAGE_SHIFT << 2
 	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page is present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
-	;;
-(p6)	itc.d r25				// install updated PTE
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
+(p6)	cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv	// Only update if page is present
+(p6)	itc.d r25				// Install updated PTE if page is present
 	;;
-	ld8 r18=[r17]				// read PTE again
+(p6)	srlz.d
+(p6)	ld8.nta r18 = [r17]			// Read PTE again
 	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+(p6)	cmp.eq p0, p7 = r18, r25		// Is it same as we wanted to install?
 	;;
 (p7)	ptc.l r16,r24
 	mov ar.ccv=r28

  parent reply	other threads:[~2006-03-15 13:29 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-13 14:08 accessed/dirty bit handler tuning Zoltan Menyhart
2006-03-13 16:31 ` Christoph Lameter
2006-03-13 16:55 ` Zoltan Menyhart
2006-03-13 19:46 ` Chen, Kenneth W
2006-03-13 20:05 ` Luck, Tony
2006-03-13 20:14 ` Chen, Kenneth W
2006-03-13 22:53 ` Chen, Kenneth W
2006-03-14 10:12 ` Zoltan Menyhart
2006-03-14 19:33 ` Chen, Kenneth W
2006-03-15 13:29 ` Zoltan Menyhart [this message]
2006-03-15 17:37 ` Chen, Kenneth W
2006-03-16  9:57 ` Zoltan Menyhart
2006-03-16 10:19 ` Luck, Tony
2006-03-16 19:12 ` Chen, Kenneth W
2006-03-29  8:11 ` Zoltan Menyhart
2006-03-29  8:28 ` Chen, Kenneth W
2006-03-29 13:37 ` Zoltan Menyhart
2006-03-29 17:01 ` Zoltan Menyhart
2006-03-29 22:57 ` Luck, Tony
2006-03-29 22:59 ` Chen, Kenneth W
2006-03-30 15:13 ` Zoltan Menyhart
2006-03-31 16:23 ` Zoltan Menyhart
2006-03-31 19:08 ` Chen, Kenneth W
2006-03-31 21:18 ` Zoltan Menyhart
2006-03-31 21:51 ` Chen, Kenneth W
2006-03-31 22:14 ` Chen, Kenneth W
2006-03-31 22:57 ` Zoltan Menyhart
2006-04-03  8:46 ` Zoltan Menyhart
2006-04-03 13:45 ` Zoltan Menyhart
2006-04-03 15:49 ` Luck, Tony
2006-04-03 15:57 ` Luck, Tony
2006-04-03 16:33 ` Zoltan Menyhart
2006-04-03 16:42 ` David Mosberger-Tang
2006-04-03 17:23 ` Zoltan Menyhart
2006-04-03 17:50 ` Luck, Tony
2006-04-03 18:27 ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=441816BA.6050606@bull.net \
    --to=zoltan.menyhart@bull.net \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.