From: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
To: linux-ia64@vger.kernel.org
Subject: accessed/dirty bit handler tuning
Date: Wed, 15 Mar 2006 13:29:30 +0000 [thread overview]
Message-ID: <441816BA.6050606@bull.net> (raw)
In-Reply-To: <44157CF1.5060902@bull.net>
[-- Attachment #1: Type: text/plain, Size: 273 bytes --]
This patch is based on Christoph's patch entitled "Fix race in the
accessed/dirty bit handlers".
- It adds the missing "srlz.d"
- It uses some "nta" and "bias" cache hints
- It slightly reorganizes the routines for some minor performance improvements
Thanks,
Zoltan
[-- Attachment #2: srlz.d.diff3 --]
[-- Type: text/plain, Size: 6512 bytes --]
Signed-off-by: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
Index: linux-2.6.16-rc5-mm3/arch/ia64/kernel/ivt.S
===================================================================
--- old/arch/ia64/kernel/ivt.S 2006-03-15 12:01:23.000000000 +0100
+++ new/arch/ia64/kernel/ivt.S 2006-03-15 14:11:46.000000000 +0100
@@ -557,29 +557,59 @@ ENTRY(dirty_bit)
#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
+ /*
+ * The atomic instructions are handled exclusively by the L2 (L2D) cache.
+ * "bias" is a hint to acquire exclusive ownership.
+ * "nta" is a hint to allocate the cache line only in L2 and to bias it to be replaced.
+ */
+1: ld8.bias.nta r18 = [r17]
+ ;;
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
- ;;
-(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present
- mov r24=PAGE_SHIFT<<2
- ;;
-(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present
- ;;
-(p6) itc.d r25 // install updated PTE
+ mov r24 = PAGE_SHIFT << 2
;;
/*
- * Tell the assemblers dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
+ * "nta" is a hint not to allocate the cache line elsewhere than in L2,
+ * to bias it to be replaced and not to write it back into L3.
+ *
+ * We do not care about the result of "cmpxchg". It only makes sure we do not
+ * overwrite a PTE that has been modified by someone else in the meantime.
+ * We'll read back the in-memory PTE later.
*/
- dv_serialize_data
-
- ld8 r18=[r17] // read PTE again
+(p6) cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv // Only update if page is present
+ /*
+ * We load the new translation independently of the success of "cmpxchg".
+ * Should "cmpxchg" have failed, we'll purge the new translation later.
+ */
+(p6) itc.d r25 // Install updated PTE if page is present
+ ;; // "itc" must be the last in the group
+ /*
+ * We make sure that the "itc" is visible to generated purges (like "ptc.ga")
+ * before we re-read the PTE.
+ * (No, we are not going to use the freshly inserted translation for the next
+ * "ld".)
+ * A simple ";;" does not make sure that the purges / invalidations go all the
+ * way down. E.g. in case of page size of 64 K, up to 16 L1 DTLB entries may be
+ * purged and all the L1D cache lines brought in via these translations need to
+ * be invalidated.
+ */
+(p6) srlz.d
+ /*
+ * No need for ";;", the following "ld" can be in the same group as "srlz.d".
+ */
+(p6) ld8.nta r18 = [r17] // Read PTE again
;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+(p6) cmp.eq p0, p7 = r18, r25 // Is it same as we wanted to install?
;;
+ /*
+ * The new translation (or the old one if "p6" is off) gets purged if:
+ * - the page is not present
+ * - the in-memory PTE is not what we wanted to write out because:
+ * + someone else has modified it after our successful "cmpxchg"
+ * + "cmpxchg" has failed (with the exception when someone else has set the
+ * very same dirty bit as we wanted to => our new translation is correct)
+ */
(p7) ptc.l r16,r24
mov b0=r29 // restore b0
mov ar.ccv=r28
@@ -602,7 +632,10 @@ END(dirty_bit)
// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
ENTRY(iaccess_bit)
DBG_FAULT(9)
- // Like Entry 8, except for instruction access
+ /*
+ * Like Entry 8, except for instruction access.
+ * For the remarks on cache hints and synchronization issues see there.
+ */
mov r16=cr.ifa // get the address that caused the fault
movl r30=1f // load continuation point in case of nested fault
mov r31=pr // save predicates
@@ -623,28 +656,20 @@ ENTRY(iaccess_bit)
#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
;;
-1: ld8 r18=[r17]
+1: ld8.bias.nta r18 = [r17]
;;
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_A,r18 // set the accessed bit
tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
+ mov r24 = PAGE_SHIFT << 2
;;
-(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present
- mov r24=PAGE_SHIFT<<2
- ;;
-(p6) cmp.eq p6,p7=r26,r18 // Only if page present
- ;;
-(p6) itc.i r25 // install updated PTE
+(p6) cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv // Only update if page is present
+(p6) itc.i r25 // Install updated PTE if page is present
;;
- /*
- * Tell the assemblers dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r18=[r17] // read PTE again
+(p6) srlz.d
+(p6) ld8.nta r18 = [r17] // Read PTE again
;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+(p6) cmp.eq p0, p7 = r18, r25 // Is it same as we wanted to install?
;;
(p7) ptc.l r16,r24
mov b0=r29 // restore b0
@@ -668,7 +693,10 @@ END(iaccess_bit)
// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
ENTRY(daccess_bit)
DBG_FAULT(10)
- // Like Entry 8, except for data access
+ /*
+ * Like Entry 8, except for data access.
+ * For the remarks on cache hints and synchronization issues see there.
+ */
mov r16=cr.ifa // get the address that caused the fault
movl r30=1f // load continuation point in case of nested fault
;;
@@ -678,27 +706,20 @@ ENTRY(daccess_bit)
#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
+1: ld8.bias.nta r18 = [r17]
+ ;;
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_A,r18 // set the dirty bit
tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit
+ mov r24 = PAGE_SHIFT << 2
;;
-(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present
- mov r24=PAGE_SHIFT<<2
- ;;
-(p6) cmp.eq p6,p7=r26,r18 // Only if page is present
- ;;
-(p6) itc.d r25 // install updated PTE
- /*
- * Tell the assemblers dependency-violation checker that the above "itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
+(p6) cmpxchg8.acq.nta r26 = [r17],r25,ar.ccv // Only update if page is present
+(p6) itc.d r25 // Install updated PTE if page is present
;;
- ld8 r18=[r17] // read PTE again
+(p6) srlz.d
+(p6) ld8.nta r18 = [r17] // Read PTE again
;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly installed
+(p6) cmp.eq p0, p7 = r18, r25 // Is it same as we wanted to install?
;;
(p7) ptc.l r16,r24
mov ar.ccv=r28
next prev parent reply other threads:[~2006-03-15 13:29 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-03-13 14:08 accessed/dirty bit handler tuning Zoltan Menyhart
2006-03-13 16:31 ` Christoph Lameter
2006-03-13 16:55 ` Zoltan Menyhart
2006-03-13 19:46 ` Chen, Kenneth W
2006-03-13 20:05 ` Luck, Tony
2006-03-13 20:14 ` Chen, Kenneth W
2006-03-13 22:53 ` Chen, Kenneth W
2006-03-14 10:12 ` Zoltan Menyhart
2006-03-14 19:33 ` Chen, Kenneth W
2006-03-15 13:29 ` Zoltan Menyhart [this message]
2006-03-15 17:37 ` Chen, Kenneth W
2006-03-16 9:57 ` Zoltan Menyhart
2006-03-16 10:19 ` Luck, Tony
2006-03-16 19:12 ` Chen, Kenneth W
2006-03-29 8:11 ` Zoltan Menyhart
2006-03-29 8:28 ` Chen, Kenneth W
2006-03-29 13:37 ` Zoltan Menyhart
2006-03-29 17:01 ` Zoltan Menyhart
2006-03-29 22:57 ` Luck, Tony
2006-03-29 22:59 ` Chen, Kenneth W
2006-03-30 15:13 ` Zoltan Menyhart
2006-03-31 16:23 ` Zoltan Menyhart
2006-03-31 19:08 ` Chen, Kenneth W
2006-03-31 21:18 ` Zoltan Menyhart
2006-03-31 21:51 ` Chen, Kenneth W
2006-03-31 22:14 ` Chen, Kenneth W
2006-03-31 22:57 ` Zoltan Menyhart
2006-04-03 8:46 ` Zoltan Menyhart
2006-04-03 13:45 ` Zoltan Menyhart
2006-04-03 15:49 ` Luck, Tony
2006-04-03 15:57 ` Luck, Tony
2006-04-03 16:33 ` Zoltan Menyhart
2006-04-03 16:42 ` David Mosberger-Tang
2006-04-03 17:23 ` Zoltan Menyhart
2006-04-03 17:50 ` Luck, Tony
2006-04-03 18:27 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=441816BA.6050606@bull.net \
--to=zoltan.menyhart@bull.net \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox