From: Robin Holt <holt@sgi.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [Patch 1/1] 4-level page tables v4.
Date: Thu, 10 Nov 2005 22:38:33 +0000 [thread overview]
Message-ID: <20051110223833.GA8037@lnx-holt.americas.sgi.com> (raw)
In-Reply-To: <20051110161915.GA3630@lnx-holt.americas.sgi.com>
On Thu, Nov 10, 2005 at 01:49:26PM -0800, Luck, Tony wrote:
> Compiling with three levels, I see some differences in the scheduling
> of instructions in the vhpt_miss handler and the nested_dtlb miss
> handler. Side-by-side diff of a disassembly included below (original
> sequence is on the left, new sequence is on the right). For the vhpt
> case the new handler is 3 instructions shorter ... but shorter isn't
> always better.
I used the objdump that Jack Steiner pointed me towards to optimize the
vhpt_miss handler and then tested it. This instruction order gave the best
performance, but we are talking extremely small differences.
Is the goal to make these identical? If so, it should be easy to do,
but I was not aware that was the intent.
I am going to attach the dispersal analysis from the modified objdump that
Jack has produced.
Thanks,
Robin
0000000000000000 <vhpt_miss>: 0000000000000000 <vhpt_miss>:
0: 0 [MLX] mov r16=cr20 0: 0 [MLX] mov r16=cr20
6: 0 movl r18=0xe 6: 0 movl r18=0xe
c: c:
10: 1 R[M2] [MMI] mov r25=cr21;; 10: 1 R[M2] [MMI] mov r25=cr21;;
16: 2 S rsm 0x20000 16: 2 S rsm 0x20000
1c: 2 mov r31=pr 1c: 2 mov r31=pr
20: 3 R[M2] [MII] mov.m r19=ar.k7 20: 3 R[M2] [MII] mov.m r19=ar.k7
26: 3 shl r21=r16,3 26: 3 shl r21=r16,3
2c: 3 shr.u r17=r16,61;; 2c: 3 shr.u r17=r16,61;;
30: 4 S [MII] nop.m 0x0 30: 4 S [MII] nop.m 0x0
36: 4 shr r22=r21,3 | 36: 4 shr.u r22=r21,3
3c: 5 R[I0] extr.u r26=r25,2,6;; 3c: 5 R[I0] extr.u r26=r25,2,6;;
40: 6 S [MII] cmp.eq p0,p8=r18,r26 40: 6 S [MII] cmp.eq p0,p8=r18,r26
46: 6 sub r27=r26,r18;; 46: 6 sub r27=r26,r18;;
4c: 7 S (p08) dep r25=r18,r25,2,6 4c: 7 S (p08) dep r25=r18,r25,2,6
50: 7 [MII] nop.m 0x0 50: 7 [MII] nop.m 0x0
56: 7 (p08) shr r22=r22,r27;; 56: 7 (p08) shr r22=r22,r27;;
5c: 8 S cmp.eq p6,p7=5,r17 5c: 8 S cmp.eq p6,p7=5,r17
60: 8 [MII] nop.m 0x0 60: 8 [MII] nop.m 0x0
66: 8 shr.u r18=r22,36;; 66: 8 shr.u r18=r22,36;;
6c: 9 S (p07) dep r17=r17,r19,11,3 6c: 9 S (p07) dep r17=r17,r19,11,3
70: 9 [MLX] srlz.d 70: 9 [MLX] srlz.d
76: 9 (p06) movl r19=0x0 76: 9 (p06) movl r19=0x0
7c: 7c:
80: 10 nop.m 0x0 80: 10 [MII] nop.m 0x0
86: 10 (p06) shr.u r21=r21,50 86: 10 (p06) shr.u r21=r21,50
8c: 10 (p07) shr.u r21=r21,47;; 8c: 10 (p07) shr.u r21=r21,47;;
90: 11 S nop.m 0x0 90: 11 S [MII] nop.m 0x0
96: 11 (p06) dep r17=r18,r19,3,11 96: 11 (p06) dep r17=r18,r19,3,11
9c: 12 R[I0] (p07) dep r17=r18,r17,3,8 9c: 12 R[I0] (p07) dep r17=r18,r17,3,8
a0: 12 cmp.eq p7,p6=0,r21 a0: 12 [MFI] cmp.eq p7,p6=0,r21
a6: 12 nop.f 0x0 | a6: 12 nop.f 0x0
ac: 12 shr.u r18=r22,25;; | ac: 12 shr.u r20=r22,25;;
b0: 13 ld8 r17=[r17];; | b0: 13 [MMI] ld8 r17=[r17];;
b6: 14 S (p07) cmp.eq p6,p7=r17,r0 | b6: 14 S nop.m 0x0
bc: 14 dep r17=r18,r17,3,11;; | bc: 14 dep r30=r20,r17,3,11
c0: 15 S (p07) ld8 r20=[r17] | c0: 14 [MMI] (p07) cmp.eq p6,p7=r17,r0;;
c6: 15 shr.u r19=r22,14;; | c6: 15 S (p07) ld8 r20=[r30]
cc: 16 S (p07) cmp.eq.or.andcm p6,p7=r20,r0 | cc: 15 shr.u r19=r22,14;;
d0: 16 nop.m 0x0 | d0: 16 S [MII] nop.m 0x0
d6: 16 nop.f 0x0 | d6: 16 dep r21=r19,r20,3,11
dc: 17 R[I0] dep r21=r19,r20,3,11;; | dc: 16 (p07) cmp.eq.or.andcm p6,p7=r20,r0;;
e0: 18 S (p07) ld8 r18=[r21] | e0: 17 S [MFI] (p07) ld8 r18=[r21]
e6: 18 mov r19=cr17 | e6: 17 nop.f 0x0
ec: 18 nop.i 0x0;; | ec: 17 dep r23=0,r20,0,14
f0: 19 S nop.m 0x0 | f0: 17 [MMI] mov r19=cr17;;
f6: 19 nop.f 0x0 | f6: 18 S nop.m 0x0
fc: 19 (p07) tbit.z p6,p7=r18,0 | fc: 18 (p07) tbit.z p6,p7=r18,0
100: 19 mov r22=cr25;; | 100: 18 [MMI] mov r22=cr25;;
106: 20 S nop.m 0x0 | 106: 19 S nop.m 0x0
10c: 20 (p07) tbit.z.unc p11,p10=r19,32 | 10c: 19 (p07) tbit.z.unc p11,p10=r19,32;;
110: 20 nop.m 0x0 | 110: 20 S [MMI] (p10) itc.i r18;;
116: 20 nop.f 0x0 | 116: 21 S nop.m 0x0
11c: 21 R[I0] dep r23=0,r20,0,14;; | 11c: 21 nop.i 0x0;;
120: 22 S (p10) itc.i r18;; | 120: 22 S [MMI] (p11) itc.d r18;;
126: 23 S nop.m 0x0 126: 23 S nop.m 0x0
12c: 23 nop.i 0x0;; | 12c: 23 nop.i 0x0
130: 24 S (p11) itc.d r18;; | 130: 23 [MFB] nop.m 0x0
136: 25 S nop.m 0x0 | 136: 23 nop.f 0x0
13c: 25 nop.i 0x0 | 13c: 23 (p06) br.cond.spnt.many 1820 <page_fault>
140: 25 nop.m 0x0 | 140: 24 [MMI] mov cr20=r22
146: 25 nop.f 0x0 | 146: 25 R[M2] (p08) mov cr21=r25
14c: 25 (p06) br.cond.spnt.many 1820 <page_fault> | 14c: 25 adds r24=1121,r23;;
150: 26 mov cr20=r22 | 150: 26 S [MMI] (p07) itc.d r24;;
156: 27 R[M2] (p08) mov cr21=r25 | 156: 27 S ld8 r26=[r30]
15c: 27 adds r24=1121,r23;; | 15c: 27 nop.i 0x0;;
160: 28 S (p07) itc.d r24;; | 160: 28 S [MFI] cmp.eq p7,p6=r26,r20
166: 29 S ld8 r25=[r21] | 166: 28 nop.f 0x0
16c: 29 nop.i 0x0 | 16c: 28 mov r27=56
170: 29 ld8 r26=[r17];; | 170: 28 [MMI] ld8 r25=[r21];;
176: 30 S cmp.eq p7,p6=r26,r20 | 176: 29 S (p06) ptc.l r22,r27
17c: 30 mov r27=56;; | 17c: 29 (p07) cmp.ne.or.andcm p6,p7=r25,r18;;
180: 31 S (p06) ptc.l r22,r27 | 180: 30 S [MIB] (p06) ptc.l r16,r27
186: 31 nop.f 0x0 | 186: 30 mov pr=r31,0xfffffffffffffffe
18c: 31 (p07) cmp.ne.or.andcm p6,p7=r25,r18;; | 18c: 30 rfi;;
190: 32 S (p06) ptc.l r16,r27 <
196: 32 mov pr=r31,0xfffffffffffffffe <
19c: 32 rfi;; <
...
0000000000001400 <nested_dtlb_miss>: 0000000000001400 <nested_dtlb_miss>:
1400: 0 [MMI] rsm 0x20000 1400: 0 [MMI] rsm 0x20000
1406: 1 R[M2] mov.m r19=ar.k7 1406: 1 R[M2] mov.m r19=ar.k7
140c: 1 shl r21=r16,3 140c: 1 shl r21=r16,3
1410: 2 R[M2] [MMI] mov r18=cr21;; 1410: 2 R[M2] [MMI] mov r18=cr21;;
1416: 3 S nop.m 0x0 1416: 3 S nop.m 0x0
141c: 3 shr.u r17=r16,61 141c: 3 shr.u r17=r16,61
1420: 3 [MII] nop.m 0x0 1420: 3 [MII] nop.m 0x0
1426: 4 R[I0] extr.u r18=r18,2,6;; 1426: 4 R[I0] extr.u r18=r18,2,6;;
142c: 5 S cmp.eq p6,p7=5,r17 142c: 5 S cmp.eq p6,p7=5,r17
1430: 5 [MII] adds r22=-14,r18 1430: 5 [MII] adds r22=-14,r18
1436: 5 adds r18=22,r18;; 1436: 5 adds r18=22,r18;;
143c: 6 S shr.u r22=r16,r22 143c: 6 S shr.u r22=r16,r22
1440: 6 [MII] nop.m 0x0 1440: 6 [MII] nop.m 0x0
1446: 6 shr.u r18=r16,r18 1446: 6 shr.u r18=r16,r18
144c: 7 R[I0] (p07) dep r17=r17,r19,11,3 144c: 7 R[I0] (p07) dep r17=r17,r19,11,3
1450: 7 [MLX] srlz.d 1450: 7 [MLX] srlz.d
1456: 7 (p06) movl r19=0x0 1456: 7 (p06) movl r19=0x0
145c: 145c:
1460: 8 [MII] nop.m 0x0 1460: 8 [MII] nop.m 0x0
1466: 8 (p06) shr.u r21=r21,50 1466: 8 (p06) shr.u r21=r21,50
146c: 8 (p07) shr.u r21=r21,47;; 146c: 8 (p07) shr.u r21=r21,47;;
1470: 9 S [MII] nop.m 0x0 1470: 9 S [MII] nop.m 0x0
1476: 9 (p06) dep r17=r18,r19,3,11 1476: 9 (p06) dep r17=r18,r19,3,11
147c: 10 R[I0] (p07) dep r17=r18,r17,3,8 147c: 10 R[I0] (p07) dep r17=r18,r17,3,8
1480: 10 [MFI] cmp.eq p7,p6=0,r21 | 1480: 10 [MII] cmp.eq p7,p6=0,r21
1486: 10 nop.f 0x0 | 1486: 10 shr.u r18=r22,25;;
148c: 10 shr.u r18=r22,25;; | 148c: 11 S shr.u r19=r22,14
1490: 11 [MMI] ld8 r17=[r17];; 1490: 11 [MMI] ld8 r17=[r17];;
1496: 12 S (p07) cmp.eq p6,p7=r17,r0 1496: 12 S (p07) cmp.eq p6,p7=r17,r0
149c: 12 dep r17=r18,r17,3,11;; 149c: 12 dep r17=r18,r17,3,11;;
14a0: 13 S [MII] (p07) ld8 r17=[r17] | 14a0: 13 S [MMI] (p07) ld8 r17=[r17];;
14a6: 13 shr.u r19=r22,14;; | 14a6: 14 S (p07) cmp.eq.or.andcm p6,p7=r17,r0
14ac: 14 S (p07) cmp.eq.or.andcm p6,p7=r17,r0 | 14ac: 14 dep r17=r19,r17,3,11
14b0: 14 [MIB] nop.m 0x0 | 14b0: 14 [MFB] nop.m 0x0
14b6: 15 R[I0] dep r17=r19,r17,3,11 | 14b6: 14 nop.f 0x0
14bc: 15 (p06) br.cond.spnt.few 1820 <page_fault> | 14bc: 14 (p06) br.cond.spnt.few 1820 <page_fault>
14c0: 16 B [MIB] nop.m 0x0 | 14c0: 15 [MIB] nop.m 0x0
14c6: 16 mov b0=r30 | 14c6: 15 mov b0=r30
14cc: 16 br.many b0;; | 14cc: 15 br.many b0;;
next prev parent reply other threads:[~2005-11-10 22:38 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-11-10 16:19 [Patch 1/1] 4-level page tables v4 Robin Holt
2005-11-10 21:49 ` Luck, Tony
2005-11-10 22:38 ` Robin Holt [this message]
2005-11-10 23:03 ` Luck, Tony
2005-11-10 23:30 ` Chen, Kenneth W
2005-11-10 23:54 ` Chen, Kenneth W
2005-11-11 0:13 ` Chen, Kenneth W
2005-11-11 0:24 ` Jack Steiner
2005-11-11 0:58 ` Chen, Kenneth W
2005-11-11 1:19 ` Robin Holt
2005-11-11 2:06 ` Chen, Kenneth W
2005-11-11 2:11 ` Robin Holt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051110223833.GA8037@lnx-holt.americas.sgi.com \
--to=holt@sgi.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox