From: "Luck, Tony" <tony.luck@intel.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [Patch 1/1] 4-level page tables v4.
Date: Thu, 10 Nov 2005 21:49:26 +0000 [thread overview]
Message-ID: <20051110214926.GA27555@agluck-lia64.sc.intel.com> (raw)
In-Reply-To: <20051110161915.GA3630@lnx-holt.americas.sgi.com>
Compiling with three levels, I see some differences in the scheduling
of instructions in the vhpt_miss handler and the nested_dtlb miss
handler. Side-by-side diff of a disassembly included below (original
sequence is on the left, new sequence is on the right). For the vhpt
case the new handler is 3 instructions shorter ... but shorter isn't
always better.
Ken, David ... can you cast an eye across these please.
-Tony
P.S. there are no other instruction differences in the remainder of
the kernel ... a good indication that you managed to dot all the Is
and cross all the Ts when changing the macros in the header files.
a000000100000000 <vhpt_miss>: a000000100000000 <vhpt_miss>:
a000000100000000: [MLX] mov r16=cr20 a000000100000000: [MLX] mov r16=cr20
a000000100000006: movl r18=0xe a000000100000006: movl r18=0xe
a000000100000010: [MMI] mov r25=cr21;; a000000100000010: [MMI] mov r25=cr21;;
a000000100000016: rsm 0x20000 a000000100000016: rsm 0x20000
a00000010000001c: mov r31=pr a00000010000001c: mov r31=pr
a000000100000020: [MII] mov.m r19=ar.k7 a000000100000020: [MII] mov.m r19=ar.k7
a000000100000026: shl r21=r16,3 a000000100000026: shl r21=r16,3
a00000010000002c: shr.u r17=r16,61;; a00000010000002c: shr.u r17=r16,61;;
a000000100000030: [MII] nop.m 0x0 a000000100000030: [MII] nop.m 0x0
a000000100000036: shr r22=r21,3 | a000000100000036: shr.u r22=r21,3
a00000010000003c: extr.u r26=r25,2,6;; a00000010000003c: extr.u r26=r25,2,6;;
a000000100000040: [MII] cmp.eq p0,p8=r18,r26 a000000100000040: [MII] cmp.eq p0,p8=r18,r26
a000000100000046: sub r27=r26,r18;; a000000100000046: sub r27=r26,r18;;
a00000010000004c: (p08) dep r25=r18,r25,2,6 a00000010000004c: (p08) dep r25=r18,r25,2,6
a000000100000050: [MII] nop.m 0x0 a000000100000050: [MII] nop.m 0x0
a000000100000056: (p08) shr r22=r22,r27;; a000000100000056: (p08) shr r22=r22,r27;;
a00000010000005c: cmp.eq p6,p7=5,r17 a00000010000005c: cmp.eq p6,p7=5,r17
a000000100000060: [MII] nop.m 0x0 a000000100000060: [MII] nop.m 0x0
a000000100000066: shr.u r18=r22,36;; a000000100000066: shr.u r18=r22,36;;
a00000010000006c: (p07) dep r17=r17,r19,11,3 a00000010000006c: (p07) dep r17=r17,r19,11,3
a000000100000070: [MLX] srlz.d a000000100000070: [MLX] srlz.d
a000000100000076: (p06) movl r19=0xa0000001008980 a000000100000076: (p06) movl r19=0xa0000001008980
a000000100000080: [MII] nop.m 0x0 a000000100000080: [MII] nop.m 0x0
a000000100000086: (p06) shr.u r21=r21,50 a000000100000086: (p06) shr.u r21=r21,50
a00000010000008c: (p07) shr.u r21=r21,47;; a00000010000008c: (p07) shr.u r21=r21,47;;
a000000100000090: [MII] nop.m 0x0 a000000100000090: [MII] nop.m 0x0
a000000100000096: (p06) dep r17=r18,r19,3,11 a000000100000096: (p06) dep r17=r18,r19,3,11
a00000010000009c: (p07) dep r17=r18,r17,3,8 a00000010000009c: (p07) dep r17=r18,r17,3,8
a0000001000000a0: [MFI] cmp.eq p7,p6=0,r21 a0000001000000a0: [MFI] cmp.eq p7,p6=0,r21
a0000001000000a6: nop.f 0x0 a0000001000000a6: nop.f 0x0
a0000001000000ac: shr.u r18=r22,25;; | a0000001000000ac: shr.u r20=r22,25;;
a0000001000000b0: [MMI] ld8 r17=[r17];; a0000001000000b0: [MMI] ld8 r17=[r17];;
a0000001000000b6: (p07) cmp.eq p6,p7=r17,r0 | a0000001000000b6: nop.m 0x0
a0000001000000bc: dep r17=r18,r17,3,11;; | a0000001000000bc: dep r30=r20,r17,3,11
a0000001000000c0: [MII] (p07) ld8 r20=[r17] | a0000001000000c0: [MMI] (p07) cmp.eq p6,p7=r17,r0;;
a0000001000000c6: shr.u r19=r22,14;; | a0000001000000c6: (p07) ld8 r20=[r30]
a0000001000000cc: (p07) cmp.eq.or.andcm p6,p7=r20 | a0000001000000cc: shr.u r19=r22,14;;
a0000001000000d0: [MFI] nop.m 0x0 | a0000001000000d0: [MII] nop.m 0x0
a0000001000000d6: nop.f 0x0 | a0000001000000d6: dep r21=r19,r20,3,11
a0000001000000dc: dep r21=r19,r20,3,11;; | a0000001000000dc: (p07) cmp.eq.or.andcm p6,p7=r20
a0000001000000e0: [MMI] (p07) ld8 r18=[r21] | a0000001000000e0: [MFI] (p07) ld8 r18=[r21]
a0000001000000e6: mov r19=cr17 | a0000001000000e6: nop.f 0x0
a0000001000000ec: nop.i 0x0;; | a0000001000000ec: dep r23=0,r20,0,14
a0000001000000f0: [MFI] nop.m 0x0 | a0000001000000f0: [MMI] mov r19=cr17;;
a0000001000000f6: nop.f 0x0 | a0000001000000f6: nop.m 0x0
a0000001000000fc: (p07) tbit.z p6,p7=r18,0 a0000001000000fc: (p07) tbit.z p6,p7=r18,0
a000000100000100: [MMI] mov r22=cr25;; a000000100000100: [MMI] mov r22=cr25;;
a000000100000106: nop.m 0x0 a000000100000106: nop.m 0x0
a00000010000010c: (p07) tbit.z.unc p11,p10=r19,32 | a00000010000010c: (p07) tbit.z.unc p11,p10=r19,32
a000000100000110: [MFI] nop.m 0x0 | a000000100000110: [MMI] (p10) itc.i r18;;
a000000100000116: nop.f 0x0 | a000000100000116: nop.m 0x0
a00000010000011c: dep r23=0,r20,0,14;; | a00000010000011c: nop.i 0x0;;
a000000100000120: [MMI] (p10) itc.i r18;; | a000000100000120: [MMI] (p11) itc.d r18;;
a000000100000126: nop.m 0x0 a000000100000126: nop.m 0x0
a00000010000012c: nop.i 0x0;; | a00000010000012c: nop.i 0x0
a000000100000130: [MMI] (p11) itc.d r18;; | a000000100000130: [MFB] nop.m 0x0
a000000100000136: nop.m 0x0 | a000000100000136: nop.f 0x0
a00000010000013c: nop.i 0x0 | a00000010000013c: (p06) br.cond.spnt.many a000000
a000000100000140: [MFB] nop.m 0x0 | a000000100000140: [MMI] mov cr20=r22
a000000100000146: nop.f 0x0 | a000000100000146: (p08) mov cr21=r25
a00000010000014c: (p06) br.cond.spnt.many a000000 | a00000010000014c: adds r24=1121,r23;;
a000000100000150: [MMI] mov cr20=r22 | a000000100000150: [MMI] (p07) itc.d r24;;
a000000100000156: (p08) mov cr21=r25 | a000000100000156: ld8 r26=[r30]
a00000010000015c: adds r24=1121,r23;; | a00000010000015c: nop.i 0x0;;
a000000100000160: [MMI] (p07) itc.d r24;; | a000000100000160: [MFI] cmp.eq p7,p6=r26,r20
a000000100000166: ld8 r25=[r21] | a000000100000166: nop.f 0x0
a00000010000016c: nop.i 0x0 | a00000010000016c: mov r27=56
a000000100000170: [MMI] ld8 r26=[r17];; | a000000100000170: [MMI] ld8 r25=[r21];;
a000000100000176: cmp.eq p7,p6=r26,r20 | a000000100000176: (p06) ptc.l r22,r27
a00000010000017c: mov r27=56;; | a00000010000017c: (p07) cmp.ne.or.andcm p6,p7=r25
a000000100000180: [MFI] (p06) ptc.l r22,r27 | a000000100000180: [MIB] (p06) ptc.l r16,r27
a000000100000186: nop.f 0x0 | a000000100000186: mov pr=r31,0xffffffffffff
a00000010000018c: (p07) cmp.ne.or.andcm p6,p7=r25 | a00000010000018c: rfi;;
a000000100000190: [MIB] (p06) ptc.l r16,r27 <
a000000100000196: mov pr=r31,0xffffffffffff <
a00000010000019c: rfi;; <
a000000100001400 <nested_dtlb_miss>: a000000100001400 <nested_dtlb_miss>:
a000000100001400: [MMI] rsm 0x20000 a000000100001400: [MMI] rsm 0x20000
a000000100001406: mov.m r19=ar.k7 a000000100001406: mov.m r19=ar.k7
a00000010000140c: shl r21=r16,3 a00000010000140c: shl r21=r16,3
a000000100001410: [MMI] mov r18=cr21;; a000000100001410: [MMI] mov r18=cr21;;
a000000100001416: nop.m 0x0 a000000100001416: nop.m 0x0
a00000010000141c: shr.u r17=r16,61 a00000010000141c: shr.u r17=r16,61
a000000100001420: [MII] nop.m 0x0 a000000100001420: [MII] nop.m 0x0
a000000100001426: extr.u r18=r18,2,6;; a000000100001426: extr.u r18=r18,2,6;;
a00000010000142c: cmp.eq p6,p7=5,r17 a00000010000142c: cmp.eq p6,p7=5,r17
a000000100001430: [MII] adds r22=-14,r18 a000000100001430: [MII] adds r22=-14,r18
a000000100001436: adds r18=22,r18;; a000000100001436: adds r18=22,r18;;
a00000010000143c: shr.u r22=r16,r22 a00000010000143c: shr.u r22=r16,r22
a000000100001440: [MII] nop.m 0x0 a000000100001440: [MII] nop.m 0x0
a000000100001446: shr.u r18=r16,r18 a000000100001446: shr.u r18=r16,r18
a00000010000144c: (p07) dep r17=r17,r19,11,3 a00000010000144c: (p07) dep r17=r17,r19,11,3
a000000100001450: [MLX] srlz.d a000000100001450: [MLX] srlz.d
a000000100001456: (p06) movl r19=0xa0000001008980 a000000100001456: (p06) movl r19=0xa0000001008980
a000000100001460: [MII] nop.m 0x0 a000000100001460: [MII] nop.m 0x0
a000000100001466: (p06) shr.u r21=r21,50 a000000100001466: (p06) shr.u r21=r21,50
a00000010000146c: (p07) shr.u r21=r21,47;; a00000010000146c: (p07) shr.u r21=r21,47;;
a000000100001470: [MII] nop.m 0x0 a000000100001470: [MII] nop.m 0x0
a000000100001476: (p06) dep r17=r18,r19,3,11 a000000100001476: (p06) dep r17=r18,r19,3,11
a00000010000147c: (p07) dep r17=r18,r17,3,8 a00000010000147c: (p07) dep r17=r18,r17,3,8
a000000100001480: [MFI] cmp.eq p7,p6=0,r21 | a000000100001480: [MII] cmp.eq p7,p6=0,r21
a000000100001486: nop.f 0x0 | a000000100001486: shr.u r18=r22,25;;
a00000010000148c: shr.u r18=r22,25;; | a00000010000148c: shr.u r19=r22,14
a000000100001490: [MMI] ld8 r17=[r17];; a000000100001490: [MMI] ld8 r17=[r17];;
a000000100001496: (p07) cmp.eq p6,p7=r17,r0 a000000100001496: (p07) cmp.eq p6,p7=r17,r0
a00000010000149c: dep r17=r18,r17,3,11;; a00000010000149c: dep r17=r18,r17,3,11;;
a0000001000014a0: [MII] (p07) ld8 r17=[r17] | a0000001000014a0: [MMI] (p07) ld8 r17=[r17];;
a0000001000014a6: shr.u r19=r22,14;; | a0000001000014a6: (p07) cmp.eq.or.andcm p6,p7=r17
a0000001000014ac: (p07) cmp.eq.or.andcm p6,p7=r17 | a0000001000014ac: dep r17=r19,r17,3,11
a0000001000014b0: [MIB] nop.m 0x0 | a0000001000014b0: [MFB] nop.m 0x0
a0000001000014b6: dep r17=r19,r17,3,11 | a0000001000014b6: nop.f 0x0
a0000001000014bc: (p06) br.cond.spnt.few a0000001 a0000001000014bc: (p06) br.cond.spnt.few a0000001
a0000001000014c0: [MIB] nop.m 0x0 a0000001000014c0: [MIB] nop.m 0x0
a0000001000014c6: mov b0=r30 a0000001000014c6: mov b0=r30
a0000001000014cc: br.many b0;; a0000001000014cc: br.many b0;;
next prev parent reply other threads:[~2005-11-10 21:49 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-11-10 16:19 [Patch 1/1] 4-level page tables v4 Robin Holt
2005-11-10 21:49 ` Luck, Tony [this message]
2005-11-10 22:38 ` Robin Holt
2005-11-10 23:03 ` Luck, Tony
2005-11-10 23:30 ` Chen, Kenneth W
2005-11-10 23:54 ` Chen, Kenneth W
2005-11-11 0:13 ` Chen, Kenneth W
2005-11-11 0:24 ` Jack Steiner
2005-11-11 0:58 ` Chen, Kenneth W
2005-11-11 1:19 ` Robin Holt
2005-11-11 2:06 ` Chen, Kenneth W
2005-11-11 2:11 ` Robin Holt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051110214926.GA27555@agluck-lia64.sc.intel.com \
--to=tony.luck@intel.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox