public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
From: Mario Smarduch <cms063@email.mot.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [Linux-ia64] [PATCH] Performance Patch for TLB misses
Date: Thu, 07 Nov 2002 17:14:54 +0000	[thread overview]
Message-ID: <marc-linux-ia64-105590709805352@msgid-missing> (raw)
In-Reply-To: <marc-linux-ia64-105590709805350@msgid-missing>

Christian Hildner wrote:

> Hi,
>
> I created a patch that will (hopefully) increase the performance for TLB
> miss handling.
>
> - enlarge the distance between rsm psr.dt and srlz (the movl do not need
> data access)
>
> - let the processor check for the case of page not present (because for
> useable systems the common case is that the page is present, so optimize
> for that and drop the conditional branch)
>
> - let the processor also check for privilege level because it's very
> very rare

Wouldn't removal of this possibly cause cached and uncached mappings
to the same page?

- mario.

>
>
> The second modification will result (rarely) in the insertion of an
> unused TLB entry. But this entry will be overwritten with the useable
> entry when the page is swapped in. The time needed for the insertion of
> the TLB entry and the additional interruption will be negligible
> compared to the time needed for swapping in.
>
> Since I'm waiting for Itanium 2 (Itanium 1 does not have any relevance
> nowadays) I have done no performance testing. All Itanium 2 owners are
> invited for testing.
>
> Please tell me your comments.
>
> Christian
>
>   ------------------------------------------------------------------------
> --- ivt.S_orig  Tue Mar 19 11:22:28 2002
> +++ ivt.S  Thu Nov  7 13:53:02 2002
> @@ -114,8 +114-,8 @@
>          *      - the faulting virtual address has no L1, L2, or L3 mapping
>          */
>         mov r16=cr.ifa                          // get address that caused the TLB miss
> -       ;;
>         rsm psr.dt                              // use physical addressing for data
>         ;;
>         mov r31=pr                              // save the predicate registers
>         mov r19=IA64_KR(PT_BASE)                // get page table base address
>         shl r21=r16,3                           // shift bit 60 into sign bit
> @@ -125,7 +124-,6 @@
>         shr.u r18=r16,PGDIR_SHIFT               // get bits 33-63 of the faulting address
>         ;;
>  (p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in place
> -       srlz.d                                  // ensure "rsm psr.dt" has taken effect
>  (p6)   movl r19=__pa(SWAPPER_PGD_ADDR)         // region 5 is rooted at swapper_pg_dir
>  (p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
>  (p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
> @@ -135,6 +134-,7 @@
>         cmp.eq p7,p6=0,r21                      // unused address bits all zeroes?
>         shr.u r18=r16,PMD_SHIFT                 // shift L2 index into position
>         ;;
> +       srlz.d                                  // ensure "rsm psr.dt" has taken effect
>         ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
>         ;;
>  (p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
> @@ -212,9 +210-,6 @@
>  1:     ld8 r18=[r17]                           // read L3 PTE
>         ;;
>         mov b0=r29
> -       tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
> -(p6)   br.cond.spnt page_fault
> -       ;;
>         itc.i r18
>         ;;
>  #ifdef CONFIG_SMP
> @@ -250,9 +244-,6 @@
>  1:     ld8 r18=[r17]                           // read L3 PTE
>         ;;
>         mov b0=r29
> -       tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
> -(p6)   br.cond.spnt page_fault
> -       ;;
>         itc.d r18
>         ;;
>  #ifdef CONFIG_SMP
> @@ -274,12 +268-,11 @@
>         DBG_FAULT(3)
>         mov r16=cr.ifa          // get address that caused the TLB miss
>         movl r17=PAGE_KERNEL
> -       mov r21=cr.ipsr
>         movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
>         mov r31=pr
>         ;;
>  #ifdef CONFIG_DISABLE_VHPT
> -       shr.u r22=r16,61                        // get the region number into r21
> +       shr.u r22=r16,61                        // get the region number
>         ;;
>         cmp.gt p8,p0=6,r22                      // user mode
>         ;;
> @@ -289,16 +280-,13 @@
>  (p8)   mov r29=b0                              // save b0
>  (p8)   br.cond.dptk itlb_fault
>  #endif
> -       extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
>         and r19=r19,r16         // clear ed, reserved bits, and PTE control bits
>         shr.u r18=r16,57        // move address bit 61 to bit 4
>         ;;
>         andcm r18=0x10,r18      // bit 4=~address-bit(61)
> -       cmp.ne p8,p0=r0,r23     // psr.cpl != 0?
>         or r19=r17,r19          // insert PTE control bits into r19
>         ;;
>         or r19=r19,r18          // set bit 4 (uncached) if the access was to region 6
> -(p8)   br.cond.spnt page_fault
>         ;;
>         itc.i r19               // insert the TLB entry
>         mov pr=r31,-1
> @@ -318,7 +308-,7 @@
>         mov r31=pr
>         ;;
>  #ifdef CONFIG_DISABLE_VHPT
> -       shr.u r22=r16,61                        // get the region number into r21
> +       shr.u r22=r16,61                        // get the region number
>         ;;
>         cmp.gt p8,p0=6,r22                      // access to region 0-5
>         ;;
> @@ -328,15 +318-,11 @@
>  (p8)   mov r29=b0                              // save b0
>  (p8)   br.cond.dptk dtlb_fault
>  #endif
> -       extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
>         tbit.nz p6,p7=r20,IA64_ISR_SP_BIT       // is speculation bit on?
>         and r19=r19,r16         // clear ed, reserved bits, and PTE control bits
>         shr.u r18=r16,57        // move address bit 61 to bit 4
>         ;;
>         andcm r18=0x10,r18      // bit 4=~address-bit(61)
> -       cmp.ne p8,p0=r0,r23
> -(p8)   br.cond.spnt page_fault
> -
>         dep r21=-1,r21,IA64_PSR_ED_BIT,1
>         or r19=r19,r17          // insert PTE control bits into r19
>         ;;
> @@ -1412,3 +138-,5 @@
>  // 0x7f00 Entry 67 (size 16 bundles) Reserved
>         DBG_FAULT(67)
>         FAULT(67)



  parent reply	other threads:[~2002-11-07 17:14 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-11-07 14:00 [Linux-ia64] [PATCH] Performance Patch for TLB misses Christian Hildner
2002-11-07 16:38 ` David Mosberger
2002-11-07 17:14 ` Mario Smarduch [this message]
2002-11-11  8:38 ` Christian Hildner
2002-11-11  8:49 ` Christian Hildner
2002-11-11 12:07 ` John Marvin
2002-11-11 18:48 ` David Mosberger
2002-11-12  8:43 ` Christian Hildner
2002-11-12 17:00 ` David Mosberger
2002-11-12 18:03 ` David Mosberger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=marc-linux-ia64-105590709805352@msgid-missing \
    --to=cms063@email.mot.com \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox