public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
To: linux-ia64@vger.kernel.org
Subject: RE: ia64 printk_clock()
Date: Mon, 06 Feb 2006 21:37:18 +0000	[thread overview]
Message-ID: <200602062137.k16LbIg06097@unix-os.sc.intel.com> (raw)
In-Reply-To: <20060202204422.GA27082@sgi.com>

Magenheimer, Dan wrote on Monday, February 06, 2006 12:41 PM
> One headsup before unpinning per-cpu...
> 
> In playing with paravirtualization for Xen/ia64, I saw
> some evidence that replacing all uses of ar.kr's in Linux/ia64
> with direct memory accesses to (pinned) per-cpu data may speed
> up the kernel somewhat (~0.5%?).  (Reading/writing of kr's
> on Mckinley is pretty slow... don't know about other processors.)


On Madison, reading KR is pretty fast.  I have the following experimental
patch[*], and when I measured alt_dtlb_miss latency with and without the
patch, both yielded 23 cycles.  TLB misses in the kernel definitely went up,
by about 3% with a kernel build benchmark and 7% with an OLTP db workload.
The trade-off is fewer TLB misses for user applications.  I'm doing a few
more experiments to see whether the trade-off is worthwhile.

- Ken

[*] This patch has a prerequisite: a patch similar to
http://www.gelato.unsw.edu.au/archives/linux-ia64/0601/16836.html to avoid
referencing the per-cpu variable ia64_phys_stacked_size_p8 in the kernel exit
path, as the per-cpu area is accessed there with psr.ic=0.


--- ./arch/ia64/kernel/ivt.S.orig	2006-01-02 19:21:10.000000000 -0800
+++ ./arch/ia64/kernel/ivt.S	2006-02-05 13:30:57.782233990 -0800
@@ -375,6 +375,7 @@ ENTRY(alt_dtlb_miss)
 	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
 	mov r21=cr.ipsr
 	mov r31=pr
+	mov r24=PERCPU_ADDR
 	;;
 #ifdef CONFIG_DISABLE_VHPT
 	shr.u r22=r16,61			// get the region number into r21
@@ -387,22 +388,30 @@ ENTRY(alt_dtlb_miss)
 (p8)	mov r29=b0				// save b0
 (p8)	br.cond.dptk dtlb_fault
 #endif
+	cmp.ge p10,p11=r16,r24			// access to per_cpu_data?
+	tbit.z p12,p0=r16,61			// access to region 6?
+	mov r25=PERCPU_PAGE_SHIFT << 2
+	mov r26=PERCPU_PAGE_SIZE
+	nop.m 0
+	nop.b 0
+	;;
+(p10)	mov r19=IA64_KR(PER_CPU_DATA)
+(p11)	and r19=r19,r16				// clear non-ppn fields
 	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
 	and r22=IA64_ISR_CODE_MASK,r20		// get the isr.code field
 	tbit.nz p6,p7=r20,IA64_ISR_SP_BIT	// is speculation bit on?
-	shr.u r18=r16,57			// move address bit 61 to bit 4
-	and r19=r19,r16				// clear ed, reserved bits, and PTE control bits
 	tbit.nz p9,p0=r20,IA64_ISR_NA_BIT	// is non-access bit on?
 	;;
-	andcm r18=0x10,r18	// bit 4=~address-bit(61)
+(p10)	sub r19=r19,r26
+(p10)	mov cr.itir=r25
 	cmp.ne p8,p0=r0,r23
 (p9)	cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22	// check isr.code field
+(p12)	dep r17=-1,r17,4,1			// set ma=UC for region 6 addr
 (p8)	br.cond.spnt page_fault
 
 	dep r21=-1,r21,IA64_PSR_ED_BIT,1
-	or r19=r19,r17		// insert PTE control bits into r19
 	;;
-	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
+	or r19=r19,r17		// insert PTE control bits into r19
 (p6)	mov cr.ipsr=r21
 	;;
 (p7)	itc.d r19		// insert the TLB entry
--- ./arch/ia64/kernel/mca_asm.S.orig	2006-01-02 19:21:10.000000000 -0800
+++ ./arch/ia64/kernel/mca_asm.S	2006-02-05 11:15:55.620223867 -0800
@@ -102,14 +102,6 @@ ia64_do_tlb_purge:
 	;;
 	srlz.d
 	;;
-	// 2. Purge DTR for PERCPU data.
-	movl r16=PERCPU_ADDR
-	mov r18=PERCPU_PAGE_SHIFT<<2
-	;;
-	ptr.d r16,r18
-	;;
-	srlz.d
-	;;
 	// 3. Purge ITR for PAL code.
 	GET_THIS_PADDR(r2, ia64_mca_pal_base)
 	;;
@@ -197,22 +189,6 @@ ia64_reload_tr:
 	srlz.i
 	srlz.d
 	;;
-	// 2. Reload DTR register for PERCPU data.
-	GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
-	;;
-	movl r16=PERCPU_ADDR		// vaddr
-	movl r18=PERCPU_PAGE_SHIFT<<2
-	;;
-	mov cr.itir=r18
-	mov cr.ifa=r16
-	;;
-	ld8 r18=[r2]			// load per-CPU PTE
-	mov r16=IA64_TR_PERCPU_DATA;
-	;;
-	itr.d dtr[r16]=r18
-	;;
-	srlz.d
-	;;
 	// 3. Reload ITR for PAL code.
 	GET_THIS_PADDR(r2, ia64_mca_pal_pte)
 	;;
--- ./arch/ia64/mm/init.c.orig	2006-01-02 19:21:10.000000000 -0800
+++ ./arch/ia64/mm/init.c	2006-02-05 11:16:57.578230920 -0800
@@ -332,7 +332,7 @@ setup_gate (void)
 void __devinit
 ia64_mmu_init (void *my_cpu_data)
 {
-	unsigned long psr, pta, impl_va_bits;
+	unsigned long pta, impl_va_bits;
 	extern void __devinit tlb_init (void);
 
 #ifdef CONFIG_DISABLE_VHPT
@@ -341,15 +341,6 @@ ia64_mmu_init (void *my_cpu_data)
 #	define VHPT_ENABLE_BIT	1
 #endif
 
-	/* Pin mapping for percpu area into TLB */
-	psr = ia64_clear_ic();
-	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
-		 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
-		 PERCPU_PAGE_SHIFT);
-
-	ia64_set_psr(psr);
-	ia64_srlz_i();
-
 	/*
 	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
 	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
--- ./include/asm-ia64/kregs.h.orig	2006-01-02 19:21:10.000000000 -0800
+++ ./include/asm-ia64/kregs.h	2006-02-05 11:15:55.621200429 -0800
@@ -29,8 +29,7 @@
  */
 #define IA64_TR_KERNEL		0	/* itr0, dtr0: maps kernel image (code & data) */
 #define IA64_TR_PALCODE		1	/* itr1: maps PALcode as required by EFI */
-#define IA64_TR_PERCPU_DATA	1	/* dtr1: percpu data */
-#define IA64_TR_CURRENT_STACK	2	/* dtr2: maps kernel's memory- & register-stacks */
+#define IA64_TR_CURRENT_STACK	1	/* dtr1: maps kernel's memory- & register-stacks */
 
 /* Processor status register bits: */
 #define IA64_PSR_BE_BIT		1



  parent reply	other threads:[~2006-02-06 21:37 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-02-02 20:44 ia64 printk_clock() Jack Steiner
2006-02-02 21:22 ` Dean Roe
2006-02-02 21:46 ` Luck, Tony
2006-02-02 21:50 ` Jack Steiner
2006-02-02 22:29 ` Luck, Tony
2006-02-02 22:52 ` Chen, Kenneth W
2006-02-03 17:43 ` Chen, Kenneth W
2006-02-03 18:04 ` Luck, Tony
2006-02-03 18:30 ` Chen, Kenneth W
2006-02-03 18:43 ` Luck, Tony
2006-02-03 18:55 ` Chen, Kenneth W
2006-02-04  0:16 ` Keith Owens
2006-02-04  0:28 ` Chen, Kenneth W
2006-02-04  0:32 ` Luck, Tony
2006-02-04  0:36 ` Keith Owens
2006-02-06 18:14 ` Luck, Tony
2006-02-06 18:34 ` Chen, Kenneth W
2006-02-06 20:40 ` Magenheimer, Dan (HP Labs Fort Collins)
2006-02-06 21:37 ` Chen, Kenneth W [this message]
2006-02-07 11:11 ` Jes Sorensen
2006-02-08  0:05 ` Luck, Tony
2006-02-08  6:29 ` Jes Sorensen
2006-02-08 18:18 ` Luck, Tony

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200602062137.k16LbIg06097@unix-os.sc.intel.com \
    --to=kenneth.w.chen@intel.com \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox