All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
To: linux-ia64@vger.kernel.org
Subject: Re: flush_icache_range
Date: Thu, 02 Jun 2005 14:25:25 +0000	[thread overview]
Message-ID: <429F16D5.7030305@bull.net> (raw)
In-Reply-To: <4236D7B5.8050408@bull.net>

[-- Attachment #1: Type: text/plain, Size: 1574 bytes --]

Jack Steiner wrote:
> On Thu, Jun 02, 2005 at 02:12:02PM +0200, Zoltan Menyhart wrote:
> 
>>+.Loop:	fc.i	in0			// issuable on M0 only
>>+	add	in0=r21,in0
>> 	br.cloop.sptk.few .Loop
>> 	;;
> 
> 
> I noticed that the flush loop has a single bundle loop. I know
> that this loop was not introduced by your code, but according to 
> Intel, single bundle loops should not be used in performance critical code.
> 
> We ran into severe performance problems several years ago with single bundle
> loops. IIRC, the details were posted to the ia64 mail list & the 
> resolution was "don't use single bundle loops". I don't know if the performance
> problem exists if the loop contains an fc instruction but you may want
> to unroll the loop one additional cycle. 
> 
> (The problem is that single bundle loops that are not aligned on a
> 0 mod 32 address will run significantly slower (we observed 3X slower) after 
> an interrupt).

Thank you for your remark.

I added a "nop.b 0" to occupy the original slot of "br".
I hope it is fine that my "br" is shifted to the very last slot:

0xa000000100302d00 <flush_icache_range+64>:     [MIB]       fc.i r32
0xa000000100302d01 <flush_icache_range+65>:                 add r32=r21,r32
0xa000000100302d02 <flush_icache_range+66>:                 nop.b 0x0
0xa000000100302d10 <flush_icache_range+80>:     [MFB]       nop.m 0x0
0xa000000100302d11 <flush_icache_range+81>:                 nop.f 0x0
0xa000000100302d12 <flush_icache_range+82>:                 br.cloop.sptk.few 0xa000000100302d00
									<flush_icache_range+64>;;

Zoltan


[-- Attachment #2: diff2 --]
[-- Type: text/plain, Size: 3887 bytes --]

--- linux-2.6.11-orig/arch/ia64/lib/flush.S	2005-04-26 15:59:49.000000000 +0200
+++ linux-2.6.11/arch/ia64/lib/flush.S	2005-06-02 16:12:08.655606148 +0200
@@ -3,37 +3,57 @@
  *
  * Copyright (C) 1999-2001 Hewlett-Packard Co
  * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/28/05 Zoltan Menyhart	Dynamic stride size
  */
+
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 
+
 	/*
 	 * flush_icache_range(start,end)
-	 *	Must flush range from start to end-1 but nothing else (need to
+	 *
+	 *	Make i-cache(s) coherent with d-caches.
+	 *
+	 *	Must deal with range from start to end-1 but nothing else (need to
 	 *	be careful not to touch addresses that may be unmapped).
 	 */
 GLOBAL_ENTRY(flush_icache_range)
+
 	.prologue
-	alloc r2=ar.pfs,2,0,0,0
-	sub r8=in1,in0,1
-	;;
-	shr.u r8=r8,5			// we flush 32 bytes per iteration
-	.save ar.lc, r3
-	mov r3=ar.lc			// save ar.lc
+	alloc	r2=ar.pfs,2,0,0,0
+	movl	r3=log_2_i_cache_stride_size
+ 	mov	r21=1
+	;;
+	ld8	r20=[r3]		// r20: log2( stride size of the i-cache(s) )
+	sub	r8=in1,in0,1
+	;;
+	shl	r21=r21,r20		// r21: stride size of the i-cache(s)
+	shr.u	r8=r8,r20		// we flush "stride size" bytes per iteration
+	
+	.save	ar.lc, r3
+	mov	r3=ar.lc		// save ar.lc
 	;;
 
 	.body
 
-	mov ar.lc=r8
+	mov	ar.lc=r8
 	;;
-.Loop:	fc in0				// issuable on M0 only
-	add in0=32,in0
+
+	/*
+	 * 32 byte aligned loop, even number of (actually 2) bundles
+	 */
+.Loop:	fc.i	in0			// issuable on M0 only
+	add	in0=r21,in0
+	nop.b	0
 	br.cloop.sptk.few .Loop
 	;;
+
 	sync.i
 	;;
 	srlz.i
 	;;
-	mov ar.lc=r3			// restore ar.lc
+	mov	ar.lc=r3		// restore ar.lc
 	br.ret.sptk.many rp
 END(flush_icache_range)
--- linux-2.6.11-orig/arch/ia64/kernel/setup.c	2005-04-26 15:59:49.000000000 +0200
+++ linux-2.6.11/arch/ia64/kernel/setup.c	2005-06-02 13:55:23.448675412 +0200
@@ -15,6 +15,7 @@
  * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
  * 01/07/99 S.Eranian	added the support for command line argument
  * 06/24/99 W.Drummond	added boot_cpu_data.
+ * 05/28/05 Z. Menyhart	Dynamic stride size for "flush_icache_range()"
  */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -78,6 +79,13 @@
 EXPORT_SYMBOL(io_space);
 unsigned int num_io_spaces;
 
+/*
+ * "flush_icache_range()" needs to know what processor dependent stride size to use
+ * when it makes i-cache(s) coherent with d-caches.
+ */
+#define	LOG_2_I_CACHE_STRIDE_SIZE	5	/* Safest way to go: 32 bytes by 32 bytes */
+unsigned long log_2_i_cache_stride_size = ~0;
+
 unsigned char aux_device_present = 0xaa;        /* XXX remove this when legacy I/O is gone */
 
 /*
@@ -624,6 +632,34 @@
 		ia64_max_cacheline_size = max;
 }
 
+
+/*
+ * "flush_icache_range()" needs to know what processor dependent stride size to use
+ * when it makes i-cache(s) coherent with d-caches.
+ * The minimum of the i-cache stride sizes is calculated.
+ */
+static void
+get_i_cache_stride_size (void)
+{
+	pal_cache_config_info_t cci;
+	s64 status;
+
+	/*
+	 * We assume that the stride size of the L2I cache (if it exists) is the same as
+	 * that of the L1I cache.
+	 */
+	status = ia64_pal_cache_config_info(/* cache_level ( 0 means L1 ) */ 0,
+					    /* cache_type (instruction)= */ 1, &cci);
+	if (status != 0) {
+		printk(KERN_ERR
+		       "%s: ia64_pal_cache_config_info(L1I) failed (status=%ld CPU=%d)\n",
+		       __FUNCTION__, status, smp_processor_id());
+		cci.pcci_stride = LOG_2_I_CACHE_STRIDE_SIZE;
+	}
+	if (cci.pcci_stride < log_2_i_cache_stride_size)
+		log_2_i_cache_stride_size = cci.pcci_stride;
+}
+
 /*
  * cpu_init() initializes state that is per-CPU.  This function acts
  * as a 'CPU state barrier', nothing should get across.
@@ -649,6 +685,7 @@
 		    ia64_tpa(cpu_data) - (long) __per_cpu_start);
 
 	get_max_cacheline_size();
+	get_i_cache_stride_size();
 
 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called

  parent reply	other threads:[~2005-06-02 14:25 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-03-15 12:40 flush_icache_range Zoltan Menyhart
2005-03-15 18:21 ` flush_icache_range David Mosberger
2005-03-16 10:58 ` flush_icache_range Zoltan Menyhart
2005-03-16 11:19 ` flush_icache_range Duraid Madina
2005-03-16 18:31 ` flush_icache_range David Mosberger
2005-05-20 14:17 ` flush_icache_range Zoltan Menyhart
2005-05-20 15:03 ` flush_icache_range David Mosberger
2005-05-23 13:43 ` flush_icache_range Zoltan Menyhart
2005-05-26 17:21 ` flush_icache_range David Mosberger
2005-05-26 17:39 ` flush_icache_range Seth, Rohit
2005-05-27 15:45 ` flush_icache_range Zoltan Menyhart
2005-05-27 15:56 ` flush_icache_range David Mosberger
2005-05-27 16:45 ` flush_icache_range Zoltan Menyhart
2005-05-27 16:55 ` flush_icache_range David Mosberger
2005-05-27 18:27 ` flush_icache_range Grant Grundler
2005-05-27 19:00 ` flush_icache_range Russ Anderson
2005-05-29 20:23 ` flush_icache_range Menyhart, Zoltan
2005-06-01 23:50 ` flush_icache_range David Mosberger
2005-06-02  3:00 ` flush_icache_range Jim Hull
2005-06-02 12:12 ` flush_icache_range Zoltan Menyhart
2005-06-02 14:25 ` Zoltan Menyhart [this message]
2005-06-02 17:36 ` flush_icache_range David Mosberger
2005-06-02 18:28 ` flush_icache_range David Mosberger
2005-06-02 18:31 ` flush_icache_range David Mosberger
2005-06-02 19:00 ` flush_icache_range Jim Hull
2005-06-02 21:37 ` flush_icache_range Menyhart, Zoltan
2005-06-02 22:23 ` flush_icache_range David Mosberger
2005-06-02 22:55 ` flush_icache_range Menyhart, Zoltan
2005-06-02 23:07 ` flush_icache_range David Mosberger
2005-06-03 12:35 ` flush_icache_range Zoltan Menyhart
2005-06-03 21:09 ` flush_icache_range David Mosberger
2005-06-13 11:20 ` flush_icache_range Zoltan Menyhart
  -- strict thread matches above, loose matches on Subject: below --
2000-07-23  1:07 flush_icache_range Kanoj Sarcar
2000-07-23 18:36 ` flush_icache_range Ralf Baechle
2000-07-24 16:10   ` flush_icache_range Kanoj Sarcar
2000-07-25  0:06     ` flush_icache_range Ralf Baechle
2000-07-25  1:11       ` flush_icache_range Kanoj Sarcar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=429F16D5.7030305@bull.net \
    --to=zoltan.menyhart@bull.net \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.