Linux MIPS Architecture development
 help / color / mirror / Atom feed
* [PATCH] Fix 32bit kernels on R4k with 128 byte cache line size
@ 2008-07-08 12:46 Thomas Bogendoerfer
  2008-07-08 13:02 ` Ralf Baechle
  0 siblings, 1 reply; 2+ messages in thread
From: Thomas Bogendoerfer @ 2008-07-08 12:46 UTC (permalink / raw)
  To: linux-mips; +Cc: ralf

The generated copy_page for R4k CPU with a 128 byte cache line size used
Create Dirty Exclusive cache line operations even if only part of the
cache line was filled.  This change avoids generating cache operations,
if only part of the cache line size is copied in one loop. It also
increases the maxmimum loop size, because the generated code even fits
into the available space for r4k CPUs with 128 byte cache line size.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---

 arch/mips/mm/page.c |   61 ++++++++++++++++++++++++++------------------------
 1 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index 1edf0cb..1417c64 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -235,13 +235,12 @@ static void __cpuinit set_prefetch_parameters(void)
 	}
 	/*
 	 * Too much unrolling will overflow the available space in
-	 * clear_space_array / copy_page_array. 8 words sounds generous,
-	 * but a R4000 with 128 byte L2 line length can exceed even that.
+	 * clear_space_array / copy_page_array.
 	 */
-	half_clear_loop_size = min(8 * clear_word_size,
+	half_clear_loop_size = min(16 * clear_word_size,
 				   max(cache_line_size >> 1,
 				       4 * clear_word_size));
-	half_copy_loop_size = min(8 * copy_word_size,
+	half_copy_loop_size = min(16 * copy_word_size,
 				  max(cache_line_size >> 1,
 				      4 * copy_word_size));
 }
@@ -263,21 +262,23 @@ static inline void __cpuinit build_clear_pref(u32 **buf, int off)
 	if (pref_bias_clear_store) {
 		uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
 			    A0);
-	} else if (cpu_has_cache_cdex_s) {
-		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
-	} else if (cpu_has_cache_cdex_p) {
-		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-		}
+	} else if (cache_line_size == (half_clear_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
 
-		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-			uasm_i_lw(buf, ZERO, ZERO, AT);
+			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
 
-		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
-	}
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+		}
+		}
 }
 
 void __cpuinit build_clear_page(void)
@@ -403,20 +404,22 @@ static inline void build_copy_store_pref(u32 **buf, int off)
 	if (pref_bias_copy_store) {
 		uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
 			    A0);
-	} else if (cpu_has_cache_cdex_s) {
-		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
-	} else if (cpu_has_cache_cdex_p) {
-		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-			uasm_i_nop(buf);
-		}
+	} else if (cache_line_size == (half_copy_loop_size << 1)) {
+		if (cpu_has_cache_cdex_s) {
+			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+		} else if (cpu_has_cache_cdex_p) {
+			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+				uasm_i_nop(buf);
+			}
 
-		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-			uasm_i_lw(buf, ZERO, ZERO, AT);
+			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+				uasm_i_lw(buf, ZERO, ZERO, AT);
 
-		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+		}
 	}
 }
 

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix 32bit kernels on R4k with 128 byte cache line size
  2008-07-08 12:46 [PATCH] Fix 32bit kernels on R4k with 128 byte cache line size Thomas Bogendoerfer
@ 2008-07-08 13:02 ` Ralf Baechle
  0 siblings, 0 replies; 2+ messages in thread
From: Ralf Baechle @ 2008-07-08 13:02 UTC (permalink / raw)
  To: Thomas Bogendoerfer; +Cc: linux-mips

On Tue, Jul 08, 2008 at 02:46:34PM +0200, Thomas Bogendoerfer wrote:

> The generated copy_page for R4k CPU with a 128 byte cache line size used
> Create Dirty Exclusive cache line operations even if only part of the
> cache line was filled.  This change avoids generating cache operations,
> if only part of the cache line size is copied in one loop. It also
> increases the maxmimum loop size, because the generated code even fits
> into the available space for r4k CPUs with 128 byte cache line size.
> 
> Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>

Thanks - another important fix in the last minute ...

Applied, will send to Linus tonight.

  Ralf

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2008-07-08 13:04 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-08 12:46 [PATCH] Fix 32bit kernels on R4k with 128 byte cache line size Thomas Bogendoerfer
2008-07-08 13:02 ` Ralf Baechle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox