Linux MIPS Architecture development
 help / color / mirror / Atom feed
* [PATCH v2,08/10] MIPS: MIPS32R2 optimisations for pipeline stalls and code size.
@ 2012-05-11  4:26 Steven J. Hill
  0 siblings, 0 replies; 3+ messages in thread
From: Steven J. Hill @ 2012-05-11  4:26 UTC (permalink / raw)
  To: linux-mips, ralf; +Cc: Steven J. Hill

From: "Steven J. Hill" <sjhill@mips.com>

If the CPU type is selected as MIPS32R2, then we can surround
some code with #ifdef's to reduce the binary size. Detect when
to use 'ehb' instruction to avoid pipeline stalls.

Signed-off-by: Steven J. Hill <sjhill@mips.com>
---
 arch/mips/mm/tlbex.c |   39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 7b12f27..7b84001 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -74,10 +74,12 @@ static inline int __maybe_unused bcm1250_m3_war(void)
 	return BCM1250_M3_WAR;
 }
 
+#ifndef CONFIG_CPU_MIPS32_R2
 static inline int __maybe_unused r10000_llsc_war(void)
 {
 	return R10000_LLSC_WAR;
 }
+#endif
 
 static int use_bbit_insns(void)
 {
@@ -340,6 +342,7 @@ static void __cpuinit build_restore_work_registers(u32 **p)
  */
 extern unsigned long pgd_current[];
 
+# ifndef CONFIG_CPU_MIPS32_R2
 /*
  * The R3000 TLB handler is simple.
  */
@@ -379,6 +382,7 @@ static void __cpuinit build_r3000_tlb_refill_handler(void)
 
 	dump_handler((u32 *)ebase, 32);
 }
+# endif /* !CONFIG_CPU_MIPS32_R2 */
 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
 
 /*
@@ -449,8 +453,22 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	}
 
 	if (cpu_has_mips_r2) {
-		if (cpu_has_mips_r2_exec_hazard)
-			uasm_i_ehb(p);
+		/*
+		 * The architecture spec says an ehb is required here,
+		 * but a number of cores do not have the hazard and
+		 * using an ehb causes an expensive pipeline stall.
+		 */
+		if (cpu_has_mips_r2_exec_hazard) {
+			switch (current_cpu_type()) {
+			case CPU_M14KC:
+			case CPU_74K:
+				break;
+
+			default:
+				uasm_i_ehb(p);
+				break;
+			}
+		}
 		tlbw(p);
 		return;
 	}
@@ -910,7 +928,7 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
 #else
 	/*
 	 * smp_processor_id() << 3 is stored in CONTEXT.
-         */
+	 */
 	uasm_i_mfc0(p, ptr, C0_CONTEXT);
 	UASM_i_LA_mostly(p, tmp, pgdc);
 	uasm_i_srl(p, ptr, ptr, 23);
@@ -921,13 +939,13 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
 #endif
 	uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
 	uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
-#ifdef CONFIG_32BIT
+
 	if (cpu_has_mips32r2) {
 		uasm_i_ext(p, tmp, tmp, PGDIR_SHIFT, (32 - PGDIR_SHIFT));
 		uasm_i_ins(p, ptr, tmp, PGD_T_LOG2, (32 - PGDIR_SHIFT));
 		return;
 	}
-#endif
+
 	uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
 	uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
 	uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
@@ -963,7 +981,6 @@ static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx)
 
 static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
 {
-#ifdef CONFIG_32BIT
 	if (cpu_has_mips32r2) {
 		/* For MIPS32R2, PTE ptr offset is obtained from BadVAddr */
 		UASM_i_MFC0(p, tmp, C0_BADVADDR);
@@ -972,7 +989,7 @@ static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr
 		uasm_i_ins(p, ptr, tmp, PTE_T_LOG2+1, PGDIR_SHIFT-PAGE_SHIFT-1);
 		return;
 	}
-#endif
+
 	/*
 	 * Bug workaround for the Nevada. It seems as if under certain
 	 * circumstances the move from cp0_context might produce a
@@ -1513,9 +1530,11 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
 # endif
 		UASM_i_SC(p, pte, 0, ptr);
 
+#ifndef CONFIG_CPU_MIPS32_R2
 	if (r10000_llsc_war())
 		uasm_il_beqzl(p, r, pte, label_smp_pgtable_change);
 	else
+#endif
 		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
 
 # ifdef CONFIG_64BIT_PHYS_ADDR
@@ -1649,7 +1668,7 @@ build_pte_modifiable(u32 **p, struct uasm_reloc **r,
 	}
 }
 
-#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+#if !defined(CONFIG_MIPS_PGD_C0_CONTEXT) && !defined(CONFIG_CPU_MIPS32_R2)
 
 
 /*
@@ -1803,7 +1822,7 @@ static void __cpuinit build_r3000_tlb_modify_handler(void)
 
 	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
 }
-#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
+#endif /* !CONFIG_MIPS_PGD_C0_CONTEXT && !CONFIG_CPU_MIPS32_R2 */
 
 /*
  * R4000 style TLB load/store/modify handlers.
@@ -2120,6 +2139,7 @@ void __cpuinit build_tlb_refill_handler(void)
 #endif
 
 	switch (current_cpu_type()) {
+#ifndef CONFIG_CPU_MIPS32_R2
 	case CPU_R2000:
 	case CPU_R3000:
 	case CPU_R3000A:
@@ -2149,6 +2169,7 @@ void __cpuinit build_tlb_refill_handler(void)
 		panic("No R8000 TLB refill handler yet");
 		break;
 
+#endif /* !CONFIG_CPU_MIPS32_R2 */
 	default:
 		if (!run_once) {
 			scratch_reg = allocate_kscratch();
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2,08/10] MIPS: MIPS32R2 optimisations for pipeline stalls and code size.
@ 2012-05-11 20:21 Steven J. Hill
  2012-05-11 20:53 ` David Daney
  0 siblings, 1 reply; 3+ messages in thread
From: Steven J. Hill @ 2012-05-11 20:21 UTC (permalink / raw)
  To: linux-mips, ralf; +Cc: Steven J. Hill

From: "Steven J. Hill" <sjhill@mips.com>

If the CPU type is selected as MIPS32R2, then we can surround
some code with #ifdef's to reduce the binary size. Detect when
to use 'ehb' instruction to avoid pipeline stalls. Utilise the
'ins' and 'ext' MIPS32R2 instructions to reduce the size of
exception handlers.

Signed-off-by: Steven J. Hill <sjhill@mips.com>
---
 arch/mips/mm/tlbex.c |   48 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 897b727..7b84001 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -74,10 +74,12 @@ static inline int __maybe_unused bcm1250_m3_war(void)
 	return BCM1250_M3_WAR;
 }
 
+#ifndef CONFIG_CPU_MIPS32_R2
 static inline int __maybe_unused r10000_llsc_war(void)
 {
 	return R10000_LLSC_WAR;
 }
+#endif
 
 static int use_bbit_insns(void)
 {
@@ -340,6 +342,7 @@ static void __cpuinit build_restore_work_registers(u32 **p)
  */
 extern unsigned long pgd_current[];
 
+# ifndef CONFIG_CPU_MIPS32_R2
 /*
  * The R3000 TLB handler is simple.
  */
@@ -379,6 +382,7 @@ static void __cpuinit build_r3000_tlb_refill_handler(void)
 
 	dump_handler((u32 *)ebase, 32);
 }
+# endif /* !CONFIG_CPU_MIPS32_R2 */
 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
 
 /*
@@ -449,8 +453,22 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	}
 
 	if (cpu_has_mips_r2) {
-		if (cpu_has_mips_r2_exec_hazard)
-			uasm_i_ehb(p);
+		/*
+		 * The architecture spec says an ehb is required here,
+		 * but a number of cores do not have the hazard and
+		 * using an ehb causes an expensive pipeline stall.
+		 */
+		if (cpu_has_mips_r2_exec_hazard) {
+			switch (current_cpu_type()) {
+			case CPU_M14KC:
+			case CPU_74K:
+				break;
+
+			default:
+				uasm_i_ehb(p);
+				break;
+			}
+		}
 		tlbw(p);
 		return;
 	}
@@ -910,7 +928,7 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
 #else
 	/*
 	 * smp_processor_id() << 3 is stored in CONTEXT.
-         */
+	 */
 	uasm_i_mfc0(p, ptr, C0_CONTEXT);
 	UASM_i_LA_mostly(p, tmp, pgdc);
 	uasm_i_srl(p, ptr, ptr, 23);
@@ -921,6 +939,13 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
 #endif
 	uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
 	uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
+
+	if (cpu_has_mips32r2) {
+		uasm_i_ext(p, tmp, tmp, PGDIR_SHIFT, (32 - PGDIR_SHIFT));
+		uasm_i_ins(p, ptr, tmp, PGD_T_LOG2, (32 - PGDIR_SHIFT));
+		return;
+	}
+
 	uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
 	uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
 	uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
@@ -956,6 +981,15 @@ static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx)
 
 static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
 {
+	if (cpu_has_mips32r2) {
+		/* For MIPS32R2, PTE ptr offset is obtained from BadVAddr */
+		UASM_i_MFC0(p, tmp, C0_BADVADDR);
+		UASM_i_LW(p, ptr, 0, ptr);
+		uasm_i_ext(p, tmp, tmp, PAGE_SHIFT+1, PGDIR_SHIFT-PAGE_SHIFT-1);
+		uasm_i_ins(p, ptr, tmp, PTE_T_LOG2+1, PGDIR_SHIFT-PAGE_SHIFT-1);
+		return;
+	}
+
 	/*
 	 * Bug workaround for the Nevada. It seems as if under certain
 	 * circumstances the move from cp0_context might produce a
@@ -1496,9 +1530,11 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
 # endif
 		UASM_i_SC(p, pte, 0, ptr);
 
+#ifndef CONFIG_CPU_MIPS32_R2
 	if (r10000_llsc_war())
 		uasm_il_beqzl(p, r, pte, label_smp_pgtable_change);
 	else
+#endif
 		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
 
 # ifdef CONFIG_64BIT_PHYS_ADDR
@@ -1632,7 +1668,7 @@ build_pte_modifiable(u32 **p, struct uasm_reloc **r,
 	}
 }
 
-#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
+#if !defined(CONFIG_MIPS_PGD_C0_CONTEXT) && !defined(CONFIG_CPU_MIPS32_R2)
 
 
 /*
@@ -1786,7 +1822,7 @@ static void __cpuinit build_r3000_tlb_modify_handler(void)
 
 	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
 }
-#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
+#endif /* !CONFIG_MIPS_PGD_C0_CONTEXT && !CONFIG_CPU_MIPS32_R2 */
 
 /*
  * R4000 style TLB load/store/modify handlers.
@@ -2103,6 +2139,7 @@ void __cpuinit build_tlb_refill_handler(void)
 #endif
 
 	switch (current_cpu_type()) {
+#ifndef CONFIG_CPU_MIPS32_R2
 	case CPU_R2000:
 	case CPU_R3000:
 	case CPU_R3000A:
@@ -2132,6 +2169,7 @@ void __cpuinit build_tlb_refill_handler(void)
 		panic("No R8000 TLB refill handler yet");
 		break;
 
+#endif /* !CONFIG_CPU_MIPS32_R2 */
 	default:
 		if (!run_once) {
 			scratch_reg = allocate_kscratch();
-- 
1.7.10

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2,08/10] MIPS: MIPS32R2 optimisations for pipeline stalls and code size.
  2012-05-11 20:21 [PATCH v2,08/10] MIPS: MIPS32R2 optimisations for pipeline stalls and code size Steven J. Hill
@ 2012-05-11 20:53 ` David Daney
  0 siblings, 0 replies; 3+ messages in thread
From: David Daney @ 2012-05-11 20:53 UTC (permalink / raw)
  To: Steven J. Hill; +Cc: linux-mips, ralf

On 05/11/2012 01:21 PM, Steven J. Hill wrote:
> From: "Steven J. Hill"<sjhill@mips.com>
>
> If the CPU type is selected as MIPS32R2, then we can surround
> some code with #ifdef's to reduce the binary size. Detect when
> to use 'ehb' instruction to avoid pipeline stalls. Utilise the
> 'ins' and 'ext' MIPS32R2 instructions to reduce the size of
> exception handlers.
>
> Signed-off-by: Steven J. Hill<sjhill@mips.com>
> ---
>   arch/mips/mm/tlbex.c |   48 +++++++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 43 insertions(+), 5 deletions(-)
>
> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> index 897b727..7b84001 100644
> --- a/arch/mips/mm/tlbex.c
> +++ b/arch/mips/mm/tlbex.c
> @@ -74,10 +74,12 @@ static inline int __maybe_unused bcm1250_m3_war(void)
>   	return BCM1250_M3_WAR;
>   }
>
> +#ifndef CONFIG_CPU_MIPS32_R2
>   static inline int __maybe_unused r10000_llsc_war(void)
>   {
>   	return R10000_LLSC_WAR;
>   }
> +#endif

Totally useless addition of #if.  It does nothing to the generated code,
and only introduces ugliness and fragility to the source.

In general, we are trying to reduce the number of #ifs in this file (and
in the kernel as a whole) rather than increase it.


>
>   static int use_bbit_insns(void)
>   {
> @@ -340,6 +342,7 @@ static void __cpuinit build_restore_work_registers(u32 **p)
>    */
>   extern unsigned long pgd_current[];
>
> +# ifndef CONFIG_CPU_MIPS32_R2
>   /*
>    * The R3000 TLB handler is simple.
>    */
> @@ -379,6 +382,7 @@ static void __cpuinit build_r3000_tlb_refill_handler(void)
>
>   	dump_handler((u32 *)ebase, 32);
>   }
> +# endif /* !CONFIG_CPU_MIPS32_R2 */
>   #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
>
>   /*
> @@ -449,8 +453,22 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
>   	}
>
>   	if (cpu_has_mips_r2) {
> -		if (cpu_has_mips_r2_exec_hazard)
> -			uasm_i_ehb(p);
> +		/*
> +		 * The architecture spec says an ehb is required here,
> +		 * but a number of cores do not have the hazard and
> +		 * using an ehb causes an expensive pipeline stall.
> +		 */
> +		if (cpu_has_mips_r2_exec_hazard) {
> +			switch (current_cpu_type()) {
> +			case CPU_M14KC:
> +			case CPU_74K:
> +				break;
> +

Can this be rolled into the implementation of
cpu_has_mips_r2_exec_hazard, thus leaving tlbex.c looking cleaner?

> +			default:
> +				uasm_i_ehb(p);
> +				break;
> +			}
> +		}
>   		tlbw(p);
>   		return;
>   	}
> @@ -910,7 +928,7 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
>   #else
>   	/*
>   	 * smp_processor_id()<<  3 is stored in CONTEXT.
> -         */
> +	 */
>   	uasm_i_mfc0(p, ptr, C0_CONTEXT);
>   	UASM_i_LA_mostly(p, tmp, pgdc);
>   	uasm_i_srl(p, ptr, ptr, 23);
> @@ -921,6 +939,13 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
>   #endif
>   	uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
>   	uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
> +
> +	if (cpu_has_mips32r2) {
> +		uasm_i_ext(p, tmp, tmp, PGDIR_SHIFT, (32 - PGDIR_SHIFT));
> +		uasm_i_ins(p, ptr, tmp, PGD_T_LOG2, (32 - PGDIR_SHIFT));
> +		return;
> +	}
> +
>   	uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
>   	uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
>   	uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
> @@ -956,6 +981,15 @@ static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx)
>
>   static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
>   {
> +	if (cpu_has_mips32r2) {

Doesn't the optimization also apply to mips64r2?

> +		/* For MIPS32R2, PTE ptr offset is obtained from BadVAddr */
> +		UASM_i_MFC0(p, tmp, C0_BADVADDR);
> +		UASM_i_LW(p, ptr, 0, ptr);
> +		uasm_i_ext(p, tmp, tmp, PAGE_SHIFT+1, PGDIR_SHIFT-PAGE_SHIFT-1);
> +		uasm_i_ins(p, ptr, tmp, PTE_T_LOG2+1, PGDIR_SHIFT-PAGE_SHIFT-1);
> +		return;
> +	}
> +
>   	/*
>   	 * Bug workaround for the Nevada. It seems as if under certain
>   	 * circumstances the move from cp0_context might produce a
> @@ -1496,9 +1530,11 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
>   # endif
>   		UASM_i_SC(p, pte, 0, ptr);
>
> +#ifndef CONFIG_CPU_MIPS32_R2

Ugh.

>   	if (r10000_llsc_war())
>   		uasm_il_beqzl(p, r, pte, label_smp_pgtable_change);
>   	else
> +#endif
>   		uasm_il_beqz(p, r, pte, label_smp_pgtable_change);
>
>   # ifdef CONFIG_64BIT_PHYS_ADDR
> @@ -1632,7 +1668,7 @@ build_pte_modifiable(u32 **p, struct uasm_reloc **r,
>   	}
>   }
>
> -#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
> +#if !defined(CONFIG_MIPS_PGD_C0_CONTEXT)&&  !defined(CONFIG_CPU_MIPS32_R2)
>
>
>   /*
> @@ -1786,7 +1822,7 @@ static void __cpuinit build_r3000_tlb_modify_handler(void)
>
>   	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
>   }
> -#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */
> +#endif /* !CONFIG_MIPS_PGD_C0_CONTEXT&&  !CONFIG_CPU_MIPS32_R2 */
>
>   /*
>    * R4000 style TLB load/store/modify handlers.
> @@ -2103,6 +2139,7 @@ void __cpuinit build_tlb_refill_handler(void)
>   #endif
>
>   	switch (current_cpu_type()) {
> +#ifndef CONFIG_CPU_MIPS32_R2
>   	case CPU_R2000:
>   	case CPU_R3000:
>   	case CPU_R3000A:
> @@ -2132,6 +2169,7 @@ void __cpuinit build_tlb_refill_handler(void)
>   		panic("No R8000 TLB refill handler yet");
>   		break;
>
> +#endif /* !CONFIG_CPU_MIPS32_R2 */
>   	default:
>   		if (!run_once) {
>   			scratch_reg = allocate_kscratch();

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2012-05-11 20:53 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-11 20:21 [PATCH v2,08/10] MIPS: MIPS32R2 optimisations for pipeline stalls and code size Steven J. Hill
2012-05-11 20:53 ` David Daney
  -- strict thread matches above, loose matches on Subject: below --
2012-05-11  4:26 Steven J. Hill

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox