* [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround
@ 2009-03-13 15:16 Kumar Gala
2009-03-13 21:25 ` Benjamin Herrenschmidt
2009-03-13 21:26 ` Benjamin Herrenschmidt
0 siblings, 2 replies; 5+ messages in thread
From: Kumar Gala @ 2009-03-13 15:16 UTC (permalink / raw)
To: david.jander; +Cc: linuxppc-dev, wd, gunnar
From: David Jander <david.jander@protonic.nl>
Complete workaround for DTLB errata in e300c2/c3/c4 processors.
Due to the bug, the hardware-implemented LRU algorithm always goes to way
1 of the TLB. This fix implements the proposed software workaround in
the form of an LRW table for choosing the TLB way.
Signed-off-by: David Jander <david@protonic.nl>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
Added CPU feature support; need to check with Ben whether we should use an MMU feature instead.
- k
arch/powerpc/include/asm/cputable.h | 7 +++-
arch/powerpc/kernel/cputable.c | 4 +-
arch/powerpc/kernel/head_32.S | 61 +++++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index fca1611..42e3145 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -152,6 +152,7 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x0000000000002000)
#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x0000000000004000)
#define CPU_FTR_NO_DPM ASM_CONST(0x0000000000008000)
+#define CPU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x0000000000010000)
#define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000)
#define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000)
#define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000)
@@ -356,7 +357,11 @@ extern const char *powerpc_base_platform;
CPU_FTR_COMMON)
#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
- CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
+ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | \
+ CPU_FTR_NEED_DTLB_SW_LRU)
+#define CPU_FTRS_E300C3 (CPU_FTR_MAYBE_CAN_DOZE | \
+ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_COMMON | CPU_FTR_NEED_DTLB_SW_LRU)
#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
#define CPU_FTRS_8XX (CPU_FTR_USE_TB)
#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index ccea243..039452c 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1101,7 +1101,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pvr_mask = 0x7fff0000,
.pvr_value = 0x00850000,
.cpu_name = "e300c3",
- .cpu_features = CPU_FTRS_E300,
+ .cpu_features = CPU_FTRS_E300C3,
.cpu_user_features = COMMON_USER,
.mmu_features = MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
@@ -1116,7 +1116,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pvr_mask = 0x7fff0000,
.pvr_value = 0x00860000,
.cpu_name = "e300c4",
- .cpu_features = CPU_FTRS_E300,
+ .cpu_features = CPU_FTRS_E300C3,
.cpu_user_features = COMMON_USER,
.mmu_features = MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index f8c2e6b..eecae0d 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -554,6 +554,10 @@ DataLoadTLBMiss:
* r2: ptr to linux-style pte
* r3: scratch
*/
+BEGIN_FTR_SECTION
+ b TlbWo /* Code for TLB-errata workaround doesn't fit here */
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU)
+RFTlbWo:
mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
@@ -626,6 +630,31 @@ DataStoreTLBMiss:
* r2: ptr to linux-style pte
* r3: scratch
*/
+BEGIN_FTR_SECTION
+/* MPC512x: workaround for errata in die M36P and earlier:
+ * Implement LRW for TLB way.
+ */
+ mfspr r3,SPRN_DMISS
+ rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */
+ lis r2,lrw@ha /* Search index in lrw[] */
+ addi r2,r2,lrw@l
+ tophys(r2,r2)
+ lwzx r1,r3,r2 /* Get item from lrw[] */
+ cmpwi 0,r1,0 /* Was it way 0 last time? */
+ beq- 0,113f /* Then goto 113: */
+
+ mfspr r1,SPRN_SRR1
+ rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */
+ mtspr SPRN_SRR1,r1
+
+ li r0,0
+ stwx r0,r3,r2 /* Make lrw[] entry 0 */
+ b 114f
+113:
+ li r0,1
+ stwx r0,r3,r2 /* Make lrw[] entry 1 */
+114:
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU)
mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
@@ -813,6 +842,32 @@ giveup_altivec:
blr
#endif /* CONFIG_ALTIVEC */
+TlbWo:
+/* MPC512x: workaround for errata in die M36P and earlier:
+ * Implement LRW for TLB way.
+ */
+ mfspr r3,SPRN_DMISS
+ rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */
+ lis r2,lrw@ha /* Search index in lrw[] */
+ addi r2,r2,lrw@l
+ tophys(r2,r2)
+ lwzx r1,r3,r2 /* Get item from lrw[] */
+ cmpwi 0,r1,0 /* Was it way 0 last time? */
+ beq- 0,113f /* Then goto 113: */
+
+ mfspr r1,SPRN_SRR1
+ rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */
+ mtspr SPRN_SRR1,r1
+
+ li r0,0
+ stwx r0,r3,r2 /* Make lrw[] entry 0 */
+ b 114f
+113:
+ li r0,1
+ stwx r0,r3,r2 /* Make lrw[] entry 1 */
+114:
+ b RFTlbWo
+
/*
* This code is jumped to from the startup code to copy
* the kernel image to physical address PHYSICAL_START.
@@ -1328,6 +1383,12 @@ intercept_table:
.long 0, 0, 0, 0, 0, 0, 0, 0
.long 0, 0, 0, 0, 0, 0, 0, 0
+lrw:
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+
/* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
*/
--
1.5.6.6
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround
2009-03-13 15:16 [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround Kumar Gala
@ 2009-03-13 21:25 ` Benjamin Herrenschmidt
2009-03-13 21:26 ` Benjamin Herrenschmidt
1 sibling, 0 replies; 5+ messages in thread
From: Benjamin Herrenschmidt @ 2009-03-13 21:25 UTC (permalink / raw)
To: Kumar Gala; +Cc: linuxppc-dev, david.jander, wd, gunnar
On Fri, 2009-03-13 at 10:16 -0500, Kumar Gala wrote:
> From: David Jander <david.jander@protonic.nl>
>
> Complete workaround for DTLB errata in e300c2/c3/c4 processors.
>
> Due to the bug, the hardware-implemented LRU algorithm always goes to way
> 1 of the TLB. This fix implements the proposed software workaround in
> the form of an LRW table for choosing the TLB way.
>
> Signed-off-by: David Jander <david@protonic.nl>
> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
> ---
>
> Added CPU feature support; need to check with Ben whether we should use an MMU feature instead.
Yes, we should...
Cheers,
Ben.
> - k
>
> arch/powerpc/include/asm/cputable.h | 7 +++-
> arch/powerpc/kernel/cputable.c | 4 +-
> arch/powerpc/kernel/head_32.S | 61 +++++++++++++++++++++++++++++++++++
> 3 files changed, 69 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
> index fca1611..42e3145 100644
> --- a/arch/powerpc/include/asm/cputable.h
> +++ b/arch/powerpc/include/asm/cputable.h
> @@ -152,6 +152,7 @@ extern const char *powerpc_base_platform;
> #define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x0000000000002000)
> #define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x0000000000004000)
> #define CPU_FTR_NO_DPM ASM_CONST(0x0000000000008000)
> +#define CPU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x0000000000010000)
> #define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000)
> #define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000)
> #define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000)
> @@ -356,7 +357,11 @@ extern const char *powerpc_base_platform;
> CPU_FTR_COMMON)
> #define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
> CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
> - CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
> + CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | \
> + CPU_FTR_NEED_DTLB_SW_LRU)
> +#define CPU_FTRS_E300C3 (CPU_FTR_MAYBE_CAN_DOZE | \
> + CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
> + CPU_FTR_COMMON | CPU_FTR_NEED_DTLB_SW_LRU)
> #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
> #define CPU_FTRS_8XX (CPU_FTR_USE_TB)
> #define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index ccea243..039452c 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -1101,7 +1101,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
> .pvr_mask = 0x7fff0000,
> .pvr_value = 0x00850000,
> .cpu_name = "e300c3",
> - .cpu_features = CPU_FTRS_E300,
> + .cpu_features = CPU_FTRS_E300C3,
> .cpu_user_features = COMMON_USER,
> .mmu_features = MMU_FTR_USE_HIGH_BATS,
> .icache_bsize = 32,
> @@ -1116,7 +1116,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
> .pvr_mask = 0x7fff0000,
> .pvr_value = 0x00860000,
> .cpu_name = "e300c4",
> - .cpu_features = CPU_FTRS_E300,
> + .cpu_features = CPU_FTRS_E300C3,
> .cpu_user_features = COMMON_USER,
> .mmu_features = MMU_FTR_USE_HIGH_BATS,
> .icache_bsize = 32,
> diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
> index f8c2e6b..eecae0d 100644
> --- a/arch/powerpc/kernel/head_32.S
> +++ b/arch/powerpc/kernel/head_32.S
> @@ -554,6 +554,10 @@ DataLoadTLBMiss:
> * r2: ptr to linux-style pte
> * r3: scratch
> */
> +BEGIN_FTR_SECTION
> + b TlbWo /* Code for TLB-errata workaround doesn't fit here */
> +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU)
> +RFTlbWo:
> mfctr r0
> /* Get PTE (linux-style) and check access */
> mfspr r3,SPRN_DMISS
> @@ -626,6 +630,31 @@ DataStoreTLBMiss:
> * r2: ptr to linux-style pte
> * r3: scratch
> */
> +BEGIN_FTR_SECTION
> +/* MPC512x: workaround for errata in die M36P and earlier:
> + * Implement LRW for TLB way.
> + */
> + mfspr r3,SPRN_DMISS
> + rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */
> + lis r2,lrw@ha /* Search index in lrw[] */
> + addi r2,r2,lrw@l
> + tophys(r2,r2)
> + lwzx r1,r3,r2 /* Get item from lrw[] */
> + cmpwi 0,r1,0 /* Was it way 0 last time? */
> + beq- 0,113f /* Then goto 113: */
> +
> + mfspr r1,SPRN_SRR1
> + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */
> + mtspr SPRN_SRR1,r1
> +
> + li r0,0
> + stwx r0,r3,r2 /* Make lrw[] entry 0 */
> + b 114f
> +113:
> + li r0,1
> + stwx r0,r3,r2 /* Make lrw[] entry 1 */
> +114:
> +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU)
> mfctr r0
> /* Get PTE (linux-style) and check access */
> mfspr r3,SPRN_DMISS
> @@ -813,6 +842,32 @@ giveup_altivec:
> blr
> #endif /* CONFIG_ALTIVEC */
>
> +TlbWo:
> +/* MPC512x: workaround for errata in die M36P and earlier:
> + * Implement LRW for TLB way.
> + */
> + mfspr r3,SPRN_DMISS
> + rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */
> + lis r2,lrw@ha /* Search index in lrw[] */
> + addi r2,r2,lrw@l
> + tophys(r2,r2)
> + lwzx r1,r3,r2 /* Get item from lrw[] */
> + cmpwi 0,r1,0 /* Was it way 0 last time? */
> + beq- 0,113f /* Then goto 113: */
> +
> + mfspr r1,SPRN_SRR1
> + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */
> + mtspr SPRN_SRR1,r1
> +
> + li r0,0
> + stwx r0,r3,r2 /* Make lrw[] entry 0 */
> + b 114f
> +113:
> + li r0,1
> + stwx r0,r3,r2 /* Make lrw[] entry 1 */
> +114:
> + b RFTlbWo
> +
> /*
> * This code is jumped to from the startup code to copy
> * the kernel image to physical address PHYSICAL_START.
> @@ -1328,6 +1383,12 @@ intercept_table:
> .long 0, 0, 0, 0, 0, 0, 0, 0
> .long 0, 0, 0, 0, 0, 0, 0, 0
>
> +lrw:
> + .long 0, 0, 0, 0, 0, 0, 0, 0
> + .long 0, 0, 0, 0, 0, 0, 0, 0
> + .long 0, 0, 0, 0, 0, 0, 0, 0
> + .long 0, 0, 0, 0, 0, 0, 0, 0
> +
> /* Room for two PTE pointers, usually the kernel and current user pointers
> * to their respective root page table.
> */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround
2009-03-13 15:16 [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround Kumar Gala
2009-03-13 21:25 ` Benjamin Herrenschmidt
@ 2009-03-13 21:26 ` Benjamin Herrenschmidt
2009-03-13 22:06 ` Kumar Gala
1 sibling, 1 reply; 5+ messages in thread
From: Benjamin Herrenschmidt @ 2009-03-13 21:26 UTC (permalink / raw)
To: Kumar Gala; +Cc: linuxppc-dev, david.jander, wd, gunnar
> +BEGIN_FTR_SECTION
> + b TlbWo /* Code for TLB-errata workaround doesn't fit here */
> +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU)
> +RFTlbWo:
Can you use nicer label names ? :-)
Also, that's a lot of code for such a hot path...
Cheers,
Ben.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround
2009-03-13 21:26 ` Benjamin Herrenschmidt
@ 2009-03-13 22:06 ` Kumar Gala
2009-03-16 12:42 ` Kumar Gala
0 siblings, 1 reply; 5+ messages in thread
From: Kumar Gala @ 2009-03-13 22:06 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, david.jander, wd, gunnar
On Mar 13, 2009, at 4:26 PM, Benjamin Herrenschmidt wrote:
>> +BEGIN_FTR_SECTION
>> + b TlbWo /* Code for TLB-errata workaround doesn't fit
>> here */
>> +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU)
>> +RFTlbWo:
>
> Can you use nicer label names ? :-)
>
> Also, that's a lot of code for such a hot path...
>
> Cheers,
> Ben.
The code needs reworking. However, we are doing SW LRU, not sure how
we reduce this in the hot path.
- k
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround
2009-03-13 22:06 ` Kumar Gala
@ 2009-03-16 12:42 ` Kumar Gala
0 siblings, 0 replies; 5+ messages in thread
From: Kumar Gala @ 2009-03-16 12:42 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: linuxppc-dev Development, David Jander, Wolfgang Denk,
Gunnar Von Boehn
On Mar 13, 2009, at 5:06 PM, Kumar Gala wrote:
>
> On Mar 13, 2009, at 4:26 PM, Benjamin Herrenschmidt wrote:
>
>>> +BEGIN_FTR_SECTION
>>> + b TlbWo /* Code for TLB-errata workaround doesn't fit
>>> here */
>>> +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU)
>>> +RFTlbWo:
>>
>> Can you use nicer label names ? :-)
>>
>> Also, that's a lot of code for such a hot path...
>>
>> Cheers,
>> Ben.
>
> The code needs reworking. However, we are doing SW LRU, not sure
> how we reduce this in the hot path.
Ben, David,
Here's my attempt at reworking the code to use an SPRG, remove
branches, optimize it down, etc. I haven't validated that this is
even correct. It should be easy to replace the m{f,t}spr SPRG w/lwz/
stw if we want to keep the LRU state in memory instead.
Ben, do you think we can optimize this further with some random LRU
selection?
mtspr SPRN_RPA,r1
mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
mtcrf 0x80,r2
#if 1
li r0,1
mfspr r1,SPRN_SPRG4 /* could replace w/lwz r1,sw_way_lru@l(0) */
rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */
slw r0,r0,r3
xor r1,r0,r1
srw r0,r1,r3
mtspr SPRN_SPRG4,r1 /* could replace w/stw r1,sw_way_lru@l(0) */
rlwimi r2,r0,31-14,14,14
#endif
tlbld r3
rfi
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2009-03-16 12:42 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-13 15:16 [RFC][PATCH v3] powerpc: e300c2/c3/c4 TLB errata workaround Kumar Gala
2009-03-13 21:25 ` Benjamin Herrenschmidt
2009-03-13 21:26 ` Benjamin Herrenschmidt
2009-03-13 22:06 ` Kumar Gala
2009-03-16 12:42 ` Kumar Gala
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).