* [PATCH 1/3] powerpc/mm: Remove unused register usage in SW TLB miss handling
@ 2009-03-19 13:55 Kumar Gala
2009-03-19 13:55 ` [PATCH 2/3] powerpc/mm: Used free register to save a few cycles " Kumar Gala
0 siblings, 1 reply; 3+ messages in thread
From: Kumar Gala @ 2009-03-19 13:55 UTC (permalink / raw)
Cc: linuxppc-dev
Long ago we had some code that actually used the CTR in the SW TLB
miss handlers (603/e300). Since we don't use it no reason to waste
cycles saving it off and restoring it (we actually didn't restore it
in the fast path case).
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
arch/powerpc/kernel/head_32.S | 11 +++--------
1 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index d794a63..0105fd5 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -472,12 +472,11 @@ SystemCall:
. = 0x1000
InstructionTLBMiss:
/*
- * r0: stored ctr
+ * r0: scratch
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
* r3: scratch
*/
- mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
@@ -528,7 +527,6 @@ InstructionAddressInvalid:
addis r1,r1,0x2000
mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */
- mtctr r0 /* Restore CTR */
andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */
or r2,r2,r1
mtspr SPRN_SRR1,r2
@@ -549,12 +547,11 @@ InstructionAddressInvalid:
. = 0x1100
DataLoadTLBMiss:
/*
- * r0: stored ctr
+ * r0: scratch
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
* r3: scratch
*/
- mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
@@ -604,7 +601,6 @@ DataAddressInvalid:
rlwinm r1,r3,9,6,6 /* Get load/store bit */
addis r1,r1,0x2000
mtspr SPRN_DSISR,r1
- mtctr r0 /* Restore CTR */
andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */
mtspr SPRN_SRR1,r2
mfspr r1,SPRN_DMISS /* Get failing address */
@@ -624,12 +620,11 @@ DataAddressInvalid:
. = 0x1200
DataStoreTLBMiss:
/*
- * r0: stored ctr
+ * r0: scratch
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
* r3: scratch
*/
- mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
--
1.5.6.6
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/3] powerpc/mm: Used free register to save a few cycles in SW TLB miss handling
2009-03-19 13:55 [PATCH 1/3] powerpc/mm: Remove unused register usage in SW TLB miss handling Kumar Gala
@ 2009-03-19 13:55 ` Kumar Gala
2009-03-19 13:55 ` [PATCH 3/3] powerpc/mm: e300c2/c3/c4 TLB errata workaround Kumar Gala
0 siblings, 1 reply; 3+ messages in thread
From: Kumar Gala @ 2009-03-19 13:55 UTC (permalink / raw)
Cc: linuxppc-dev
Now that r0 is free we can keep the value of I/DMISS in r3 and not reload
it before doing the tlbli/d. This saves us a few cycles in the fast path
case.
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
arch/powerpc/kernel/head_32.S | 51 +++++++++++++++++++---------------------
1 files changed, 24 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 0105fd5..8a9dc79 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -495,28 +495,27 @@ InstructionTLBMiss:
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- InstructionAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r3,0(r2) /* get linux-style pte */
- andc. r1,r1,r3 /* check access & ~permission */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
- ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
+ ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r3,0(r2) /* update PTE (accessed bit) */
+ stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
and r1,r1,r2 /* writable if _RW and _DIRTY */
- rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r3,SPRN_IMISS
tlbli r3
mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
mtcrf 0x80,r3
@@ -570,28 +569,27 @@ DataLoadTLBMiss:
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r3,0(r2) /* get linux-style pte */
- andc. r1,r1,r3 /* check access & ~permission */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
+ ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r3,0(r2) /* update PTE (accessed bit) */
+ stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
and r1,r1,r2 /* writable if _RW and _DIRTY */
- rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r3,SPRN_DMISS
tlbld r3
mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
mtcrf 0x80,r3
@@ -643,24 +641,23 @@ DataStoreTLBMiss:
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r3,0(r2) /* get linux-style pte */
- andc. r1,r1,r3 /* check access & ~permission */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY
+ ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r3,0(r2) /* update PTE (accessed/dirty bits) */
+ stw r0,0(r2) /* update PTE (accessed/dirty bits) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
li r1,0xe05 /* clear out reserved bits & PP lsb */
- andc r1,r3,r1 /* PP = user? 2: 0 */
+ andc r1,r0,r1 /* PP = user? 2: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r3,SPRN_DMISS
tlbld r3
mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
mtcrf 0x80,r3
--
1.5.6.6
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 3/3] powerpc/mm: e300c2/c3/c4 TLB errata workaround
2009-03-19 13:55 ` [PATCH 2/3] powerpc/mm: Used free register to save a few cycles " Kumar Gala
@ 2009-03-19 13:55 ` Kumar Gala
0 siblings, 0 replies; 3+ messages in thread
From: Kumar Gala @ 2009-03-19 13:55 UTC (permalink / raw)
Cc: linuxppc-dev
Complete workaround for DTLB errata in e300c2/c3/c4 processors.
Due to the bug, the hardware-implemented LRU algorythm always goes to way
1 of the TLB. This fix implements the proposed software workaround in
form of a LRW table for chosing the TLB-way.
Based on patch from David Jander <david@protonic.nl>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
* Fix a linking problem that showed up in testing branch with not having
#include <asm/mmu.h> in cpu_setup_6xx.S for the MMU feature fixup
arch/powerpc/include/asm/mmu.h | 6 ++++++
arch/powerpc/kernel/cpu_setup_6xx.S | 5 +++++
arch/powerpc/kernel/cputable.c | 9 ++++++---
arch/powerpc/kernel/head_32.S | 32 ++++++++++++++++++++++++++++----
4 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index dc82dcd..c073de4 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -46,6 +46,12 @@
*/
#define MMU_FTR_LOCK_BCAST_INVAL ASM_CONST(0x00100000)
+/* This indicates that the processor doesn't handle way selection
+ * properly and needs SW to track and update the LRU state. This
+ * is specific to an errata on e300c2/c3/c4 class parts
+ */
+#define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000)
+
#ifndef __ASSEMBLY__
#include <asm/cputable.h>
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index 72d1d73..54f767e 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -15,9 +15,14 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
+#include <asm/mmu.h>
_GLOBAL(__setup_cpu_603)
mflr r4
+BEGIN_MMU_FTR_SECTION
+ li r10,0
+ mtspr SPRN_SPRG4,r10 /* init SW LRU tracking */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
BEGIN_FTR_SECTION
bl __init_fpu_registers
END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 6388386..4ec0a3a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1082,7 +1082,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "e300c2",
.cpu_features = CPU_FTRS_E300C2,
.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
@@ -1095,7 +1096,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "e300c3",
.cpu_features = CPU_FTRS_E300,
.cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
@@ -1110,7 +1112,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "e300c4",
.cpu_features = CPU_FTRS_E300,
.cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 8a9dc79..6469ffa 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -590,9 +590,21 @@ BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+BEGIN_MMU_FTR_SECTION
+ li r0,1
+ mfspr r1,SPRN_SPRG4
+ rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
+ slw r0,r0,r2
+ xor r1,r0,r1
+ srw r0,r1,r2
+ mtspr SPRN_SPRG4,r1
+ mfspr r2,SPRN_SRR1
+ rlwimi r2,r0,31-14,14,14
+ mtspr SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
tlbld r3
- mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r3
rfi
DataAddressInvalid:
mfspr r3,SPRN_SRR1
@@ -658,9 +670,21 @@ BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+BEGIN_MMU_FTR_SECTION
+ li r0,1
+ mfspr r1,SPRN_SPRG4
+ rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
+ slw r0,r0,r2
+ xor r1,r0,r1
+ srw r0,r1,r2
+ mtspr SPRN_SPRG4,r1
+ mfspr r2,SPRN_SRR1
+ rlwimi r2,r0,31-14,14,14
+ mtspr SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
tlbld r3
- mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r3
rfi
#ifndef CONFIG_ALTIVEC
--
1.5.6.6
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-03-19 13:55 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-03-19 13:55 [PATCH 1/3] powerpc/mm: Remove unused register usage in SW TLB miss handling Kumar Gala
2009-03-19 13:55 ` [PATCH 2/3] powerpc/mm: Used free register to save a few cycles " Kumar Gala
2009-03-19 13:55 ` [PATCH 3/3] powerpc/mm: e300c2/c3/c4 TLB errata workaround Kumar Gala
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.