* [PATCH 1/3] powerpc/mm: Remove unused register usage in SW TLB miss handling
From: Kumar Gala @ 2009-03-19 13:55 UTC
Cc: linuxppc-dev

Long ago we had some code that actually used the CTR in the SW TLB miss
handlers (603/e300).  Since we don't use it, there's no reason to waste
cycles saving it off and restoring it (we actually didn't restore it in the
fast path case).

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/kernel/head_32.S |   11 +++--------
 1 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index d794a63..0105fd5 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -472,12 +472,11 @@ SystemCall:
 	. = 0x1000
 InstructionTLBMiss:
 /*
- * r0:	stored ctr
+ * r0:	scratch
  * r1:	linux style pte ( later becomes ppc hardware pte )
  * r2:	ptr to linux-style pte
  * r3:	scratch
  */
-	mfctr	r0
 	/* Get PTE (linux-style) and check access */
 	mfspr	r3,SPRN_IMISS
 	lis	r1,PAGE_OFFSET@h		/* check if kernel address */
@@ -528,7 +527,6 @@ InstructionAddressInvalid:
 
 	addis	r1,r1,0x2000
 	mtspr	SPRN_DSISR,r1	/* (shouldn't be needed) */
-	mtctr	r0		/* Restore CTR */
 	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
 	or	r2,r2,r1
 	mtspr	SPRN_SRR1,r2
@@ -549,12 +547,11 @@ InstructionAddressInvalid:
 	. = 0x1100
 DataLoadTLBMiss:
 /*
- * r0:	stored ctr
+ * r0:	scratch
  * r1:	linux style pte ( later becomes ppc hardware pte )
  * r2:	ptr to linux-style pte
  * r3:	scratch
  */
-	mfctr	r0
 	/* Get PTE (linux-style) and check access */
 	mfspr	r3,SPRN_DMISS
 	lis	r1,PAGE_OFFSET@h		/* check if kernel address */
@@ -604,7 +601,6 @@ DataAddressInvalid:
 	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
 	addis	r1,r1,0x2000
 	mtspr	SPRN_DSISR,r1
-	mtctr	r0		/* Restore CTR */
 	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
 	mtspr	SPRN_SRR1,r2
 	mfspr	r1,SPRN_DMISS	/* Get failing address */
@@ -624,12 +620,11 @@ DataAddressInvalid:
 	. = 0x1200
DataStoreTLBMiss:
 /*
- * r0:	stored ctr
+ * r0:	scratch
  * r1:	linux style pte ( later becomes ppc hardware pte )
  * r2:	ptr to linux-style pte
  * r3:	scratch
  */
-	mfctr	r0
 	/* Get PTE (linux-style) and check access */
 	mfspr	r3,SPRN_DMISS
 	lis	r1,PAGE_OFFSET@h		/* check if kernel address */
--
1.5.6.6
* [PATCH 2/3] powerpc/mm: Used free register to save a few cycles in SW TLB miss handling
From: Kumar Gala @ 2009-03-19 13:55 UTC
Cc: linuxppc-dev

Now that r0 is free we can keep the value of I/DMISS in r3 and not reload
it before doing the tlbli/d.  This saves us a few cycles in the fast path
case.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/kernel/head_32.S |   51 +++++++++++++++++++---------------------
 1 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 0105fd5..8a9dc79 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -495,28 +495,27 @@ InstructionTLBMiss:
 	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
 	beq-	InstructionAddressInvalid	/* return if no mapping */
 	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
-	lwz	r3,0(r2)		/* get linux-style pte */
-	andc.	r1,r1,r3		/* check access & ~permission */
+	lwz	r0,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r0		/* check access & ~permission */
 	bne-	InstructionAddressInvalid /* return if access not permitted */
-	ori	r3,r3,_PAGE_ACCESSED	/* set _PAGE_ACCESSED in pte */
+	ori	r0,r0,_PAGE_ACCESSED	/* set _PAGE_ACCESSED in pte */
 	/*
 	 * NOTE! We are assuming this is not an SMP system, otherwise
 	 * we would need to update the pte atomically with lwarx/stwcx.
 	 */
-	stw	r3,0(r2)		/* update PTE (accessed bit) */
+	stw	r0,0(r2)		/* update PTE (accessed bit) */
 	/* Convert linux-style PTE to low word of PPC-style PTE */
-	rlwinm	r1,r3,32-10,31,31	/* _PAGE_RW -> PP lsb */
-	rlwinm	r2,r3,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
+	rlwinm	r1,r0,32-10,31,31	/* _PAGE_RW -> PP lsb */
+	rlwinm	r2,r0,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
 	and	r1,r1,r2		/* writable if _RW and _DIRTY */
-	rlwimi	r3,r3,32-1,30,30	/* _PAGE_USER -> PP msb */
-	rlwimi	r3,r3,32-1,31,31	/* _PAGE_USER -> PP lsb */
+	rlwimi	r0,r0,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r0,r0,32-1,31,31	/* _PAGE_USER -> PP lsb */
 	ori	r1,r1,0xe04		/* clear out reserved bits */
-	andc	r1,r3,r1		/* PP = user? (rw&dirty? 2: 3): 0 */
+	andc	r1,r0,r1		/* PP = user? (rw&dirty? 2: 3): 0 */
 BEGIN_FTR_SECTION
 	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	mtspr	SPRN_RPA,r1
-	mfspr	r3,SPRN_IMISS
 	tlbli	r3
 	mfspr	r3,SPRN_SRR1	/* Need to restore CR0 */
 	mtcrf	0x80,r3
@@ -570,28 +569,27 @@ DataLoadTLBMiss:
 	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
 	beq-	DataAddressInvalid	/* return if no mapping */
 	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
-	lwz	r3,0(r2)		/* get linux-style pte */
-	andc.	r1,r1,r3		/* check access & ~permission */
+	lwz	r0,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r0		/* check access & ~permission */
 	bne-	DataAddressInvalid	/* return if access not permitted */
-	ori	r3,r3,_PAGE_ACCESSED	/* set _PAGE_ACCESSED in pte */
+	ori	r0,r0,_PAGE_ACCESSED	/* set _PAGE_ACCESSED in pte */
 	/*
 	 * NOTE! We are assuming this is not an SMP system, otherwise
 	 * we would need to update the pte atomically with lwarx/stwcx.
 	 */
-	stw	r3,0(r2)		/* update PTE (accessed bit) */
+	stw	r0,0(r2)		/* update PTE (accessed bit) */
 	/* Convert linux-style PTE to low word of PPC-style PTE */
-	rlwinm	r1,r3,32-10,31,31	/* _PAGE_RW -> PP lsb */
-	rlwinm	r2,r3,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
+	rlwinm	r1,r0,32-10,31,31	/* _PAGE_RW -> PP lsb */
+	rlwinm	r2,r0,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
 	and	r1,r1,r2		/* writable if _RW and _DIRTY */
-	rlwimi	r3,r3,32-1,30,30	/* _PAGE_USER -> PP msb */
-	rlwimi	r3,r3,32-1,31,31	/* _PAGE_USER -> PP lsb */
+	rlwimi	r0,r0,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r0,r0,32-1,31,31	/* _PAGE_USER -> PP lsb */
 	ori	r1,r1,0xe04		/* clear out reserved bits */
-	andc	r1,r3,r1		/* PP = user? (rw&dirty? 2: 3): 0 */
+	andc	r1,r0,r1		/* PP = user? (rw&dirty? 2: 3): 0 */
 BEGIN_FTR_SECTION
 	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	mtspr	SPRN_RPA,r1
-	mfspr	r3,SPRN_DMISS
 	tlbld	r3
 	mfspr	r3,SPRN_SRR1	/* Need to restore CR0 */
 	mtcrf	0x80,r3
@@ -643,24 +641,23 @@ DataStoreTLBMiss:
 	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
 	beq-	DataAddressInvalid	/* return if no mapping */
 	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
-	lwz	r3,0(r2)		/* get linux-style pte */
-	andc.	r1,r1,r3		/* check access & ~permission */
+	lwz	r0,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r0		/* check access & ~permission */
 	bne-	DataAddressInvalid	/* return if access not permitted */
-	ori	r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY
+	ori	r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
 	/*
 	 * NOTE! We are assuming this is not an SMP system, otherwise
 	 * we would need to update the pte atomically with lwarx/stwcx.
 	 */
-	stw	r3,0(r2)		/* update PTE (accessed/dirty bits) */
+	stw	r0,0(r2)		/* update PTE (accessed/dirty bits) */
 	/* Convert linux-style PTE to low word of PPC-style PTE */
-	rlwimi	r3,r3,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r0,r0,32-1,30,30	/* _PAGE_USER -> PP msb */
 	li	r1,0xe05		/* clear out reserved bits & PP lsb */
-	andc	r1,r3,r1		/* PP = user? 2: 0 */
+	andc	r1,r0,r1		/* PP = user? 2: 0 */
 BEGIN_FTR_SECTION
 	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	mtspr	SPRN_RPA,r1
-	mfspr	r3,SPRN_DMISS
 	tlbld	r3
 	mfspr	r3,SPRN_SRR1	/* Need to restore CR0 */
 	mtcrf	0x80,r3
--
1.5.6.6
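
As an aside, a rough C sketch of what the PP computation in these handlers
boils down to, per the in-line comments ("PP = user? (rw&dirty? 2: 3): 0").
The helper and parameter names are illustrative only, not kernel code:

	/* Net effect of the rlwinm/rlwimi PP computation in the fast path:
	 * kernel pages get PP=0 (supervisor read/write, no user access);
	 * user pages get PP=2 (read/write) only once both _PAGE_RW and
	 * _PAGE_DIRTY are set, otherwise PP=3 (read-only) so the first
	 * store still faults and the dirty bit can be tracked. */
	static unsigned int pp_value(int user, int rw, int dirty)
	{
		if (!user)
			return 0;
		return (rw && dirty) ? 2 : 3;
	}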
* [PATCH 3/3] powerpc/mm: e300c2/c3/c4 TLB errata workaround
From: Kumar Gala @ 2009-03-19 13:55 UTC
Cc: linuxppc-dev

Complete workaround for DTLB errata in e300c2/c3/c4 processors.

Due to the bug, the hardware-implemented LRU algorithm always goes to way 1
of the TLB.  This fix implements the proposed software workaround in the
form of an LRW table for choosing the TLB way.

Based on patch from David Jander <david@protonic.nl>

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
* Fix a linking problem that showed up in the testing branch due to not
  having #include <asm/mmu.h> in cpu_setup_6xx.S for the MMU feature fixup

 arch/powerpc/include/asm/mmu.h      |    6 ++++++
 arch/powerpc/kernel/cpu_setup_6xx.S |    5 +++++
 arch/powerpc/kernel/cputable.c      |    9 ++++++---
 arch/powerpc/kernel/head_32.S       |   32 ++++++++++++++++++++++++++++----
 4 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index dc82dcd..c073de4 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -46,6 +46,12 @@
  */
 #define MMU_FTR_LOCK_BCAST_INVAL	ASM_CONST(0x00100000)
 
+/* This indicates that the processor doesn't handle way selection
+ * properly and needs SW to track and update the LRU state.  This
+ * is specific to an errata on e300c2/c3/c4 class parts
+ */
+#define MMU_FTR_NEED_DTLB_SW_LRU	ASM_CONST(0x00200000)
+
 #ifndef __ASSEMBLY__
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index 72d1d73..54f767e 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -15,9 +15,14 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
+#include <asm/mmu.h>
 
 _GLOBAL(__setup_cpu_603)
 	mflr	r4
+BEGIN_MMU_FTR_SECTION
+	li	r10,0
+	mtspr	SPRN_SPRG4,r10	/* init SW LRU tracking */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 BEGIN_FTR_SECTION
 	bl	__init_fpu_registers
 END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 6388386..4ec0a3a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1082,7 +1082,8 @@
 		.cpu_name		= "e300c2",
 		.cpu_features		= CPU_FTRS_E300C2,
 		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS |
+			MMU_FTR_NEED_DTLB_SW_LRU,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.cpu_setup		= __setup_cpu_603,
@@ -1095,7 +1096,8 @@
 		.cpu_name		= "e300c3",
 		.cpu_features		= CPU_FTRS_E300,
 		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS |
+			MMU_FTR_NEED_DTLB_SW_LRU,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.cpu_setup		= __setup_cpu_603,
@@ -1110,7 +1112,8 @@
 		.cpu_name		= "e300c4",
 		.cpu_features		= CPU_FTRS_E300,
 		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS |
+			MMU_FTR_NEED_DTLB_SW_LRU,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
 		.cpu_setup		= __setup_cpu_603,
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 8a9dc79..6469ffa 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -590,9 +590,21 @@ BEGIN_FTR_SECTION
 	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	mtspr	SPRN_RPA,r1
+	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r2
+BEGIN_MMU_FTR_SECTION
+	li	r0,1
+	mfspr	r1,SPRN_SPRG4
+	rlwinm	r2,r3,20,27,31		/* Get Address bits 15:19 */
+	slw	r0,r0,r2
+	xor	r1,r0,r1
+	srw	r0,r1,r2
+	mtspr	SPRN_SPRG4,r1
+	mfspr	r2,SPRN_SRR1
+	rlwimi	r2,r0,31-14,14,14
+	mtspr	SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 	tlbld	r3
-	mfspr	r3,SPRN_SRR1		/* Need to restore CR0 */
-	mtcrf	0x80,r3
 	rfi
 DataAddressInvalid:
 	mfspr	r3,SPRN_SRR1
@@ -658,9 +670,21 @@ BEGIN_FTR_SECTION
 	rlwinm	r1,r1,0,~_PAGE_COHERENT	/* clear M (coherence not required) */
 END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	mtspr	SPRN_RPA,r1
+	mfspr	r2,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r2
+BEGIN_MMU_FTR_SECTION
+	li	r0,1
+	mfspr	r1,SPRN_SPRG4
+	rlwinm	r2,r3,20,27,31		/* Get Address bits 15:19 */
+	slw	r0,r0,r2
+	xor	r1,r0,r1
+	srw	r0,r1,r2
+	mtspr	SPRN_SPRG4,r1
+	mfspr	r2,SPRN_SRR1
+	rlwimi	r2,r0,31-14,14,14
+	mtspr	SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 	tlbld	r3
-	mfspr	r3,SPRN_SRR1		/* Need to restore CR0 */
-	mtcrf	0x80,r3
 	rfi
 
 #ifndef CONFIG_ALTIVEC
--
1.5.6.6
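
For reference, a rough C rendering of the software LRU step that the added
BEGIN_MMU_FTR_SECTION block performs on each DTLB miss.  The helper and
variable names below are illustrative only (this is not kernel code); the
set indexing by EA bits 15:19 and the use of SRR1 bit 14 as the way-select
bit consumed by tlbld are taken from the assembly and its comments above:

	#include <stdint.h>

	static uint32_t sprg4;	/* SW LRU state: one bit per TLB set */

	/* dmiss = faulting EA from DMISS, srr1 = SRR1 at the miss;
	 * returns the SRR1 value written back before the tlbld. */
	static uint32_t dtlb_sw_lru(uint32_t dmiss, uint32_t srr1)
	{
		/* EA bits 15:19 (MSB=0 numbering) select the TLB set */
		uint32_t set = (dmiss >> 12) & 0x1f;
		uint32_t way;

		sprg4 ^= 1u << set;		/* alternate ways per set */
		way = (sprg4 >> set) & 1;	/* way to replace this time */

		/* place the way in SRR1 bit 14 (MSB=0), i.e. 1 << 17 from
		 * the LSB, so tlbld replaces that way instead of always
		 * hitting way 1 via the broken hardware LRU */
		return (srr1 & ~(1u << 17)) | (way << 17);
	}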