* [PATCH] Synthesize TLB refill handler at runtime
@ 2004-11-21 17:02 Thiemo Seufer
2004-11-21 19:50 ` Geert Uytterhoeven
0 siblings, 1 reply; 28+ messages in thread
From: Thiemo Seufer @ 2004-11-21 17:02 UTC (permalink / raw)
To: linux-mips; +Cc: ralf
Hello All,
currently we have a large number of TLB refill handlers written in
hand-optimized assembly which are mostly indentical. The appended
patch removes them all, and adds a micro-assembler instead which
synthesizes the proper variant for the CPU at runtime.
Tested for
- ip22 32bit
- ip22 64bit
- ip27 SMP 64bit
- ip32 64bit
with excellent results. The synthesized handler is in most cases
shorter and more performant than the hand-written version.
Please test on your machine as well.
Thiemo
Index: arch/mips/mm/Makefile
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/Makefile,v
retrieving revision 1.68
diff -u -p -r1.68 Makefile
--- arch/mips/mm/Makefile 20 Jun 2004 23:52:17 -0000 1.68
+++ arch/mips/mm/Makefile 20 Nov 2004 16:46:40 -0000
@@ -2,7 +2,8 @@
# Makefile for the Linux/MIPS-specific parts of the memory manager.
#
-obj-y += cache.o extable.o fault.o init.o pgtable.o
+obj-y += cache.o extable.o fault.o init.o pgtable.o \
+ tlbex.o
obj-$(CONFIG_MIPS32) += ioremap.o pgtable-32.o
obj-$(CONFIG_MIPS64) += pgtable-64.o
@@ -47,16 +48,16 @@ obj-$(CONFIG_CPU_SB1) += tlbex32-r4k.o
obj-$(CONFIG_CPU_TX39XX) += tlbex32-r3k.o
endif
ifdef CONFIG_MIPS64
-obj-$(CONFIG_CPU_R4300) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R4X00) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R5000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_NEVADA) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R5432) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_RM7000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_RM9000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R10000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_SB1) += tlb64-glue-sb1.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_MIPS64) += tlb64-glue-r4k.o tlbex64-r4k.o
+obj-$(CONFIG_CPU_R4300) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R4X00) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R5000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_NEVADA) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R5432) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_RM7000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_RM9000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R10000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_SB1) += tlb64-glue-sb1.o
+obj-$(CONFIG_CPU_MIPS64) += tlb64-glue-r4k.o
endif
Index: arch/mips/mm/tlb-andes.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-andes.c,v
retrieving revision 1.8
diff -u -p -r1.8 tlb-andes.c
--- arch/mips/mm/tlb-andes.c 19 Oct 2004 02:21:16 -0000 1.8
+++ arch/mips/mm/tlb-andes.c 20 Nov 2004 16:46:46 -0000
@@ -17,10 +17,7 @@
#include <asm/system.h>
#include <asm/mmu_context.h>
-extern void except_vec0_generic(void);
-extern void except_vec0_r4000(void);
-extern void except_vec1_generic(void);
-extern void except_vec1_r4k(void);
+extern void build_tlb_refill_handler(void);
#define NTLB_ENTRIES 64
#define NTLB_ENTRIES_HALF 32
@@ -257,14 +254,5 @@ void __init tlb_init(void)
/* Did I tell you that ARC SUCKS? */
-#ifdef CONFIG_MIPS32
- memcpy((void *)KSEG0, &except_vec0_r4000, 0x80);
- memcpy((void *)(KSEG0 + 0x080), &except_vec1_generic, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x100);
-#endif
-#ifdef CONFIG_MIPS64
- memcpy((void *)(CKSEG0 + 0x000), &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x080), except_vec1_r4k, 0x80);
- flush_icache_range(CKSEG0 + 0x80, CKSEG0 + 0x100);
-#endif
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-r3k.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-r3k.c,v
retrieving revision 1.26
diff -u -p -r1.26 tlb-r3k.c
--- arch/mips/mm/tlb-r3k.c 11 Dec 2003 16:27:01 -0000 1.26
+++ arch/mips/mm/tlb-r3k.c 20 Nov 2004 16:46:46 -0000
@@ -26,7 +26,7 @@
#undef DEBUG_TLB
-extern char except_vec0_r2300;
+extern void build_tlb_refill_handler(void);
/* CP0 hazard avoidance. */
#define BARRIER \
@@ -284,6 +284,6 @@ void __init add_wired_entry(unsigned lon
void __init tlb_init(void)
{
local_flush_tlb_all();
- memcpy((void *)KSEG0, &except_vec0_r2300, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x80);
+
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-r4k.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-r4k.c,v
retrieving revision 1.38
diff -u -p -r1.38 tlb-r4k.c
--- arch/mips/mm/tlb-r4k.c 19 Mar 2004 04:07:59 -0000 1.38
+++ arch/mips/mm/tlb-r4k.c 20 Nov 2004 16:46:46 -0000
@@ -19,12 +19,7 @@
#include <asm/pgtable.h>
#include <asm/system.h>
-extern void except_vec0_generic(void);
-extern void except_vec0_nevada(void);
-extern void except_vec0_r4000(void);
-extern void except_vec0_r4600(void);
-extern void except_vec1_generic(void);
-extern void except_vec1_r4k(void);
+extern void build_tlb_refill_handler(void);
/* CP0 hazard avoidance. */
#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \
@@ -414,19 +409,5 @@ void __init tlb_init(void)
temp_tlb_entry = current_cpu_data.tlbsize - 1;
local_flush_tlb_all();
-#ifdef CONFIG_MIPS32
- if (current_cpu_data.cputype == CPU_NEVADA)
- memcpy((void *)KSEG0, &except_vec0_nevada, 0x80);
- else if (current_cpu_data.cputype == CPU_R4600)
- memcpy((void *)KSEG0, &except_vec0_r4600, 0x80);
- else
- memcpy((void *)KSEG0, &except_vec0_r4000, 0x80);
- memcpy((void *)(KSEG0 + 0x080), &except_vec1_generic, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x100);
-#endif
-#ifdef CONFIG_MIPS64
- memcpy((void *)(CKSEG0 + 0x00), &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x80), except_vec1_r4k, 0x80);
- flush_icache_range(CKSEG0 + 0x80, CKSEG0 + 0x100);
-#endif
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-r8k.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-r8k.c,v
retrieving revision 1.1
diff -u -p -r1.1 tlb-r8k.c
--- arch/mips/mm/tlb-r8k.c 20 Jun 2004 23:01:07 -0000 1.1
+++ arch/mips/mm/tlb-r8k.c 20 Nov 2004 16:46:46 -0000
@@ -19,8 +19,7 @@
#include <asm/pgtable.h>
#include <asm/system.h>
-extern void except_vec0_generic(void);
-extern void except_vec1_r8k(void);
+extern void build_tlb_refill_handler(void);
#define TFP_TLB_SIZE 384
#define TFP_TLB_SET_SHIFT 7
@@ -247,7 +246,5 @@ void __init tlb_init(void)
local_flush_tlb_all();
- memcpy((void *)(CKSEG0 + 0x00), &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x80), except_vec1_r8k, 0x80);
- flush_icache_range(CKSEG0 + 0x80, CKSEG0 + 0x100);
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-sb1.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-sb1.c,v
retrieving revision 1.45
diff -u -p -r1.45 tlb-sb1.c
--- arch/mips/mm/tlb-sb1.c 23 Oct 2004 01:18:17 -0000 1.45
+++ arch/mips/mm/tlb-sb1.c 20 Nov 2004 16:46:46 -0000
@@ -23,14 +23,7 @@
#include <asm/bootinfo.h>
#include <asm/cpu.h>
-#ifdef CONFIG_MIPS32
-extern void except_vec0_sb1(void);
-extern void except_vec1_generic(void);
-#endif
-#ifdef CONFIG_MIPS64
-extern void except_vec0_generic(void);
-extern void except_vec1_sb1(void);
-#endif
+extern void build_tlb_refill_handler(void);
#define UNIQUE_ENTRYHI(idx) (KSEG0 + ((idx) << (PAGE_SHIFT + 1)))
@@ -380,14 +373,5 @@ void tlb_init(void)
*/
sb1_sanitize_tlb();
-#ifdef CONFIG_MIPS32
- memcpy((void *)KSEG0, &except_vec0_sb1, 0x80);
- memcpy((void *)(KSEG0 + 0x080), &except_vec1_generic, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x100);
-#endif
-#ifdef CONFIG_MIPS64
- memcpy((void *)CKSEG0, &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x80), &except_vec1_sb1, 0x80);
- flush_icache_range(CKSEG0, CKSEG0 + 0x100);
-#endif
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlbex32-r3k.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlbex32-r3k.S,v
retrieving revision 1.1
diff -u -p -r1.1 tlbex32-r3k.S
--- arch/mips/mm/tlbex32-r3k.S 20 Jun 2004 23:52:17 -0000 1.1
+++ arch/mips/mm/tlbex32-r3k.S 20 Nov 2004 16:46:46 -0000
@@ -24,36 +24,6 @@
#define TLB_OPTIMIZE /* If you are paranoid, disable this. */
- .text
- .set mips1
- .set noreorder
-
- __INIT
-
- /* TLB refill, R[23]00 version */
- LEAF(except_vec0_r2300)
- .set noat
- .set mips1
- mfc0 k0, CP0_BADVADDR
- lw k1, pgd_current # get pgd pointer
- srl k0, k0, 22
- sll k0, k0, 2
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
- and k0, k0, 0xffc
- addu k1, k1, k0
- lw k0, (k1)
- nop
- mtc0 k0, CP0_ENTRYLO0
- mfc0 k1, CP0_EPC
- tlbwr
- jr k1
- rfe
- END(except_vec0_r2300)
-
- __FINIT
-
/* ABUSE of CPP macros 101. */
/* After this macro runs, the pte faulted on is
Index: arch/mips/mm/tlbex32-r4k.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlbex32-r4k.S,v
retrieving revision 1.2
diff -u -p -r1.2 tlbex32-r4k.S
--- arch/mips/mm/tlbex32-r4k.S 3 Oct 2004 01:16:24 -0000 1.2
+++ arch/mips/mm/tlbex32-r4k.S 20 Nov 2004 16:46:46 -0000
@@ -139,272 +139,6 @@
_PAGE_VALID | _PAGE_DIRTY); \
PTE_S pte, (ptr);
- __INIT
-
-#ifdef CONFIG_64BIT_PHYS_ADDR
-#define GET_PTE_OFF(reg)
-#elif CONFIG_CPU_VR41XX
-#define GET_PTE_OFF(reg) srl reg, reg, 3
-#else
-#define GET_PTE_OFF(reg) srl reg, reg, 1
-#endif
-
-/*
- * These handlers much be written in a relocatable manner
- * because based upon the cpu type an arbitrary one of the
- * following pieces of code will be copied to the KSEG0
- * vector location.
- */
- /* TLB refill, EXL == 0, R4xx0, non-R4600 version */
- .set noreorder
- .set noat
- LEAF(except_vec0_r4000)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR # Get faulting address
- srl k0, k0, _PGDIR_SHIFT # get pgd only bits
-
- sll k0, k0, 2
- addu k1, k1, k0 # add in pgd offset
- mfc0 k0, CP0_CONTEXT # get context reg
- lw k1, (k1)
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0 # add in offset
- PTE_L k0, 0(k1) # get even pte
- PTE_L k1, PTE_SIZE(k1) # get odd pte
- PTE_SRL k0, k0, 6 # convert to entrylo0
- P_MTC0 k0, CP0_ENTRYLO0 # load it
- PTE_SRL k1, k1, 6 # convert to entrylo1
- P_MTC0 k1, CP0_ENTRYLO1 # load it
- mtc0_tlbw_hazard
- tlbwr # write random tlb entry
- nop
- tlbw_eret_hazard
- eret # return from trap
- END(except_vec0_r4000)
-
- /* TLB refill, EXL == 0, R4600 version */
- LEAF(except_vec0_r4600)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- PTE_SRL k0, k0, 6
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- P_MTC0 k1, CP0_ENTRYLO1
- nop
- tlbwr
- nop
- eret
- END(except_vec0_r4600)
-
- /* TLB refill, EXL == 0, R52x0 "Nevada" version */
- /*
- * This version has a bug workaround for the Nevada. It seems
- * as if under certain circumstances the move from cp0_context
- * might produce a bogus result when the mfc0 instruction and
- * it's consumer are in a different cacheline or a load instruction,
- * probably any memory reference, is between them. This is
- * potencially slower than the R4000 version, so we use this
- * special version.
- */
- .set noreorder
- .set noat
- LEAF(except_vec0_nevada)
- .set mips3
- mfc0 k0, CP0_BADVADDR # Get faulting address
- srl k0, k0, _PGDIR_SHIFT # get pgd only bits
- lw k1, pgd_current # get pgd pointer
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0 # add in pgd offset
- lw k1, (k1)
- mfc0 k0, CP0_CONTEXT # get context reg
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0 # add in offset
- PTE_L k0, 0(k1) # get even pte
- PTE_L k1, PTE_SIZE(k1) # get odd pte
- PTE_SRL k0, k0, 6 # convert to entrylo0
- P_MTC0 k0, CP0_ENTRYLO0 # load it
- PTE_SRL k1, k1, 6 # convert to entrylo1
- P_MTC0 k1, CP0_ENTRYLO1 # load it
- nop # QED specified nops
- nop
- tlbwr # write random tlb entry
- nop # traditional nop
- eret # return from trap
- END(except_vec0_nevada)
-
- /* TLB refill, EXL == 0, SB1 with M3 errata handling version */
- LEAF(except_vec0_sb1)
-#if BCM1250_M3_WAR
- mfc0 k0, CP0_BADVADDR
- mfc0 k1, CP0_ENTRYHI
- xor k0, k1
- srl k0, k0, PAGE_SHIFT+1
- bnez k0, 1f
-#endif
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR # Get faulting address
- srl k0, k0, _PGDIR_SHIFT # get pgd only bits
- sll k0, k0, 2
- addu k1, k1, k0 # add in pgd offset
- mfc0 k0, CP0_CONTEXT # get context reg
- lw k1, (k1)
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0 # add in offset
- PTE_L k0, 0(k1) # get even pte
- PTE_L k1, PTE_SIZE(k1) # get odd pte
- PTE_SRL k0, k0, 6 # convert to entrylo0
- P_MTC0 k0, CP0_ENTRYLO0 # load it
- PTE_SRL k1, k1, 6 # convert to entrylo1
- P_MTC0 k1, CP0_ENTRYLO1 # load it
- tlbwr # write random tlb entry
-1: eret # return from trap
- END(except_vec0_sb1)
-
- /* TLB refill, EXL == 0, R4[40]00/R5000 badvaddr hwbug version */
- LEAF(except_vec0_r45k_bvahwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- nop /* XXX */
- tlbp
- PTE_SRL k0, k0, 6
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- mfc0 k0, CP0_INDEX
- P_MTC0 k1, CP0_ENTRYLO1
- bltzl k0, 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r45k_bvahwbug)
-
-#ifdef CONFIG_SMP
- /* TLB refill, EXL == 0, R4000 MP badvaddr hwbug version */
- LEAF(except_vec0_r4k_mphwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- nop /* XXX */
- tlbp
- PTE_SRL k0, k0, 6
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- mfc0 k0, CP0_INDEX
- P_MTC0 k1, CP0_ENTRYLO1
- bltzl k0, 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r4k_mphwbug)
-#endif
-
- /* TLB refill, EXL == 0, R4000 UP 250MHZ entrylo[01] hwbug version */
- LEAF(except_vec0_r4k_250MHZhwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- PTE_SRL k0, k0, 6
- P_MTC0 zero, CP0_ENTRYLO0
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- P_MTC0 zero, CP0_ENTRYLO1
- P_MTC0 k1, CP0_ENTRYLO1
- b 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r4k_250MHZhwbug)
-
-#ifdef CONFIG_SMP
- /* TLB refill, EXL == 0, R4000 MP 250MHZ entrylo[01]+badvaddr bug version */
- LEAF(except_vec0_r4k_MP250MHZhwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- nop /* XXX */
- tlbp
- PTE_SRL k0, k0, 6
- P_MTC0 zero, CP0_ENTRYLO0
- P_MTC0 k0, CP0_ENTRYLO0
- mfc0 k0, CP0_INDEX
- PTE_SRL k1, k1, 6
- P_MTC0 zero, CP0_ENTRYLO1
- P_MTC0 k1, CP0_ENTRYLO1
- bltzl k0, 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r4k_MP250MHZhwbug)
-#endif
-
- __FINIT
.set noreorder
--- /dev/null 2004-08-24 19:23:08.000000000 +0200
+++ arch/mips/mm/tlbex.c 2004-11-20 17:41:35.000000000 +0100
@@ -0,0 +1,1162 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Systhesize TLB refill handlers at runtime.
+ *
+ * Copyright (C) 2004 by Thiemo Seufer
+ */
+
+#include <stdarg.h>
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/inst.h>
+#include <asm/elf.h>
+#include <asm/smp.h>
+
+/* #define DEBUG_TLB */
+
+static __init int r45k_bvahwbug(void)
+{
+ /* XXX: We should probe for the presence of this bug, but we don't. */
+ return 0;
+}
+
+static __init int r4k_250MHZhwbug(void)
+{
+ /* XXX: We should probe for the presence of this bug, but we don't. */
+ return 0;
+}
+
+static __init int bcm1250_m3_war(void)
+{
+ return BCM1250_M3_WAR;
+}
+
+/*
+ * A little micro-assembler, intended for TLB refill handler
+ * synthesizing. It is intentionally kept simple, does only support
+ * a subset of instructions, and does not try to hide pipeline effects
+ * like branch delay slots.
+ */
+
+enum fields
+{
+ RS = 0x001,
+ RT = 0x002,
+ RD = 0x004,
+ RE = 0x008,
+ SIMM = 0x010,
+ UIMM = 0x020,
+ BIMM = 0x040,
+ JIMM = 0x080,
+ FUNC = 0x100,
+};
+
+#define OP_MASK 0x2f
+#define OP_SH 26
+#define RS_MASK 0x1f
+#define RS_SH 21
+#define RT_MASK 0x1f
+#define RT_SH 16
+#define RD_MASK 0x1f
+#define RD_SH 11
+#define RE_MASK 0x1f
+#define RE_SH 6
+#define IMM_MASK 0xffff
+#define IMM_SH 0
+#define JIMM_MASK 0x3ffffff
+#define JIMM_SH 0
+#define FUNC_MASK 0x2f
+#define FUNC_SH 0
+
+enum opcode {
+ insn_invalid,
+ insn_addu, insn_addiu, insn_and, insn_andi, insn_beq,
+ insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, insn_bne,
+ insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0,
+ insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32,
+ insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld,
+ insn_lui, insn_lw, insn_mfc0, insn_mtc0, insn_ori, insn_rfe,
+ insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw,
+ insn_tlbp, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori
+};
+
+struct insn {
+ enum opcode opcode;
+ u32 match;
+ enum fields fields;
+};
+
+/* This macro sets the non-variable bits of an instruction. */
+#define M(a, b, c, d, e, f) \
+ ((a) << OP_SH \
+ | (b) << RS_SH \
+ | (c) << RT_SH \
+ | (d) << RD_SH \
+ | (e) << RE_SH \
+ | (f) << FUNC_SH)
+
+static __initdata struct insn insn_table[] = {
+ { insn_addiu, M(addiu_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_addu, M(spec_op,0,0,0,0,addu_op), RS | RT | RD },
+ { insn_and, M(spec_op,0,0,0,0,and_op), RS | RT | RD },
+ { insn_andi, M(andi_op,0,0,0,0,0), RS | RT | UIMM },
+ { insn_beq, M(beq_op,0,0,0,0,0), RS | RT | BIMM },
+ { insn_bgez, M(bcond_op,0,bgez_op,0,0,0), RS | BIMM },
+ { insn_bgezl, M(bcond_op,0,bgezl_op,0,0,0), RS | BIMM },
+ { insn_bltz, M(bcond_op,0,bltz_op,0,0,0), RS | BIMM },
+ { insn_bltzl, M(bcond_op,0,bltzl_op,0,0,0), RS | BIMM },
+ { insn_bne, M(bne_op,0,0,0,0,0), RS | RT | BIMM },
+ { insn_daddiu, M(daddiu_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_daddu, M(spec_op,0,0,0,0,daddu_op), RS | RT | RD },
+ { insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD },
+ { insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD },
+ { insn_dsll, M(spec_op,0,0,0,0,dsll_op), RT | RD | RE },
+ { insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE },
+ { insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE },
+ { insn_dsrl, M(spec_op,0,0,0,0,dsrl_op), RT | RD | RE },
+ { insn_dsrl32, M(spec_op,0,0,0,0,dsrl32_op), RT | RD | RE },
+ { insn_dsubu, M(spec_op,0,0,0,0,dsubu_op), RS | RT | RD },
+ { insn_eret, M(cop0_op,cop_op,0,0,0,eret_op), 0 },
+ { insn_j, M(j_op,0,0,0,0,0), JIMM },
+ { insn_jal, M(jal_op,0,0,0,0,0), JIMM },
+ { insn_jr, M(spec_op,0,0,0,0,jr_op), RS },
+ { insn_ld, M(ld_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_lui, M(lui_op,0,0,0,0,0), RT | SIMM },
+ { insn_lw, M(lw_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD },
+ { insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD },
+ { insn_ori, M(ori_op,0,0,0,0,0), RS | RT | UIMM },
+ { insn_rfe, M(cop0_op,cop_op,0,0,0,rfe_op), 0 },
+ { insn_sd, M(sd_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_sll, M(spec_op,0,0,0,0,sll_op), RT | RD | RE },
+ { insn_sra, M(spec_op,0,0,0,0,sra_op), RT | RD | RE },
+ { insn_srl, M(spec_op,0,0,0,0,srl_op), RT | RD | RE },
+ { insn_subu, M(spec_op,0,0,0,0,subu_op), RS | RT | RD },
+ { insn_sw, M(sw_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_tlbp, M(cop0_op,cop_op,0,0,0,tlbp_op), 0 },
+ { insn_tlbwi, M(cop0_op,cop_op,0,0,0,tlbwi_op), 0 },
+ { insn_tlbwr, M(cop0_op,cop_op,0,0,0,tlbwr_op), 0 },
+ { insn_xor, M(spec_op,0,0,0,0,xor_op), RS | RT | RD },
+ { insn_xori, M(xori_op,0,0,0,0,0), RS | RT | UIMM },
+ { insn_invalid, 0, 0 }
+};
+
+#undef M
+
+static __init u32 build_rs(u32 arg)
+{
+ if (arg & ~RS_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RS_MASK) << RS_SH;
+}
+
+static __init u32 build_rt(u32 arg)
+{
+ if (arg & ~RT_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RT_MASK) << RT_SH;
+}
+
+static __init u32 build_rd(u32 arg)
+{
+ if (arg & ~RD_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RD_MASK) << RD_SH;
+}
+
+static __init u32 build_re(u32 arg)
+{
+ if (arg & ~RE_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RE_MASK) << RE_SH;
+}
+
+static __init u32 build_simm(s32 arg)
+{
+ if (arg > 0x7fff || arg < -0x8000)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return arg & 0xffff;
+}
+
+static __init u32 build_uimm(u32 arg)
+{
+ if (arg & ~IMM_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return arg & IMM_MASK;
+}
+
+static __init u32 build_bimm(s32 arg)
+{
+ if (arg > 0x1ffff || arg < -0x20000)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ if (arg & 0x3)
+ printk(KERN_WARNING "Invalid TLB synthesizer branch target\n");
+
+ return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff);
+}
+
+static __init u32 build_jimm(u32 arg)
+{
+ if (arg & ~((JIMM_MASK) << 2))
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg >> 2) & JIMM_MASK;
+}
+
+static __init u32 build_func(u32 arg)
+{
+ if (arg & ~FUNC_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return arg & FUNC_MASK;
+}
+
+/*
+ * The order of opcode arguments is implicitly left to right,
+ * starting with RS and ending with FUNC or IMM.
+ */
+static void __init build_insn(u32 **buf, enum opcode opc, ...)
+{
+ struct insn *ip = NULL;
+ unsigned int i;
+ va_list ap;
+ u32 op;
+
+ for (i = 0; insn_table[i].opcode != insn_invalid; i++)
+ if (insn_table[i].opcode == opc) {
+ ip = &insn_table[i];
+ break;
+ }
+
+ if (!ip)
+ panic("Unsupported TLB synthesizer instruction %d", opc);
+
+ op = ip->match;
+ va_start(ap, opc);
+ if (ip->fields & RS) op |= build_rs(va_arg(ap, u32));
+ if (ip->fields & RT) op |= build_rt(va_arg(ap, u32));
+ if (ip->fields & RD) op |= build_rd(va_arg(ap, u32));
+ if (ip->fields & RE) op |= build_re(va_arg(ap, u32));
+ if (ip->fields & SIMM) op |= build_simm(va_arg(ap, s32));
+ if (ip->fields & UIMM) op |= build_uimm(va_arg(ap, u32));
+ if (ip->fields & BIMM) op |= build_bimm(va_arg(ap, s32));
+ if (ip->fields & JIMM) op |= build_jimm(va_arg(ap, u32));
+ if (ip->fields & FUNC) op |= build_func(va_arg(ap, u32));
+ va_end(ap);
+
+ **buf = op;
+ (*buf)++;
+}
+
+#define I_u1u2u3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, a, b, c); \
+ }
+
+#define I_u2u1u3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, b, a, c); \
+ }
+
+#define I_u3u1u2(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, b, c, a); \
+ }
+
+#define I_u1u2s3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, signed int c) \
+ { \
+ build_insn(buf, insn##op, a, b, c); \
+ }
+
+#define I_u2s3u1(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ signed int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, c, a, b); \
+ }
+
+#define I_u2u1s3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, signed int c) \
+ { \
+ build_insn(buf, insn##op, b, a, c); \
+ }
+
+#define I_u1u2(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b) \
+ { \
+ build_insn(buf, insn##op, a, b); \
+ }
+
+#define I_u1s2(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ signed int b) \
+ { \
+ build_insn(buf, insn##op, a, b); \
+ }
+
+#define I_u1(op) \
+ static inline void i##op(u32 **buf, unsigned int a) \
+ { \
+ build_insn(buf, insn##op, a); \
+ }
+
+#define I_0(op) \
+ static inline void i##op(u32 **buf) \
+ { \
+ build_insn(buf, insn##op); \
+ }
+
+I_u2u1s3(_addiu);
+I_u3u1u2(_addu);
+I_u2u1u3(_andi);
+I_u3u1u2(_and);
+I_u1u2s3(_beq);
+I_u1s2(_bgez);
+I_u1s2(_bgezl);
+I_u1s2(_bltz);
+I_u1s2(_bltzl);
+I_u1u2s3(_bne);
+I_u1u2(_dmfc0);
+I_u1u2(_dmtc0);
+I_u2u1s3(_daddiu);
+I_u3u1u2(_daddu);
+I_u2u1u3(_dsll);
+I_u2u1u3(_dsll32);
+I_u2u1u3(_dsra);
+I_u2u1u3(_dsrl);
+I_u2u1u3(_dsrl32);
+I_u3u1u2(_dsubu);
+I_0(_eret);
+I_u1(_j);
+I_u1(_jal);
+I_u1(_jr);
+I_u2s3u1(_ld);
+I_u1s2(_lui);
+I_u2s3u1(_lw);
+I_u1u2(_mfc0);
+I_u1u2(_mtc0);
+I_u2u1u3(_ori);
+I_0(_rfe);
+I_u2s3u1(_sd);
+I_u2u1u3(_sll);
+I_u2u1u3(_sra);
+I_u2u1u3(_srl);
+I_u3u1u2(_subu);
+I_u2s3u1(_sw);
+I_0(_tlbp);
+I_0(_tlbwi);
+I_0(_tlbwr);
+I_u3u1u2(_xor)
+I_u2u1u3(_xori);
+
+/*
+ * handling labels
+ */
+
+enum label_id {
+ label_invalid,
+ label_second_part,
+ label_leave,
+ label_vmalloc,
+ label_vmalloc_done,
+ label_tlbwr_hazard,
+ label_split
+};
+
+struct label {
+ u32 *addr;
+ enum label_id lab;
+};
+
+static __init void build_label(struct label **lab, u32 *addr,
+ enum label_id l)
+{
+ (*lab)->addr = addr;
+ (*lab)->lab = l;
+ (*lab)++;
+}
+
+#define L_LA(lb) \
+ static inline void l##lb(struct label **lab, u32 *addr) \
+ { \
+ build_label(lab, addr, label##lb); \
+ }
+
+L_LA(_second_part)
+L_LA(_leave)
+L_LA(_vmalloc)
+L_LA(_vmalloc_done)
+L_LA(_tlbwr_hazard)
+L_LA(_split)
+
+/* convenience macros for instructions */
+#ifdef CONFIG_MIPS64
+# define i_LW(buf, rs, rt, off) i_ld(buf, rs, rt, off)
+# define i_SW(buf, rs, rt, off) i_sd(buf, rs, rt, off)
+# define i_SLL(buf, rs, rt, sh) i_dsll(buf, rs, rt, sh)
+# define i_SRA(buf, rs, rt, sh) i_dsra(buf, rs, rt, sh)
+# define i_SRL(buf, rs, rt, sh) i_dsrl(buf, rs, rt, sh)
+# define i_MFC0(buf, rt, rd) i_dmfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd) i_dmtc0(buf, rt, rd)
+# define i_ADDIU(buf, rs, rt, val) i_daddiu(buf, rs, rt, val)
+# define i_ADDU(buf, rs, rt, rd) i_daddu(buf, rs, rt, rd)
+# define i_SUBU(buf, rs, rt, rd) i_dsubu(buf, rs, rt, rd)
+#else
+# define i_LW(buf, rs, rt, off) i_lw(buf, rs, rt, off)
+# define i_SW(buf, rs, rt, off) i_sw(buf, rs, rt, off)
+# define i_SLL(buf, rs, rt, sh) i_sll(buf, rs, rt, sh)
+# define i_SRA(buf, rs, rt, sh) i_sra(buf, rs, rt, sh)
+# define i_SRL(buf, rs, rt, sh) i_srl(buf, rs, rt, sh)
+# define i_MFC0(buf, rt, rd) i_mfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd) i_mtc0(buf, rt, rd)
+# define i_ADDIU(buf, rs, rt, val) i_addiu(buf, rs, rt, val)
+# define i_ADDU(buf, rs, rt, rd) i_addu(buf, rs, rt, rd)
+# define i_SUBU(buf, rs, rt, rd) i_subu(buf, rs, rt, rd)
+#endif
+
+#define i_b(buf, off) i_beq(buf, 0, 0, off)
+#define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off)
+#define i_move(buf, a, b) i_ADDU(buf, a, 0, b)
+#define i_nop(buf) i_sll(buf, 0, 0, 0)
+#define i_ssnop(buf) i_sll(buf, 0, 2, 1)
+
+#if CONFIG_MIPS64
+static __init int in_compat_space_p(long addr)
+{
+ /* Is this address in 32bit compat space? */
+ return (((addr) & 0xffffffff00000000) == 0xffffffff00000000);
+}
+
+static __init int rel_highest(long val)
+{
+ return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init int rel_higher(long val)
+{
+ return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000;
+}
+#endif
+
+static __init int rel_hi(long val)
+{
+ return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init int rel_lo(long val)
+{
+ return ((val & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr)
+{
+#if CONFIG_MIPS64
+ if (!in_compat_space_p(addr)) {
+ i_lui(buf, rs, rel_highest(addr));
+ if (rel_higher(addr))
+ i_daddiu(buf, rs, rs, rel_higher(addr));
+ if (rel_hi(addr)) {
+ i_dsll(buf, rs, rs, 16);
+ i_daddiu(buf, rs, rs, rel_hi(addr));
+ i_dsll(buf, rs, rs, 16);
+ } else
+ i_dsll32(buf, rs, rs, 0);
+ } else
+#endif
+ i_lui(buf, rs, rel_hi(addr));
+}
+
+static __init void i_LA(u32 **buf, unsigned int rs, long addr)
+{
+ i_LA_mostly(buf, rs, addr);
+ if (rel_lo(addr))
+ i_ADDIU(buf, rs, rs, rel_lo(addr));
+}
+
+/*
+ * handle relocations
+ */
+
+struct reloc {
+ u32 *addr;
+ unsigned int type;
+ enum label_id lab;
+};
+
+static __init void r_mips_pc16(struct reloc **rel, u32 *addr,
+ enum label_id l)
+{
+ (*rel)->addr = addr;
+ (*rel)->type = R_MIPS_PC16;
+ (*rel)->lab = l;
+ (*rel)++;
+}
+
+static inline void __resolve_relocs(struct reloc *rel, struct label *lab)
+{
+ long laddr = (long)lab->addr;
+ long raddr = (long)rel->addr;
+
+ switch (rel->type) {
+ case R_MIPS_PC16:
+ *rel->addr |= build_bimm(laddr - (raddr + 4));
+ break;
+
+ default:
+ panic("Unsupported TLB systhesizer relocation %d",
+ rel->type);
+ }
+}
+
+static __init void resolve_relocs(struct reloc *rel, struct label *lab)
+{
+ struct label *l;
+
+ for (; rel->lab != label_invalid; rel++)
+ for (l = lab; l->lab != label_invalid; l++)
+ if (rel->lab == l->lab)
+ __resolve_relocs(rel, l);
+}
+
+static __init void copy_handler(struct reloc *rel, struct label *lab,
+ u32 *first, u32 *end, u32* target)
+{
+ long off = (long)(target - first);
+
+ memcpy(target, first, (end - first) * sizeof(u32));
+
+ for (; rel->lab != label_invalid; rel++)
+ if (rel->addr >= first && rel->addr < end)
+ rel->addr += off;
+
+ for (; lab->lab != label_invalid; lab++)
+ if (lab->addr >= first && lab->addr < end)
+ lab->addr += off;
+}
+
+static __init int insn_has_bdelay(struct reloc *rel, u32 *addr)
+{
+ for (; rel->lab != label_invalid; rel++) {
+ if (rel->addr == addr
+ && (rel->type == R_MIPS_PC16
+ || rel->type == R_MIPS_26))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* convenience functions for labeled branches */
+static void il_bltz(u32 **p, struct reloc **r, unsigned int reg,
+ enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_bltz(p, reg, 0);
+}
+
+static void il_b(u32 **p, struct reloc **r, enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_b(p, 0);
+}
+
+static void il_bnez(u32 **p, struct reloc **r, unsigned int reg,
+ enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_bnez(p, reg, 0);
+}
+
+static void il_bgezl(u32 **p, struct reloc **r, unsigned int reg,
+ enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_bgezl(p, reg, 0);
+}
+
+/* The only registers allowed in TLB handlers. */
+#define K0 26
+#define K1 27
+
+/* Some CP0 registers */
+#define C0_INDEX 0
+#define C0_ENTRYLO0 2
+#define C0_ENTRYLO1 3
+#define C0_CONTEXT 4
+#define C0_BADVADDR 8
+#define C0_ENTRYHI 10
+#define C0_EPC 14
+#define C0_XCONTEXT 20
+
+#ifdef CONFIG_MIPS64
+# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_XCONTEXT)
+#else
+# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_CONTEXT)
+#endif
+
+/* The worst case length of the handler is around 18 instructions for
+ * R3000-style TLBs and up to 63 instructions for R4000-style TLBs.
+ * Maximum space available is 32 instructions for R3000 and 64
+ * instructions for R4000.
+ *
+ * We deliberately chose a buffer size of 128, so we won't scribble
+ * over anything important on overflow before we panic.
+ */
+static __initdata u32 tlb_handler[128];
+
+/* simply assume worst case size for labels and relocs */
+static __initdata struct label labels[128];
+static __initdata struct reloc relocs[128];
+
+#ifdef CONFIG_MIPS32
+/*
+ * The R3000 TLB handler is simple.
+ */
+static void __init build_r3000_tlb_refill_handler(void)
+{
+ long pgdc = (long)pgd_current;
+ u32 *p;
+
+ memset(tlb_handler, 0, sizeof(tlb_handler));
+ p = tlb_handler;
+
+ i_mfc0(&p, K0, C0_BADVADDR);
+ i_lui(&p, K1, rel_hi(pgdc)); /* cp0 delay */
+ i_lw(&p, K1, rel_lo(pgdc), K1);
+ i_srl(&p, K0, K0, 22); /* load delay */
+ i_sll(&p, K0, K0, 2);
+ i_addu(&p, K1, K1, K0);
+ i_mfc0(&p, K0, C0_CONTEXT);
+ i_lw(&p, K1, 0, K1);
+ i_andi(&p, K0, K0, 0xffc); /* load delay */
+ i_addu(&p, K1, K1, K0);
+ i_lw(&p, K0, 0, K1);
+ i_nop(&p); /* load delay */
+ i_mtc0(&p, K0, C0_ENTRYLO0);
+ i_mfc0(&p, K1, C0_EPC); /* cp0 delay */
+ i_tlbwr(&p); /* cp0 delay */
+ i_jr(&p, K1); /* cp0 delay */
+ i_rfe(&p); /* branch delay */
+
+ if (p > tlb_handler + 32)
+ panic("TLB refill handler space exceeded");
+
+ printk("Synthesized TLB handler (%u instructions).\n",
+ p - tlb_handler);
+#ifdef DEBUG_TLB
+ {
+ int i;
+ for (i = 0; i < (p - tlb_handler); i++)
+ printk("%08x\n", tlb_handler[i]);
+ }
+#endif
+
+ memcpy((void *)CAC_BASE, tlb_handler, 0x80);
+ flush_icache_range(CAC_BASE, CAC_BASE + 0x80);
+}
+#endif /* CONFIG_MIPS32 */
+
+/*
+ * The R4000 TLB handler is much more complicated. We have two
+ * consecutive handler areas with 32 instructions space each.
+ * Since they aren't used at the same time, we can overflow in the
+ * other one.To keep things simple, we first assume linear space,
+ * then we relocate it to the final handler layout as needed.
+ */
+static __initdata u32 final_handler[64];
+
+/*
+ * Hazards
+ *
+ * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0:
+ * 2. A timing hazard exists for the TLBP instruction.
+ *
+ * stalling_instruction
+ * TLBP
+ *
+ * The JTLB is being read for the TLBP throughout the stall generated by the
+ * previous instruction. This is not really correct as the stalling instruction
+ * can modify the address used to access the JTLB. The failure symptom is that
+ * the TLBP instruction will use an address created for the stalling instruction
+ * and not the address held in C0_ENHI and thus report the wrong results.
+ *
+ * The software work-around is to not allow the instruction preceding the TLBP
+ * to stall - make it an NOP or some other instruction guaranteed not to stall.
+ *
+ * Errata 2 will not be fixed. This errata is also on the R5000.
+ *
+ * As if we MIPS hackers wouldn't know how to nop pipelines happy ...
+ */
+static __init void build_tlbp_hazard(u32 **p)
+{
+ switch (current_cpu_data.cputype) {
+ case CPU_R5000:
+ case CPU_R5000A:
+ case CPU_NEVADA:
+ i_nop(p);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Write random TLB entry, and care about the hazards from the
+ * preceeding mtc0 and for the following eret.
+ */
+static __init void build_tlb_write_random_entry(u32 **p, struct label **l,
+ struct reloc **r)
+{
+ switch (current_cpu_data.cputype) {
+ case CPU_R4000PC:
+ case CPU_R4000SC:
+ case CPU_R4000MC:
+ case CPU_R4400PC:
+ case CPU_R4400SC:
+ case CPU_R4400MC:
+ /*
+ * This branch uses up a mtc0 hazard nop slot and saves
+ * two nops after the tlbwr.
+ */
+ il_bgezl(p, r, 0, label_tlbwr_hazard);
+ i_tlbwr(p);
+ l_tlbwr_hazard(l, *p);
+ i_nop(p);
+ break;
+
+ case CPU_R4600:
+ case CPU_R4700:
+ i_nop(p);
+ i_tlbwr(p);
+ break;
+
+ case CPU_NEVADA:
+ i_nop(p); /* QED specifies 2 nops hazard */
+ /*
+ * This branch uses up a mtc0 hazard nop slot and saves
+ * a nop after the tlbwr.
+ */
+ il_bgezl(p, r, 0, label_tlbwr_hazard);
+ i_tlbwr(p);
+ l_tlbwr_hazard(l, *p);
+ break;
+
+ case CPU_RM9000:
+ /*
+ * When the JTLB is updated by tlbwi or tlbwr, a subsequent
+ * use of the JTLB for instructions should not occur for 4
+ * cpu cycles and use for data translations should not occur
+ * for 3 cpu cycles.
+ */
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_tlbwr(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ break;
+
+ case CPU_R10000:
+ case CPU_R12000:
+ case CPU_SB1:
+ i_tlbwr(p);
+ break;
+
+ default:
+ /*
+ * Others are assumed to have one cycle mtc0 hazard,
+ * and one cycle tlbwr hazard.
+ * XXX: This might be overly general.
+ */
+ i_nop(p);
+ i_tlbwr(p);
+ i_nop(p);
+ break;
+ }
+}
+
+#if CONFIG_MIPS64
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pmd entry.
+ */
+static __init void
+build_get_pmde64(u32 **p, struct label **l, struct reloc **r,
+ unsigned int tmp, unsigned int ptr)
+{
+ long pgdc = (long)pgd_current;
+
+ /*
+ * The vmalloc handling is not in the hotpath.
+ */
+ i_dmfc0(p, tmp, C0_BADVADDR);
+ il_bltz(p, r, tmp, label_vmalloc);
+ /* No i_nop needed here, since the next insn doesn't touch TMP. */
+
+# ifdef CONFIG_SMP
+ /*
+ * 64 bit SMP has the lower part of &pgd_current[smp_processor_id()]
+ * stored in CONTEXT.
+ */
+ if (in_compat_space_p(pgdc)) {
+ i_dmfc0(p, ptr, C0_CONTEXT);
+ i_dsra(p, ptr, ptr, 23);
+ } else {
+ i_dmfc0(p, ptr, C0_CONTEXT);
+ i_lui(p, tmp, rel_highest(pgdc));
+ i_dsll(p, ptr, ptr, 9);
+ i_daddiu(p, tmp, tmp, rel_higher(pgdc));
+ i_dsrl32(p, ptr, ptr, 0);
+ i_and(p, ptr, ptr, tmp);
+ i_dmfc0(p, tmp, C0_BADVADDR);
+ }
+ i_ld(p, ptr, 0, ptr);
+# else
+ i_LA_mostly(p, ptr, pgdc);
+ i_ld(p, ptr, rel_lo(pgdc), ptr);
+# endif
+
+ l_vmalloc_done(l, *p);
+ i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); /* get pgd offset in bytes */
+ i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
+ i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
+ i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+ i_ld(p, ptr, 0, ptr); /* get pmd pointer */
+ i_dsrl(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */
+ i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3);
+ i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
+}
+
+/*
+ * BVADDR is the faulting address, PTR is scratch.
+ * PTR will hold the pgd for vmalloc.
+ */
+static __init void
+build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r,
+ unsigned int bvaddr, unsigned int ptr)
+{
+ long swpd = (long)swapper_pg_dir;
+
+ l_vmalloc(l, *p);
+ i_LA(p, ptr, VMALLOC_START);
+ i_dsubu(p, bvaddr, bvaddr, ptr);
+
+ if (in_compat_space_p(swpd) && !rel_lo(swpd)) {
+ il_b(p, r, label_vmalloc_done);
+ i_lui(p, ptr, rel_hi(swpd));
+ } else {
+ i_LA_mostly(p, ptr, swpd);
+ il_b(p, r, label_vmalloc_done);
+ i_daddiu(p, ptr, ptr, rel_lo(swpd));
+ }
+}
+
+#else /* CONFIG_MIPS32 */
+
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pgd entry.
+ */
+static __init void build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+ long pgdc = (long)pgd_current;
+
+ /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */
+#ifdef CONFIG_SMP
+ i_mfc0(p, ptr, C0_CONTEXT);
+ i_LA_mostly(p, tmp, pgdc);
+ i_srl(p, ptr, ptr, 23);
+ i_sll(p, ptr, ptr, 2);
+ i_addu(p, ptr, tmp, ptr);
+#else
+ i_LA_mostly(p, ptr, pgdc);
+#endif
+ i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+ i_lw(p, ptr, rel_lo(pgdc), ptr);
+ i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
+ i_sll(p, tmp, tmp, PGD_T_LOG2);
+ i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
+}
+#endif /* CONFIG_MIPS32 */
+
+static __init void build_adjust_context(u32 **p, unsigned int ctx)
+{
+ unsigned int shift = 0;
+ unsigned int mask = 0xff0;
+
+#if !defined(CONFIG_MIPS64) && !defined(CONFIG_64BIT_PHYS_ADDR)
+ shift++;
+ mask |= 0x008;
+#endif
+
+ switch (current_cpu_data.cputype) {
+ case CPU_VR41XX:
+ case CPU_VR4111:
+ case CPU_VR4121:
+ case CPU_VR4122:
+ case CPU_VR4131:
+ case CPU_VR4181:
+ case CPU_VR4181A:
+ case CPU_VR4133:
+ shift += 2;
+ break;
+
+ default:
+ break;
+ }
+
+ if (shift)
+ i_SRL(p, ctx, ctx, shift);
+ i_andi(p, ctx, ctx, mask);
+}
+
+static __init void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+ /*
+ * Bug workaround for the Nevada. It seems as if under certain
+ * circumstances the move from cp0_context might produce a
+ * bogus result when the mfc0 instruction and its consumer are
+ * in a different cacheline or a load instruction, probably any
+ * memory reference, is between them.
+ */
+ switch (current_cpu_data.cputype) {
+ case CPU_NEVADA:
+ i_LW(p, ptr, 0, ptr);
+ GET_CONTEXT(p, tmp); /* get context reg */
+ break;
+
+ default:
+ GET_CONTEXT(p, tmp); /* get context reg */
+ i_LW(p, ptr, 0, ptr);
+ break;
+ }
+
+ build_adjust_context(p, tmp);
+ i_ADDU(p, ptr, ptr, tmp); /* add in offset */
+}
+
+static __init void build_update_entries(u32 **p, unsigned int tmp,
+ unsigned int ptep)
+{
+ /*
+ * 64bit address support (36bit on a 32bit CPU) in a 32bit
+ * Kernel is a special case. Only a few CPUs use it.
+ */
+#ifdef CONFIG_64BIT_PHYS_ADDR
+ if (cpu_has_64bit_registers) {
+ i_ld(p, tmp, 0, ptep); /* get even pte */
+ i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
+ i_dsrl(p, tmp, tmp, 6); /* convert to entrylo0 */
+ i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+ i_dsrl(p, ptep, ptep, 6); /* convert to entrylo1 */
+ i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+ } else {
+ int pte_off_even = sizeof(pte_t) / 2;
+ int pte_off_odd = pte_off_even + sizeof(pte_t);
+
+ /* The pte entries are pre-shifted */
+ i_lw(p, tmp, pte_off_even, ptep); /* get even pte */
+ i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+ i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */
+ i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+ }
+#else
+ i_LW(p, tmp, 0, ptep); /* get even pte */
+ i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
+ if (r45k_bvahwbug()) {
+ build_tlbp_hazard(p);
+ i_tlbp(p);
+ }
+ i_SRL(p, tmp, tmp, 6); /* convert to entrylo0 */
+ if (r4k_250MHZhwbug())
+ i_mtc0(p, 0, C0_ENTRYLO0);
+ i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+ i_SRL(p, ptep, ptep, 6); /* convert to entrylo1 */
+ if (r45k_bvahwbug())
+ i_mfc0(p, tmp, C0_INDEX);
+ if (r4k_250MHZhwbug())
+ i_mtc0(p, 0, C0_ENTRYLO1);
+ i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+#endif
+}
+
+static void __init build_r4000_tlb_refill_handler(void)
+{
+ u32 *p = tlb_handler;
+ struct label *l = labels;
+ struct reloc *r = relocs;
+ u32 *f;
+ unsigned int final_len;
+
+ memset(tlb_handler, 0, sizeof(tlb_handler));
+ memset(labels, 0, sizeof(labels));
+ memset(relocs, 0, sizeof(relocs));
+ memset(final_handler, 0, sizeof(final_handler));
+
+ /*
+ * create the plain linear handler
+ */
+ if (bcm1250_m3_war) {
+ i_MFC0(&p, K0, C0_BADVADDR);
+ i_MFC0(&p, K1, C0_ENTRYHI);
+ i_xor(&p, K0, K0, K1);
+ i_SRL(&p, K0, K0, PAGE_SHIFT+1);
+ il_bnez(&p, &r, K0, label_leave);
+ /* No need for i_nop */
+ }
+
+#ifdef CONFIG_MIPS64
+ build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd ptr in K1 */
+#else
+ build_get_pgde32(&p, K0, K1); /* get pgd ptr in K1 */
+#endif
+
+ build_get_ptep(&p, K0, K1);
+ build_update_entries(&p, K0, K1);
+ build_tlb_write_random_entry(&p, &l, &r);
+ l_leave(&l, p);
+ i_eret(&p); /* return from trap */
+
+#ifdef CONFIG_MIPS64
+ build_get_pgd_vmalloc64(&p, &l, &r, K0, K1);
+#endif
+
+ /*
+ * Overflow check: For the 64bit handler, we need at least one
+ * free instruction slot for the wrap-around branch. In worst
+ * case, if the intended insertion point is a delay slot, we
+ * need three, with the the second nop'ed and the third being
+ * unused.
+ */
+#ifdef CONFIG_MIPS32
+ if ((p - tlb_handler) > 64)
+ panic("TLB refill handler space exceeded");
+#else
+ if (((p - tlb_handler) > 63)
+ || (((p - tlb_handler) > 61)
+ && insn_has_bdelay(relocs, tlb_handler + 29)))
+ panic("TLB refill handler space exceeded");
+#endif
+
+ /*
+ * Now fold the handler in the TLB refill handler space.
+ */
+#ifdef CONFIG_MIPS32
+ f = final_handler;
+ /* Simplest case, just copy the handler. */
+ copy_handler(relocs, labels, tlb_handler, p, f);
+ final_len = p - tlb_handler;
+#else /* CONFIG_MIPS64 */
+ f = final_handler + 32;
+ if ((p - tlb_handler) <= 32) {
+ /* Just copy the handler. */
+ copy_handler(relocs, labels, tlb_handler, p, f);
+ final_len = p - tlb_handler;
+ } else {
+ u32 *split = tlb_handler + 30;
+
+ /*
+ * Find the split point.
+ */
+ if (insn_has_bdelay(relocs, split - 1))
+ split--;
+
+ /* Copy first part of the handler. */
+ copy_handler(relocs, labels, tlb_handler, split, f);
+ f += split - tlb_handler;
+
+ /* Insert branch. */
+ l_split(&l, final_handler);
+ il_b(&f, &r, label_split);
+ if (insn_has_bdelay(relocs, split))
+ i_nop(&f);
+ else {
+ copy_handler(relocs, labels, split, split + 1, f);
+ f++;
+ split++;
+ }
+
+ /* Copy the rest of the handler. */
+ copy_handler(relocs, labels, split, p, final_handler);
+ final_len = (f - (final_handler + 32)) + (p - split);
+ }
+#endif /* CONFIG_MIPS64 */
+
+ resolve_relocs(relocs, labels);
+ printk("Synthesized TLB handler (%u instructions).\n", final_len);
+
+#ifdef DEBUG_TLB
+ {
+ int i;
+
+ for (i = 0; i < 64; i++)
+ printk("%08x\n", final_handler[i]);
+ }
+#endif
+
+ memcpy((void *)CAC_BASE, final_handler, 0x100);
+ flush_icache_range(CAC_BASE, CAC_BASE + 0x100);
+}
+
+void __init build_tlb_refill_handler(void)
+{
+ switch (current_cpu_data.cputype) {
+#ifdef CONFIG_MIPS32
+ case CPU_R2000:
+ case CPU_R3000:
+ case CPU_R3000A:
+ case CPU_R3081E:
+ case CPU_TX3912:
+ case CPU_TX3922:
+ case CPU_TX3927:
+ build_r3000_tlb_refill_handler();
+ break;
+
+ case CPU_R6000:
+ case CPU_R6000A:
+ panic("No R6000 TLB refill handler yet");
+ break;
+#endif
+
+ case CPU_R8000:
+ panic("No R8000 TLB refill handler yet");
+ break;
+
+ default:
+ build_r4000_tlb_refill_handler();
+ }
+}
--- arch/mips/mm/tlbex64-r4k.S 2004-11-21 03:05:35.000000000 +0100
+++ /dev/null 2004-08-24 19:23:08.000000000 +0200
@@ -1,136 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2000 Silicon Graphics, Inc.
- * Written by Ulf Carlsson (ulfc@engr.sgi.com)
- * Copyright (C) 2002 Maciej W. Rozycki
- */
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-
-#include <asm/asm.h>
-#include <asm/hazards.h>
-#include <asm/regdef.h>
-#include <asm/mipsregs.h>
-#include <asm/stackframe.h>
-#include <asm/war.h>
-
-#define _VMALLOC_START 0xc000000000000000
-
- .macro GET_PGD, ptr
-#ifdef CONFIG_SMP
- /*
- * Fixme - this is b0rked for pgd_current outside of CKSEG0
- */
- dmfc0 \ptr, CP0_CONTEXT
- dsra \ptr, 23 # get pgd_current[cpu]
- ld \ptr, (\ptr)
-#else
- ld \ptr, pgd_current
-#endif
- .endm
-
- /*
- * After this macro runs we have a pointer to the pte of the address
- * that caused the fault in PTR. Expects register containing the
- * the pagetable root pointer as the ptr argument and c0_badvaddr
- * passed as tmp argument.
- */
- .macro LOAD_PTE2, ptr, tmp
- dsrl \tmp, (_PGDIR_SHIFT-3) # get pgd offset in bytes
- andi \tmp, ((_PTRS_PER_PGD - 1)<<3)
- daddu \ptr, \tmp # add in pgd offset
- dmfc0 \tmp, CP0_BADVADDR
- ld \ptr, (\ptr) # get pmd pointer
- dsrl \tmp, (_PMD_SHIFT-3) # get pmd offset in bytes
- andi \tmp, ((_PTRS_PER_PMD - 1)<<3)
- daddu \ptr, \tmp # add in pmd offset
- dmfc0 \tmp, CP0_XCONTEXT
- ld \ptr, (\ptr) # get pte pointer
- andi \tmp, 0xff0 # get pte offset
- daddu \ptr, \tmp
- .endm
-
- /*
- * This places the even/odd pte pair in the page table at the pte
- * entry pointed to by PTE into ENTRYLO0 and ENTRYLO1.
- */
- .macro PTE_RELOAD, pte0, pte1
- dsrl \pte0, 6 # convert to entrylo0
- dmtc0 \pte0, CP0_ENTRYLO0 # load it
- dsrl \pte1, 6 # convert to entrylo1
- dmtc0 \pte1, CP0_ENTRYLO1 # load it
- .endm
-
-
- .text
- .set noreorder
- .set mips3
-
- __INIT
-
- /*
- * TLB refill handlers for the R4000 and SB1.
- * Attention: We may only use 32 instructions / 128 bytes.
- */
- .align 5
-LEAF(except_vec1_r4k)
- .set noat
- dla k0, handle_vec1_r4k
- jr k0
- nop
-END(except_vec1_r4k)
-
-LEAF(except_vec1_sb1)
-#if BCM1250_M3_WAR
- dmfc0 k0, CP0_BADVADDR
- dmfc0 k1, CP0_ENTRYHI
- xor k0, k1
- dsrl k0, k0, _PAGE_SHIFT+1
- bnez k0, 1f
-#endif
- .set noat
- dla k0, handle_vec1_r4k
- jr k0
- nop
-
-1: eret
- nop
-END(except_vec1_sb1)
-
- __FINIT
-
- .align 5
-LEAF(handle_vec1_r4k)
- .set noat
- dmfc0 k0, CP0_BADVADDR
- bltz k0, 9f
-
- GET_PGD k1 # pointer to root of pgd
- LOAD_PTE2 k1 k0
- ld k0, 0(k1) # get even pte
- ld k1, 8(k1) # get odd pte
- PTE_RELOAD k0 k1
- mtc0_tlbw_hazard
- tlbwr
- nop
- tlbw_eret_hazard
- eret
-
-9: # handle the vmalloc range
- dli k1, _VMALLOC_START
- dsubu k0, k1
- dla k1, swapper_pg_dir # pointer to root of pgd
- LOAD_PTE2 k1 k0
- ld k0, 0(k1) # get even pte
- ld k1, 8(k1) # get odd pte
- PTE_RELOAD k0 k1
- mtc0_tlbw_hazard
- tlbwr
- nop
- tlbw_eret_hazard
- eret
-END(handle_vec1_r4k)
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-21 17:02 [PATCH] Synthesize TLB refill handler at runtime Thiemo Seufer
@ 2004-11-21 19:50 ` Geert Uytterhoeven
2004-11-21 20:37 ` Thiemo Seufer
2004-11-21 20:43 ` Ralf Baechle
0 siblings, 2 replies; 28+ messages in thread
From: Geert Uytterhoeven @ 2004-11-21 19:50 UTC (permalink / raw)
To: Thiemo Seufer; +Cc: Linux/MIPS Development, Ralf Baechle
On Sun, 21 Nov 2004, Thiemo Seufer wrote:
> currently we have a large number of TLB refill handlers written in
> hand-optimized assembly which are mostly indentical. The appended
> patch removes them all, and adds a micro-assembler instead which
> synthesizes the proper variant for the CPU at runtime.
Woow.....
I found a few typos (in the comments, didn't verify the code ;-)
s/Systhesize/Synthesize/
s/systhesizer/synthesizer/
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-21 19:50 ` Geert Uytterhoeven
@ 2004-11-21 20:37 ` Thiemo Seufer
2004-11-22 7:01 ` Ralf Baechle
2004-11-22 14:37 ` Maciej W. Rozycki
2004-11-21 20:43 ` Ralf Baechle
1 sibling, 2 replies; 28+ messages in thread
From: Thiemo Seufer @ 2004-11-21 20:37 UTC (permalink / raw)
To: Geert Uytterhoeven; +Cc: Linux/MIPS Development, Ralf Baechle
Geert Uytterhoeven wrote:
> On Sun, 21 Nov 2004, Thiemo Seufer wrote:
> > currently we have a large number of TLB refill handlers written in
> > hand-optimized assembly which are mostly indentical. The appended
> > patch removes them all, and adds a micro-assembler instead which
> > synthesizes the proper variant for the CPU at runtime.
>
> Woow.....
>
> I found a few typos (in the comments, didn't verify the code ;-)
>
> s/Systhesize/Synthesize/
> s/systhesizer/synthesizer/
Aww, fatal error in the spelling module. :-)
Updated.
Thiemo
Index: arch/mips/mm/Makefile
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/Makefile,v
retrieving revision 1.68
diff -u -p -r1.68 Makefile
--- arch/mips/mm/Makefile 20 Jun 2004 23:52:17 -0000 1.68
+++ arch/mips/mm/Makefile 20 Nov 2004 16:46:40 -0000
@@ -2,7 +2,8 @@
# Makefile for the Linux/MIPS-specific parts of the memory manager.
#
-obj-y += cache.o extable.o fault.o init.o pgtable.o
+obj-y += cache.o extable.o fault.o init.o pgtable.o \
+ tlbex.o
obj-$(CONFIG_MIPS32) += ioremap.o pgtable-32.o
obj-$(CONFIG_MIPS64) += pgtable-64.o
@@ -47,16 +48,16 @@ obj-$(CONFIG_CPU_SB1) += tlbex32-r4k.o
obj-$(CONFIG_CPU_TX39XX) += tlbex32-r3k.o
endif
ifdef CONFIG_MIPS64
-obj-$(CONFIG_CPU_R4300) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R4X00) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R5000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_NEVADA) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R5432) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_RM7000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_RM9000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_R10000) += tlb64-glue-r4k.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_SB1) += tlb64-glue-sb1.o tlbex64-r4k.o
-obj-$(CONFIG_CPU_MIPS64) += tlb64-glue-r4k.o tlbex64-r4k.o
+obj-$(CONFIG_CPU_R4300) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R4X00) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R5000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_NEVADA) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R5432) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_RM7000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_RM9000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_R10000) += tlb64-glue-r4k.o
+obj-$(CONFIG_CPU_SB1) += tlb64-glue-sb1.o
+obj-$(CONFIG_CPU_MIPS64) += tlb64-glue-r4k.o
endif
Index: arch/mips/mm/tlb-andes.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-andes.c,v
retrieving revision 1.8
diff -u -p -r1.8 tlb-andes.c
--- arch/mips/mm/tlb-andes.c 19 Oct 2004 02:21:16 -0000 1.8
+++ arch/mips/mm/tlb-andes.c 20 Nov 2004 16:46:46 -0000
@@ -17,10 +17,7 @@
#include <asm/system.h>
#include <asm/mmu_context.h>
-extern void except_vec0_generic(void);
-extern void except_vec0_r4000(void);
-extern void except_vec1_generic(void);
-extern void except_vec1_r4k(void);
+extern void build_tlb_refill_handler(void);
#define NTLB_ENTRIES 64
#define NTLB_ENTRIES_HALF 32
@@ -257,14 +254,5 @@ void __init tlb_init(void)
/* Did I tell you that ARC SUCKS? */
-#ifdef CONFIG_MIPS32
- memcpy((void *)KSEG0, &except_vec0_r4000, 0x80);
- memcpy((void *)(KSEG0 + 0x080), &except_vec1_generic, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x100);
-#endif
-#ifdef CONFIG_MIPS64
- memcpy((void *)(CKSEG0 + 0x000), &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x080), except_vec1_r4k, 0x80);
- flush_icache_range(CKSEG0 + 0x80, CKSEG0 + 0x100);
-#endif
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-r3k.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-r3k.c,v
retrieving revision 1.26
diff -u -p -r1.26 tlb-r3k.c
--- arch/mips/mm/tlb-r3k.c 11 Dec 2003 16:27:01 -0000 1.26
+++ arch/mips/mm/tlb-r3k.c 20 Nov 2004 16:46:46 -0000
@@ -26,7 +26,7 @@
#undef DEBUG_TLB
-extern char except_vec0_r2300;
+extern void build_tlb_refill_handler(void);
/* CP0 hazard avoidance. */
#define BARRIER \
@@ -284,6 +284,6 @@ void __init add_wired_entry(unsigned lon
void __init tlb_init(void)
{
local_flush_tlb_all();
- memcpy((void *)KSEG0, &except_vec0_r2300, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x80);
+
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-r4k.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-r4k.c,v
retrieving revision 1.38
diff -u -p -r1.38 tlb-r4k.c
--- arch/mips/mm/tlb-r4k.c 19 Mar 2004 04:07:59 -0000 1.38
+++ arch/mips/mm/tlb-r4k.c 20 Nov 2004 16:46:46 -0000
@@ -19,12 +19,7 @@
#include <asm/pgtable.h>
#include <asm/system.h>
-extern void except_vec0_generic(void);
-extern void except_vec0_nevada(void);
-extern void except_vec0_r4000(void);
-extern void except_vec0_r4600(void);
-extern void except_vec1_generic(void);
-extern void except_vec1_r4k(void);
+extern void build_tlb_refill_handler(void);
/* CP0 hazard avoidance. */
#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \
@@ -414,19 +409,5 @@ void __init tlb_init(void)
temp_tlb_entry = current_cpu_data.tlbsize - 1;
local_flush_tlb_all();
-#ifdef CONFIG_MIPS32
- if (current_cpu_data.cputype == CPU_NEVADA)
- memcpy((void *)KSEG0, &except_vec0_nevada, 0x80);
- else if (current_cpu_data.cputype == CPU_R4600)
- memcpy((void *)KSEG0, &except_vec0_r4600, 0x80);
- else
- memcpy((void *)KSEG0, &except_vec0_r4000, 0x80);
- memcpy((void *)(KSEG0 + 0x080), &except_vec1_generic, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x100);
-#endif
-#ifdef CONFIG_MIPS64
- memcpy((void *)(CKSEG0 + 0x00), &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x80), except_vec1_r4k, 0x80);
- flush_icache_range(CKSEG0 + 0x80, CKSEG0 + 0x100);
-#endif
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-r8k.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-r8k.c,v
retrieving revision 1.1
diff -u -p -r1.1 tlb-r8k.c
--- arch/mips/mm/tlb-r8k.c 20 Jun 2004 23:01:07 -0000 1.1
+++ arch/mips/mm/tlb-r8k.c 20 Nov 2004 16:46:46 -0000
@@ -19,8 +19,7 @@
#include <asm/pgtable.h>
#include <asm/system.h>
-extern void except_vec0_generic(void);
-extern void except_vec1_r8k(void);
+extern void build_tlb_refill_handler(void);
#define TFP_TLB_SIZE 384
#define TFP_TLB_SET_SHIFT 7
@@ -247,7 +246,5 @@ void __init tlb_init(void)
local_flush_tlb_all();
- memcpy((void *)(CKSEG0 + 0x00), &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x80), except_vec1_r8k, 0x80);
- flush_icache_range(CKSEG0 + 0x80, CKSEG0 + 0x100);
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlb-sb1.c
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlb-sb1.c,v
retrieving revision 1.45
diff -u -p -r1.45 tlb-sb1.c
--- arch/mips/mm/tlb-sb1.c 23 Oct 2004 01:18:17 -0000 1.45
+++ arch/mips/mm/tlb-sb1.c 20 Nov 2004 16:46:46 -0000
@@ -23,14 +23,7 @@
#include <asm/bootinfo.h>
#include <asm/cpu.h>
-#ifdef CONFIG_MIPS32
-extern void except_vec0_sb1(void);
-extern void except_vec1_generic(void);
-#endif
-#ifdef CONFIG_MIPS64
-extern void except_vec0_generic(void);
-extern void except_vec1_sb1(void);
-#endif
+extern void build_tlb_refill_handler(void);
#define UNIQUE_ENTRYHI(idx) (KSEG0 + ((idx) << (PAGE_SHIFT + 1)))
@@ -380,14 +373,5 @@ void tlb_init(void)
*/
sb1_sanitize_tlb();
-#ifdef CONFIG_MIPS32
- memcpy((void *)KSEG0, &except_vec0_sb1, 0x80);
- memcpy((void *)(KSEG0 + 0x080), &except_vec1_generic, 0x80);
- flush_icache_range(KSEG0, KSEG0 + 0x100);
-#endif
-#ifdef CONFIG_MIPS64
- memcpy((void *)CKSEG0, &except_vec0_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x80), &except_vec1_sb1, 0x80);
- flush_icache_range(CKSEG0, CKSEG0 + 0x100);
-#endif
+ build_tlb_refill_handler();
}
Index: arch/mips/mm/tlbex32-r3k.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlbex32-r3k.S,v
retrieving revision 1.1
diff -u -p -r1.1 tlbex32-r3k.S
--- arch/mips/mm/tlbex32-r3k.S 20 Jun 2004 23:52:17 -0000 1.1
+++ arch/mips/mm/tlbex32-r3k.S 20 Nov 2004 16:46:46 -0000
@@ -24,36 +24,6 @@
#define TLB_OPTIMIZE /* If you are paranoid, disable this. */
- .text
- .set mips1
- .set noreorder
-
- __INIT
-
- /* TLB refill, R[23]00 version */
- LEAF(except_vec0_r2300)
- .set noat
- .set mips1
- mfc0 k0, CP0_BADVADDR
- lw k1, pgd_current # get pgd pointer
- srl k0, k0, 22
- sll k0, k0, 2
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
- and k0, k0, 0xffc
- addu k1, k1, k0
- lw k0, (k1)
- nop
- mtc0 k0, CP0_ENTRYLO0
- mfc0 k1, CP0_EPC
- tlbwr
- jr k1
- rfe
- END(except_vec0_r2300)
-
- __FINIT
-
/* ABUSE of CPP macros 101. */
/* After this macro runs, the pte faulted on is
Index: arch/mips/mm/tlbex32-r4k.S
===================================================================
RCS file: /home/cvs/linux/arch/mips/mm/tlbex32-r4k.S,v
retrieving revision 1.2
diff -u -p -r1.2 tlbex32-r4k.S
--- arch/mips/mm/tlbex32-r4k.S 3 Oct 2004 01:16:24 -0000 1.2
+++ arch/mips/mm/tlbex32-r4k.S 20 Nov 2004 16:46:46 -0000
@@ -139,272 +139,6 @@
_PAGE_VALID | _PAGE_DIRTY); \
PTE_S pte, (ptr);
- __INIT
-
-#ifdef CONFIG_64BIT_PHYS_ADDR
-#define GET_PTE_OFF(reg)
-#elif CONFIG_CPU_VR41XX
-#define GET_PTE_OFF(reg) srl reg, reg, 3
-#else
-#define GET_PTE_OFF(reg) srl reg, reg, 1
-#endif
-
-/*
- * These handlers much be written in a relocatable manner
- * because based upon the cpu type an arbitrary one of the
- * following pieces of code will be copied to the KSEG0
- * vector location.
- */
- /* TLB refill, EXL == 0, R4xx0, non-R4600 version */
- .set noreorder
- .set noat
- LEAF(except_vec0_r4000)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR # Get faulting address
- srl k0, k0, _PGDIR_SHIFT # get pgd only bits
-
- sll k0, k0, 2
- addu k1, k1, k0 # add in pgd offset
- mfc0 k0, CP0_CONTEXT # get context reg
- lw k1, (k1)
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0 # add in offset
- PTE_L k0, 0(k1) # get even pte
- PTE_L k1, PTE_SIZE(k1) # get odd pte
- PTE_SRL k0, k0, 6 # convert to entrylo0
- P_MTC0 k0, CP0_ENTRYLO0 # load it
- PTE_SRL k1, k1, 6 # convert to entrylo1
- P_MTC0 k1, CP0_ENTRYLO1 # load it
- mtc0_tlbw_hazard
- tlbwr # write random tlb entry
- nop
- tlbw_eret_hazard
- eret # return from trap
- END(except_vec0_r4000)
-
- /* TLB refill, EXL == 0, R4600 version */
- LEAF(except_vec0_r4600)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- PTE_SRL k0, k0, 6
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- P_MTC0 k1, CP0_ENTRYLO1
- nop
- tlbwr
- nop
- eret
- END(except_vec0_r4600)
-
- /* TLB refill, EXL == 0, R52x0 "Nevada" version */
- /*
- * This version has a bug workaround for the Nevada. It seems
- * as if under certain circumstances the move from cp0_context
- * might produce a bogus result when the mfc0 instruction and
- * it's consumer are in a different cacheline or a load instruction,
- * probably any memory reference, is between them. This is
- * potencially slower than the R4000 version, so we use this
- * special version.
- */
- .set noreorder
- .set noat
- LEAF(except_vec0_nevada)
- .set mips3
- mfc0 k0, CP0_BADVADDR # Get faulting address
- srl k0, k0, _PGDIR_SHIFT # get pgd only bits
- lw k1, pgd_current # get pgd pointer
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0 # add in pgd offset
- lw k1, (k1)
- mfc0 k0, CP0_CONTEXT # get context reg
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0 # add in offset
- PTE_L k0, 0(k1) # get even pte
- PTE_L k1, PTE_SIZE(k1) # get odd pte
- PTE_SRL k0, k0, 6 # convert to entrylo0
- P_MTC0 k0, CP0_ENTRYLO0 # load it
- PTE_SRL k1, k1, 6 # convert to entrylo1
- P_MTC0 k1, CP0_ENTRYLO1 # load it
- nop # QED specified nops
- nop
- tlbwr # write random tlb entry
- nop # traditional nop
- eret # return from trap
- END(except_vec0_nevada)
-
- /* TLB refill, EXL == 0, SB1 with M3 errata handling version */
- LEAF(except_vec0_sb1)
-#if BCM1250_M3_WAR
- mfc0 k0, CP0_BADVADDR
- mfc0 k1, CP0_ENTRYHI
- xor k0, k1
- srl k0, k0, PAGE_SHIFT+1
- bnez k0, 1f
-#endif
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR # Get faulting address
- srl k0, k0, _PGDIR_SHIFT # get pgd only bits
- sll k0, k0, 2
- addu k1, k1, k0 # add in pgd offset
- mfc0 k0, CP0_CONTEXT # get context reg
- lw k1, (k1)
- GET_PTE_OFF(k0) # get pte offset
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0 # add in offset
- PTE_L k0, 0(k1) # get even pte
- PTE_L k1, PTE_SIZE(k1) # get odd pte
- PTE_SRL k0, k0, 6 # convert to entrylo0
- P_MTC0 k0, CP0_ENTRYLO0 # load it
- PTE_SRL k1, k1, 6 # convert to entrylo1
- P_MTC0 k1, CP0_ENTRYLO1 # load it
- tlbwr # write random tlb entry
-1: eret # return from trap
- END(except_vec0_sb1)
-
- /* TLB refill, EXL == 0, R4[40]00/R5000 badvaddr hwbug version */
- LEAF(except_vec0_r45k_bvahwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- nop /* XXX */
- tlbp
- PTE_SRL k0, k0, 6
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- mfc0 k0, CP0_INDEX
- P_MTC0 k1, CP0_ENTRYLO1
- bltzl k0, 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r45k_bvahwbug)
-
-#ifdef CONFIG_SMP
- /* TLB refill, EXL == 0, R4000 MP badvaddr hwbug version */
- LEAF(except_vec0_r4k_mphwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- nop /* XXX */
- tlbp
- PTE_SRL k0, k0, 6
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- mfc0 k0, CP0_INDEX
- P_MTC0 k1, CP0_ENTRYLO1
- bltzl k0, 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r4k_mphwbug)
-#endif
-
- /* TLB refill, EXL == 0, R4000 UP 250MHZ entrylo[01] hwbug version */
- LEAF(except_vec0_r4k_250MHZhwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- PTE_SRL k0, k0, 6
- P_MTC0 zero, CP0_ENTRYLO0
- P_MTC0 k0, CP0_ENTRYLO0
- PTE_SRL k1, k1, 6
- P_MTC0 zero, CP0_ENTRYLO1
- P_MTC0 k1, CP0_ENTRYLO1
- b 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r4k_250MHZhwbug)
-
-#ifdef CONFIG_SMP
- /* TLB refill, EXL == 0, R4000 MP 250MHZ entrylo[01]+badvaddr bug version */
- LEAF(except_vec0_r4k_MP250MHZhwbug)
- .set mips3
- GET_PGD(k0, k1) # get pgd pointer
- mfc0 k0, CP0_BADVADDR
- srl k0, k0, _PGDIR_SHIFT
- sll k0, k0, 2 # log2(sizeof(pgd_t)
- addu k1, k1, k0
- mfc0 k0, CP0_CONTEXT
- lw k1, (k1)
-#ifndef CONFIG_64BIT_PHYS_ADDR
- srl k0, k0, 1
-#endif
- and k0, k0, PTEP_INDX_MSK
- addu k1, k1, k0
- PTE_L k0, 0(k1)
- PTE_L k1, PTE_SIZE(k1)
- nop /* XXX */
- tlbp
- PTE_SRL k0, k0, 6
- P_MTC0 zero, CP0_ENTRYLO0
- P_MTC0 k0, CP0_ENTRYLO0
- mfc0 k0, CP0_INDEX
- PTE_SRL k1, k1, 6
- P_MTC0 zero, CP0_ENTRYLO1
- P_MTC0 k1, CP0_ENTRYLO1
- bltzl k0, 1f
- tlbwr
-1:
- nop
- eret
- END(except_vec0_r4k_MP250MHZhwbug)
-#endif
-
- __FINIT
.set noreorder
--- /dev/null 2004-08-24 19:23:08.000000000 +0200
+++ arch/mips/mm/tlbex.c 2004-11-20 17:41:35.000000000 +0100
@@ -0,0 +1,1162 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Synthesize TLB refill handlers at runtime.
+ *
+ * Copyright (C) 2004 by Thiemo Seufer
+ */
+
+#include <stdarg.h>
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/inst.h>
+#include <asm/elf.h>
+#include <asm/smp.h>
+
+/* #define DEBUG_TLB */
+
+static __init int r45k_bvahwbug(void)
+{
+ /* XXX: We should probe for the presence of this bug, but we don't. */
+ return 0;
+}
+
+static __init int r4k_250MHZhwbug(void)
+{
+ /* XXX: We should probe for the presence of this bug, but we don't. */
+ return 0;
+}
+
+static __init int bcm1250_m3_war(void)
+{
+ return BCM1250_M3_WAR;
+}
+
+/*
+ * A little micro-assembler, intended for TLB refill handler
+ * synthesizing. It is intentionally kept simple, does only support
+ * a subset of instructions, and does not try to hide pipeline effects
+ * like branch delay slots.
+ */
+
+enum fields
+{
+ RS = 0x001,
+ RT = 0x002,
+ RD = 0x004,
+ RE = 0x008,
+ SIMM = 0x010,
+ UIMM = 0x020,
+ BIMM = 0x040,
+ JIMM = 0x080,
+ FUNC = 0x100,
+};
+
+#define OP_MASK 0x2f
+#define OP_SH 26
+#define RS_MASK 0x1f
+#define RS_SH 21
+#define RT_MASK 0x1f
+#define RT_SH 16
+#define RD_MASK 0x1f
+#define RD_SH 11
+#define RE_MASK 0x1f
+#define RE_SH 6
+#define IMM_MASK 0xffff
+#define IMM_SH 0
+#define JIMM_MASK 0x3ffffff
+#define JIMM_SH 0
+#define FUNC_MASK 0x2f
+#define FUNC_SH 0
+
+enum opcode {
+ insn_invalid,
+ insn_addu, insn_addiu, insn_and, insn_andi, insn_beq,
+ insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, insn_bne,
+ insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0,
+ insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32,
+ insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld,
+ insn_lui, insn_lw, insn_mfc0, insn_mtc0, insn_ori, insn_rfe,
+ insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw,
+ insn_tlbp, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori
+};
+
+struct insn {
+ enum opcode opcode;
+ u32 match;
+ enum fields fields;
+};
+
+/* This macro sets the non-variable bits of an instruction. */
+#define M(a, b, c, d, e, f) \
+ ((a) << OP_SH \
+ | (b) << RS_SH \
+ | (c) << RT_SH \
+ | (d) << RD_SH \
+ | (e) << RE_SH \
+ | (f) << FUNC_SH)
+
+static __initdata struct insn insn_table[] = {
+ { insn_addiu, M(addiu_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_addu, M(spec_op,0,0,0,0,addu_op), RS | RT | RD },
+ { insn_and, M(spec_op,0,0,0,0,and_op), RS | RT | RD },
+ { insn_andi, M(andi_op,0,0,0,0,0), RS | RT | UIMM },
+ { insn_beq, M(beq_op,0,0,0,0,0), RS | RT | BIMM },
+ { insn_bgez, M(bcond_op,0,bgez_op,0,0,0), RS | BIMM },
+ { insn_bgezl, M(bcond_op,0,bgezl_op,0,0,0), RS | BIMM },
+ { insn_bltz, M(bcond_op,0,bltz_op,0,0,0), RS | BIMM },
+ { insn_bltzl, M(bcond_op,0,bltzl_op,0,0,0), RS | BIMM },
+ { insn_bne, M(bne_op,0,0,0,0,0), RS | RT | BIMM },
+ { insn_daddiu, M(daddiu_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_daddu, M(spec_op,0,0,0,0,daddu_op), RS | RT | RD },
+ { insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD },
+ { insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD },
+ { insn_dsll, M(spec_op,0,0,0,0,dsll_op), RT | RD | RE },
+ { insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE },
+ { insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE },
+ { insn_dsrl, M(spec_op,0,0,0,0,dsrl_op), RT | RD | RE },
+ { insn_dsrl32, M(spec_op,0,0,0,0,dsrl32_op), RT | RD | RE },
+ { insn_dsubu, M(spec_op,0,0,0,0,dsubu_op), RS | RT | RD },
+ { insn_eret, M(cop0_op,cop_op,0,0,0,eret_op), 0 },
+ { insn_j, M(j_op,0,0,0,0,0), JIMM },
+ { insn_jal, M(jal_op,0,0,0,0,0), JIMM },
+ { insn_jr, M(spec_op,0,0,0,0,jr_op), RS },
+ { insn_ld, M(ld_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_lui, M(lui_op,0,0,0,0,0), RT | SIMM },
+ { insn_lw, M(lw_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD },
+ { insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD },
+ { insn_ori, M(ori_op,0,0,0,0,0), RS | RT | UIMM },
+ { insn_rfe, M(cop0_op,cop_op,0,0,0,rfe_op), 0 },
+ { insn_sd, M(sd_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_sll, M(spec_op,0,0,0,0,sll_op), RT | RD | RE },
+ { insn_sra, M(spec_op,0,0,0,0,sra_op), RT | RD | RE },
+ { insn_srl, M(spec_op,0,0,0,0,srl_op), RT | RD | RE },
+ { insn_subu, M(spec_op,0,0,0,0,subu_op), RS | RT | RD },
+ { insn_sw, M(sw_op,0,0,0,0,0), RS | RT | SIMM },
+ { insn_tlbp, M(cop0_op,cop_op,0,0,0,tlbp_op), 0 },
+ { insn_tlbwi, M(cop0_op,cop_op,0,0,0,tlbwi_op), 0 },
+ { insn_tlbwr, M(cop0_op,cop_op,0,0,0,tlbwr_op), 0 },
+ { insn_xor, M(spec_op,0,0,0,0,xor_op), RS | RT | RD },
+ { insn_xori, M(xori_op,0,0,0,0,0), RS | RT | UIMM },
+ { insn_invalid, 0, 0 }
+};
+
+#undef M
+
+static __init u32 build_rs(u32 arg)
+{
+ if (arg & ~RS_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RS_MASK) << RS_SH;
+}
+
+static __init u32 build_rt(u32 arg)
+{
+ if (arg & ~RT_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RT_MASK) << RT_SH;
+}
+
+static __init u32 build_rd(u32 arg)
+{
+ if (arg & ~RD_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RD_MASK) << RD_SH;
+}
+
+static __init u32 build_re(u32 arg)
+{
+ if (arg & ~RE_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg & RE_MASK) << RE_SH;
+}
+
+static __init u32 build_simm(s32 arg)
+{
+ if (arg > 0x7fff || arg < -0x8000)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return arg & 0xffff;
+}
+
+static __init u32 build_uimm(u32 arg)
+{
+ if (arg & ~IMM_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return arg & IMM_MASK;
+}
+
+static __init u32 build_bimm(s32 arg)
+{
+ if (arg > 0x1ffff || arg < -0x20000)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ if (arg & 0x3)
+ printk(KERN_WARNING "Invalid TLB synthesizer branch target\n");
+
+ return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff);
+}
+
+static __init u32 build_jimm(u32 arg)
+{
+ if (arg & ~((JIMM_MASK) << 2))
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return (arg >> 2) & JIMM_MASK;
+}
+
+static __init u32 build_func(u32 arg)
+{
+ if (arg & ~FUNC_MASK)
+ printk(KERN_WARNING "TLB synthesizer field overflow\n");
+
+ return arg & FUNC_MASK;
+}
+
+/*
+ * The order of opcode arguments is implicitly left to right,
+ * starting with RS and ending with FUNC or IMM.
+ */
+static void __init build_insn(u32 **buf, enum opcode opc, ...)
+{
+ struct insn *ip = NULL;
+ unsigned int i;
+ va_list ap;
+ u32 op;
+
+ for (i = 0; insn_table[i].opcode != insn_invalid; i++)
+ if (insn_table[i].opcode == opc) {
+ ip = &insn_table[i];
+ break;
+ }
+
+ if (!ip)
+ panic("Unsupported TLB synthesizer instruction %d", opc);
+
+ op = ip->match;
+ va_start(ap, opc);
+ if (ip->fields & RS) op |= build_rs(va_arg(ap, u32));
+ if (ip->fields & RT) op |= build_rt(va_arg(ap, u32));
+ if (ip->fields & RD) op |= build_rd(va_arg(ap, u32));
+ if (ip->fields & RE) op |= build_re(va_arg(ap, u32));
+ if (ip->fields & SIMM) op |= build_simm(va_arg(ap, s32));
+ if (ip->fields & UIMM) op |= build_uimm(va_arg(ap, u32));
+ if (ip->fields & BIMM) op |= build_bimm(va_arg(ap, s32));
+ if (ip->fields & JIMM) op |= build_jimm(va_arg(ap, u32));
+ if (ip->fields & FUNC) op |= build_func(va_arg(ap, u32));
+ va_end(ap);
+
+ **buf = op;
+ (*buf)++;
+}
+
+#define I_u1u2u3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, a, b, c); \
+ }
+
+#define I_u2u1u3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, b, a, c); \
+ }
+
+#define I_u3u1u2(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, b, c, a); \
+ }
+
+#define I_u1u2s3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, signed int c) \
+ { \
+ build_insn(buf, insn##op, a, b, c); \
+ }
+
+#define I_u2s3u1(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ signed int b, unsigned int c) \
+ { \
+ build_insn(buf, insn##op, c, a, b); \
+ }
+
+#define I_u2u1s3(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b, signed int c) \
+ { \
+ build_insn(buf, insn##op, b, a, c); \
+ }
+
+#define I_u1u2(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ unsigned int b) \
+ { \
+ build_insn(buf, insn##op, a, b); \
+ }
+
+#define I_u1s2(op) \
+ static inline void i##op(u32 **buf, unsigned int a, \
+ signed int b) \
+ { \
+ build_insn(buf, insn##op, a, b); \
+ }
+
+#define I_u1(op) \
+ static inline void i##op(u32 **buf, unsigned int a) \
+ { \
+ build_insn(buf, insn##op, a); \
+ }
+
+#define I_0(op) \
+ static inline void i##op(u32 **buf) \
+ { \
+ build_insn(buf, insn##op); \
+ }
+
+I_u2u1s3(_addiu);
+I_u3u1u2(_addu);
+I_u2u1u3(_andi);
+I_u3u1u2(_and);
+I_u1u2s3(_beq);
+I_u1s2(_bgez);
+I_u1s2(_bgezl);
+I_u1s2(_bltz);
+I_u1s2(_bltzl);
+I_u1u2s3(_bne);
+I_u1u2(_dmfc0);
+I_u1u2(_dmtc0);
+I_u2u1s3(_daddiu);
+I_u3u1u2(_daddu);
+I_u2u1u3(_dsll);
+I_u2u1u3(_dsll32);
+I_u2u1u3(_dsra);
+I_u2u1u3(_dsrl);
+I_u2u1u3(_dsrl32);
+I_u3u1u2(_dsubu);
+I_0(_eret);
+I_u1(_j);
+I_u1(_jal);
+I_u1(_jr);
+I_u2s3u1(_ld);
+I_u1s2(_lui);
+I_u2s3u1(_lw);
+I_u1u2(_mfc0);
+I_u1u2(_mtc0);
+I_u2u1u3(_ori);
+I_0(_rfe);
+I_u2s3u1(_sd);
+I_u2u1u3(_sll);
+I_u2u1u3(_sra);
+I_u2u1u3(_srl);
+I_u3u1u2(_subu);
+I_u2s3u1(_sw);
+I_0(_tlbp);
+I_0(_tlbwi);
+I_0(_tlbwr);
+I_u3u1u2(_xor)
+I_u2u1u3(_xori);
+
+/*
+ * handling labels
+ */
+
+enum label_id {
+ label_invalid,
+ label_second_part,
+ label_leave,
+ label_vmalloc,
+ label_vmalloc_done,
+ label_tlbwr_hazard,
+ label_split
+};
+
+struct label {
+ u32 *addr;
+ enum label_id lab;
+};
+
+static __init void build_label(struct label **lab, u32 *addr,
+ enum label_id l)
+{
+ (*lab)->addr = addr;
+ (*lab)->lab = l;
+ (*lab)++;
+}
+
+#define L_LA(lb) \
+ static inline void l##lb(struct label **lab, u32 *addr) \
+ { \
+ build_label(lab, addr, label##lb); \
+ }
+
+L_LA(_second_part)
+L_LA(_leave)
+L_LA(_vmalloc)
+L_LA(_vmalloc_done)
+L_LA(_tlbwr_hazard)
+L_LA(_split)
+
+/* convenience macros for instructions */
+#ifdef CONFIG_MIPS64
+# define i_LW(buf, rs, rt, off) i_ld(buf, rs, rt, off)
+# define i_SW(buf, rs, rt, off) i_sd(buf, rs, rt, off)
+# define i_SLL(buf, rs, rt, sh) i_dsll(buf, rs, rt, sh)
+# define i_SRA(buf, rs, rt, sh) i_dsra(buf, rs, rt, sh)
+# define i_SRL(buf, rs, rt, sh) i_dsrl(buf, rs, rt, sh)
+# define i_MFC0(buf, rt, rd) i_dmfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd) i_dmtc0(buf, rt, rd)
+# define i_ADDIU(buf, rs, rt, val) i_daddiu(buf, rs, rt, val)
+# define i_ADDU(buf, rs, rt, rd) i_daddu(buf, rs, rt, rd)
+# define i_SUBU(buf, rs, rt, rd) i_dsubu(buf, rs, rt, rd)
+#else
+# define i_LW(buf, rs, rt, off) i_lw(buf, rs, rt, off)
+# define i_SW(buf, rs, rt, off) i_sw(buf, rs, rt, off)
+# define i_SLL(buf, rs, rt, sh) i_sll(buf, rs, rt, sh)
+# define i_SRA(buf, rs, rt, sh) i_sra(buf, rs, rt, sh)
+# define i_SRL(buf, rs, rt, sh) i_srl(buf, rs, rt, sh)
+# define i_MFC0(buf, rt, rd) i_mfc0(buf, rt, rd)
+# define i_MTC0(buf, rt, rd) i_mtc0(buf, rt, rd)
+# define i_ADDIU(buf, rs, rt, val) i_addiu(buf, rs, rt, val)
+# define i_ADDU(buf, rs, rt, rd) i_addu(buf, rs, rt, rd)
+# define i_SUBU(buf, rs, rt, rd) i_subu(buf, rs, rt, rd)
+#endif
+
+#define i_b(buf, off) i_beq(buf, 0, 0, off)
+#define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off)
+#define i_move(buf, a, b) i_ADDU(buf, a, 0, b)
+#define i_nop(buf) i_sll(buf, 0, 0, 0)
+#define i_ssnop(buf) i_sll(buf, 0, 2, 1)
+
+#if CONFIG_MIPS64
+static __init int in_compat_space_p(long addr)
+{
+ /* Is this address in 32bit compat space? */
+ return (((addr) & 0xffffffff00000000) == 0xffffffff00000000);
+}
+
+static __init int rel_highest(long val)
+{
+ return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init int rel_higher(long val)
+{
+ return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000;
+}
+#endif
+
+static __init int rel_hi(long val)
+{
+ return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init int rel_lo(long val)
+{
+ return ((val & 0xffff) ^ 0x8000) - 0x8000;
+}
+
+static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr)
+{
+#if CONFIG_MIPS64
+ if (!in_compat_space_p(addr)) {
+ i_lui(buf, rs, rel_highest(addr));
+ if (rel_higher(addr))
+ i_daddiu(buf, rs, rs, rel_higher(addr));
+ if (rel_hi(addr)) {
+ i_dsll(buf, rs, rs, 16);
+ i_daddiu(buf, rs, rs, rel_hi(addr));
+ i_dsll(buf, rs, rs, 16);
+ } else
+ i_dsll32(buf, rs, rs, 0);
+ } else
+#endif
+ i_lui(buf, rs, rel_hi(addr));
+}
+
+static __init void i_LA(u32 **buf, unsigned int rs, long addr)
+{
+ i_LA_mostly(buf, rs, addr);
+ if (rel_lo(addr))
+ i_ADDIU(buf, rs, rs, rel_lo(addr));
+}
+
+/*
+ * handle relocations
+ */
+
+struct reloc {
+ u32 *addr;
+ unsigned int type;
+ enum label_id lab;
+};
+
+static __init void r_mips_pc16(struct reloc **rel, u32 *addr,
+ enum label_id l)
+{
+ (*rel)->addr = addr;
+ (*rel)->type = R_MIPS_PC16;
+ (*rel)->lab = l;
+ (*rel)++;
+}
+
+static inline void __resolve_relocs(struct reloc *rel, struct label *lab)
+{
+ long laddr = (long)lab->addr;
+ long raddr = (long)rel->addr;
+
+ switch (rel->type) {
+ case R_MIPS_PC16:
+ *rel->addr |= build_bimm(laddr - (raddr + 4));
+ break;
+
+ default:
+ panic("Unsupported TLB synthesizer relocation %d",
+ rel->type);
+ }
+}
+
+static __init void resolve_relocs(struct reloc *rel, struct label *lab)
+{
+ struct label *l;
+
+ for (; rel->lab != label_invalid; rel++)
+ for (l = lab; l->lab != label_invalid; l++)
+ if (rel->lab == l->lab)
+ __resolve_relocs(rel, l);
+}
+
+static __init void copy_handler(struct reloc *rel, struct label *lab,
+ u32 *first, u32 *end, u32* target)
+{
+ long off = (long)(target - first);
+
+ memcpy(target, first, (end - first) * sizeof(u32));
+
+ for (; rel->lab != label_invalid; rel++)
+ if (rel->addr >= first && rel->addr < end)
+ rel->addr += off;
+
+ for (; lab->lab != label_invalid; lab++)
+ if (lab->addr >= first && lab->addr < end)
+ lab->addr += off;
+}
+
+static __init int insn_has_bdelay(struct reloc *rel, u32 *addr)
+{
+ for (; rel->lab != label_invalid; rel++) {
+ if (rel->addr == addr
+ && (rel->type == R_MIPS_PC16
+ || rel->type == R_MIPS_26))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* convenience functions for labeled branches */
+static void il_bltz(u32 **p, struct reloc **r, unsigned int reg,
+ enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_bltz(p, reg, 0);
+}
+
+static void il_b(u32 **p, struct reloc **r, enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_b(p, 0);
+}
+
+static void il_bnez(u32 **p, struct reloc **r, unsigned int reg,
+ enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_bnez(p, reg, 0);
+}
+
+static void il_bgezl(u32 **p, struct reloc **r, unsigned int reg,
+ enum label_id l)
+{
+ r_mips_pc16(r, *p, l);
+ i_bgezl(p, reg, 0);
+}
+
+/* The only registers allowed in TLB handlers. */
+#define K0 26
+#define K1 27
+
+/* Some CP0 registers */
+#define C0_INDEX 0
+#define C0_ENTRYLO0 2
+#define C0_ENTRYLO1 3
+#define C0_CONTEXT 4
+#define C0_BADVADDR 8
+#define C0_ENTRYHI 10
+#define C0_EPC 14
+#define C0_XCONTEXT 20
+
+#ifdef CONFIG_MIPS64
+# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_XCONTEXT)
+#else
+# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_CONTEXT)
+#endif
+
+/* The worst case length of the handler is around 18 instructions for
+ * R3000-style TLBs and up to 63 instructions for R4000-style TLBs.
+ * Maximum space available is 32 instructions for R3000 and 64
+ * instructions for R4000.
+ *
+ * We deliberately chose a buffer size of 128, so we won't scribble
+ * over anything important on overflow before we panic.
+ */
+static __initdata u32 tlb_handler[128];
+
+/* simply assume worst case size for labels and relocs */
+static __initdata struct label labels[128];
+static __initdata struct reloc relocs[128];
+
+#ifdef CONFIG_MIPS32
+/*
+ * The R3000 TLB handler is simple.
+ */
+static void __init build_r3000_tlb_refill_handler(void)
+{
+ long pgdc = (long)pgd_current;
+ u32 *p;
+
+ memset(tlb_handler, 0, sizeof(tlb_handler));
+ p = tlb_handler;
+
+ i_mfc0(&p, K0, C0_BADVADDR);
+ i_lui(&p, K1, rel_hi(pgdc)); /* cp0 delay */
+ i_lw(&p, K1, rel_lo(pgdc), K1);
+ i_srl(&p, K0, K0, 22); /* load delay */
+ i_sll(&p, K0, K0, 2);
+ i_addu(&p, K1, K1, K0);
+ i_mfc0(&p, K0, C0_CONTEXT);
+ i_lw(&p, K1, 0, K1);
+ i_andi(&p, K0, K0, 0xffc); /* load delay */
+ i_addu(&p, K1, K1, K0);
+ i_lw(&p, K0, 0, K1);
+ i_nop(&p); /* load delay */
+ i_mtc0(&p, K0, C0_ENTRYLO0);
+ i_mfc0(&p, K1, C0_EPC); /* cp0 delay */
+ i_tlbwr(&p); /* cp0 delay */
+ i_jr(&p, K1); /* cp0 delay */
+ i_rfe(&p); /* branch delay */
+
+ if (p > tlb_handler + 32)
+ panic("TLB refill handler space exceeded");
+
+ printk("Synthesized TLB handler (%u instructions).\n",
+ p - tlb_handler);
+#ifdef DEBUG_TLB
+ {
+ int i;
+ for (i = 0; i < (p - tlb_handler); i++)
+ printk("%08x\n", tlb_handler[i]);
+ }
+#endif
+
+ memcpy((void *)CAC_BASE, tlb_handler, 0x80);
+ flush_icache_range(CAC_BASE, CAC_BASE + 0x80);
+}
+#endif /* CONFIG_MIPS32 */
+
+/*
+ * The R4000 TLB handler is much more complicated. We have two
+ * consecutive handler areas with 32 instructions space each.
+ * Since they aren't used at the same time, we can overflow in the
+ * other one.To keep things simple, we first assume linear space,
+ * then we relocate it to the final handler layout as needed.
+ */
+static __initdata u32 final_handler[64];
+
+/*
+ * Hazards
+ *
+ * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0:
+ * 2. A timing hazard exists for the TLBP instruction.
+ *
+ * stalling_instruction
+ * TLBP
+ *
+ * The JTLB is being read for the TLBP throughout the stall generated by the
+ * previous instruction. This is not really correct as the stalling instruction
+ * can modify the address used to access the JTLB. The failure symptom is that
+ * the TLBP instruction will use an address created for the stalling instruction
+ * and not the address held in C0_ENHI and thus report the wrong results.
+ *
+ * The software work-around is to not allow the instruction preceding the TLBP
+ * to stall - make it an NOP or some other instruction guaranteed not to stall.
+ *
+ * Errata 2 will not be fixed. This errata is also on the R5000.
+ *
+ * As if we MIPS hackers wouldn't know how to nop pipelines happy ...
+ */
+static __init void build_tlbp_hazard(u32 **p)
+{
+ switch (current_cpu_data.cputype) {
+ case CPU_R5000:
+ case CPU_R5000A:
+ case CPU_NEVADA:
+ i_nop(p);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Write random TLB entry, and care about the hazards from the
+ * preceeding mtc0 and for the following eret.
+ */
+static __init void build_tlb_write_random_entry(u32 **p, struct label **l,
+ struct reloc **r)
+{
+ switch (current_cpu_data.cputype) {
+ case CPU_R4000PC:
+ case CPU_R4000SC:
+ case CPU_R4000MC:
+ case CPU_R4400PC:
+ case CPU_R4400SC:
+ case CPU_R4400MC:
+ /*
+ * This branch uses up a mtc0 hazard nop slot and saves
+ * two nops after the tlbwr.
+ */
+ il_bgezl(p, r, 0, label_tlbwr_hazard);
+ i_tlbwr(p);
+ l_tlbwr_hazard(l, *p);
+ i_nop(p);
+ break;
+
+ case CPU_R4600:
+ case CPU_R4700:
+ i_nop(p);
+ i_tlbwr(p);
+ break;
+
+ case CPU_NEVADA:
+ i_nop(p); /* QED specifies 2 nops hazard */
+ /*
+ * This branch uses up a mtc0 hazard nop slot and saves
+ * a nop after the tlbwr.
+ */
+ il_bgezl(p, r, 0, label_tlbwr_hazard);
+ i_tlbwr(p);
+ l_tlbwr_hazard(l, *p);
+ break;
+
+ case CPU_RM9000:
+ /*
+ * When the JTLB is updated by tlbwi or tlbwr, a subsequent
+ * use of the JTLB for instructions should not occur for 4
+ * cpu cycles and use for data translations should not occur
+ * for 3 cpu cycles.
+ */
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_tlbwr(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ i_ssnop(p);
+ break;
+
+ case CPU_R10000:
+ case CPU_R12000:
+ case CPU_SB1:
+ i_tlbwr(p);
+ break;
+
+ default:
+ /*
+ * Others are assumed to have one cycle mtc0 hazard,
+ * and one cycle tlbwr hazard.
+ * XXX: This might be overly general.
+ */
+ i_nop(p);
+ i_tlbwr(p);
+ i_nop(p);
+ break;
+ }
+}
+
+#if CONFIG_MIPS64
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pmd entry.
+ */
+static __init void
+build_get_pmde64(u32 **p, struct label **l, struct reloc **r,
+ unsigned int tmp, unsigned int ptr)
+{
+ long pgdc = (long)pgd_current;
+
+ /*
+ * The vmalloc handling is not in the hotpath.
+ */
+ i_dmfc0(p, tmp, C0_BADVADDR);
+ il_bltz(p, r, tmp, label_vmalloc);
+ /* No i_nop needed here, since the next insn doesn't touch TMP. */
+
+# ifdef CONFIG_SMP
+ /*
+ * 64 bit SMP has the lower part of &pgd_current[smp_processor_id()]
+ * stored in CONTEXT.
+ */
+ if (in_compat_space_p(pgdc)) {
+ i_dmfc0(p, ptr, C0_CONTEXT);
+ i_dsra(p, ptr, ptr, 23);
+ } else {
+ i_dmfc0(p, ptr, C0_CONTEXT);
+ i_lui(p, tmp, rel_highest(pgdc));
+ i_dsll(p, ptr, ptr, 9);
+ i_daddiu(p, tmp, tmp, rel_higher(pgdc));
+ i_dsrl32(p, ptr, ptr, 0);
+ i_and(p, ptr, ptr, tmp);
+ i_dmfc0(p, tmp, C0_BADVADDR);
+ }
+ i_ld(p, ptr, 0, ptr);
+# else
+ i_LA_mostly(p, ptr, pgdc);
+ i_ld(p, ptr, rel_lo(pgdc), ptr);
+# endif
+
+ l_vmalloc_done(l, *p);
+ i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); /* get pgd offset in bytes */
+ i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3);
+ i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */
+ i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+ i_ld(p, ptr, 0, ptr); /* get pmd pointer */
+ i_dsrl(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */
+ i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3);
+ i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */
+}
+
+/*
+ * BVADDR is the faulting address, PTR is scratch.
+ * PTR will hold the pgd for vmalloc.
+ */
+static __init void
+build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r,
+ unsigned int bvaddr, unsigned int ptr)
+{
+ long swpd = (long)swapper_pg_dir;
+
+ l_vmalloc(l, *p);
+ i_LA(p, ptr, VMALLOC_START);
+ i_dsubu(p, bvaddr, bvaddr, ptr);
+
+ if (in_compat_space_p(swpd) && !rel_lo(swpd)) {
+ il_b(p, r, label_vmalloc_done);
+ i_lui(p, ptr, rel_hi(swpd));
+ } else {
+ i_LA_mostly(p, ptr, swpd);
+ il_b(p, r, label_vmalloc_done);
+ i_daddiu(p, ptr, ptr, rel_lo(swpd));
+ }
+}
+
+#else /* CONFIG_MIPS32 */
+
+/*
+ * TMP and PTR are scratch.
+ * TMP will be clobbered, PTR will hold the pgd entry.
+ */
+static __init void build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+ long pgdc = (long)pgd_current;
+
+ /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */
+#ifdef CONFIG_SMP
+ i_mfc0(p, ptr, C0_CONTEXT);
+ i_LA_mostly(p, tmp, pgdc);
+ i_srl(p, ptr, ptr, 23);
+ i_sll(p, ptr, ptr, 2);
+ i_addu(p, ptr, tmp, ptr);
+#else
+ i_LA_mostly(p, ptr, pgdc);
+#endif
+ i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
+ i_lw(p, ptr, rel_lo(pgdc), ptr);
+ i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
+ i_sll(p, tmp, tmp, PGD_T_LOG2);
+ i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
+}
+#endif /* CONFIG_MIPS32 */
+
+static __init void build_adjust_context(u32 **p, unsigned int ctx)
+{
+ unsigned int shift = 0;
+ unsigned int mask = 0xff0;
+
+#if !defined(CONFIG_MIPS64) && !defined(CONFIG_64BIT_PHYS_ADDR)
+ shift++;
+ mask |= 0x008;
+#endif
+
+ switch (current_cpu_data.cputype) {
+ case CPU_VR41XX:
+ case CPU_VR4111:
+ case CPU_VR4121:
+ case CPU_VR4122:
+ case CPU_VR4131:
+ case CPU_VR4181:
+ case CPU_VR4181A:
+ case CPU_VR4133:
+ shift += 2;
+ break;
+
+ default:
+ break;
+ }
+
+ if (shift)
+ i_SRL(p, ctx, ctx, shift);
+ i_andi(p, ctx, ctx, mask);
+}
+
+static __init void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
+{
+ /*
+ * Bug workaround for the Nevada. It seems as if under certain
+ * circumstances the move from cp0_context might produce a
+ * bogus result when the mfc0 instruction and its consumer are
+ * in a different cacheline or a load instruction, probably any
+ * memory reference, is between them.
+ */
+ switch (current_cpu_data.cputype) {
+ case CPU_NEVADA:
+ i_LW(p, ptr, 0, ptr);
+ GET_CONTEXT(p, tmp); /* get context reg */
+ break;
+
+ default:
+ GET_CONTEXT(p, tmp); /* get context reg */
+ i_LW(p, ptr, 0, ptr);
+ break;
+ }
+
+ build_adjust_context(p, tmp);
+ i_ADDU(p, ptr, ptr, tmp); /* add in offset */
+}
+
+static __init void build_update_entries(u32 **p, unsigned int tmp,
+ unsigned int ptep)
+{
+ /*
+ * 64bit address support (36bit on a 32bit CPU) in a 32bit
+ * Kernel is a special case. Only a few CPUs use it.
+ */
+#ifdef CONFIG_64BIT_PHYS_ADDR
+ if (cpu_has_64bit_registers) {
+ i_ld(p, tmp, 0, ptep); /* get even pte */
+ i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
+ i_dsrl(p, tmp, tmp, 6); /* convert to entrylo0 */
+ i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+ i_dsrl(p, ptep, ptep, 6); /* convert to entrylo1 */
+ i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+ } else {
+ int pte_off_even = sizeof(pte_t) / 2;
+ int pte_off_odd = pte_off_even + sizeof(pte_t);
+
+ /* The pte entries are pre-shifted */
+ i_lw(p, tmp, pte_off_even, ptep); /* get even pte */
+ i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+ i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */
+ i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+ }
+#else
+ i_LW(p, tmp, 0, ptep); /* get even pte */
+ i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */
+ if (r45k_bvahwbug()) {
+ build_tlbp_hazard(p);
+ i_tlbp(p);
+ }
+ i_SRL(p, tmp, tmp, 6); /* convert to entrylo0 */
+ if (r4k_250MHZhwbug())
+ i_mtc0(p, 0, C0_ENTRYLO0);
+ i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */
+ i_SRL(p, ptep, ptep, 6); /* convert to entrylo1 */
+ if (r45k_bvahwbug())
+ i_mfc0(p, tmp, C0_INDEX);
+ if (r4k_250MHZhwbug())
+ i_mtc0(p, 0, C0_ENTRYLO1);
+ i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */
+#endif
+}
+
+static void __init build_r4000_tlb_refill_handler(void)
+{
+ u32 *p = tlb_handler;
+ struct label *l = labels;
+ struct reloc *r = relocs;
+ u32 *f;
+ unsigned int final_len;
+
+ memset(tlb_handler, 0, sizeof(tlb_handler));
+ memset(labels, 0, sizeof(labels));
+ memset(relocs, 0, sizeof(relocs));
+ memset(final_handler, 0, sizeof(final_handler));
+
+ /*
+ * create the plain linear handler
+ */
+ if (bcm1250_m3_war) {
+ i_MFC0(&p, K0, C0_BADVADDR);
+ i_MFC0(&p, K1, C0_ENTRYHI);
+ i_xor(&p, K0, K0, K1);
+ i_SRL(&p, K0, K0, PAGE_SHIFT+1);
+ il_bnez(&p, &r, K0, label_leave);
+ /* No need for i_nop */
+ }
+
+#ifdef CONFIG_MIPS64
+ build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd ptr in K1 */
+#else
+ build_get_pgde32(&p, K0, K1); /* get pgd ptr in K1 */
+#endif
+
+ build_get_ptep(&p, K0, K1);
+ build_update_entries(&p, K0, K1);
+ build_tlb_write_random_entry(&p, &l, &r);
+ l_leave(&l, p);
+ i_eret(&p); /* return from trap */
+
+#ifdef CONFIG_MIPS64
+ build_get_pgd_vmalloc64(&p, &l, &r, K0, K1);
+#endif
+
+ /*
+ * Overflow check: For the 64bit handler, we need at least one
+ * free instruction slot for the wrap-around branch. In worst
+ * case, if the intended insertion point is a delay slot, we
+ * need three, with the the second nop'ed and the third being
+ * unused.
+ */
+#ifdef CONFIG_MIPS32
+ if ((p - tlb_handler) > 64)
+ panic("TLB refill handler space exceeded");
+#else
+ if (((p - tlb_handler) > 63)
+ || (((p - tlb_handler) > 61)
+ && insn_has_bdelay(relocs, tlb_handler + 29)))
+ panic("TLB refill handler space exceeded");
+#endif
+
+ /*
+ * Now fold the handler in the TLB refill handler space.
+ */
+#ifdef CONFIG_MIPS32
+ f = final_handler;
+ /* Simplest case, just copy the handler. */
+ copy_handler(relocs, labels, tlb_handler, p, f);
+ final_len = p - tlb_handler;
+#else /* CONFIG_MIPS64 */
+ f = final_handler + 32;
+ if ((p - tlb_handler) <= 32) {
+ /* Just copy the handler. */
+ copy_handler(relocs, labels, tlb_handler, p, f);
+ final_len = p - tlb_handler;
+ } else {
+ u32 *split = tlb_handler + 30;
+
+ /*
+ * Find the split point.
+ */
+ if (insn_has_bdelay(relocs, split - 1))
+ split--;
+
+ /* Copy first part of the handler. */
+ copy_handler(relocs, labels, tlb_handler, split, f);
+ f += split - tlb_handler;
+
+ /* Insert branch. */
+ l_split(&l, final_handler);
+ il_b(&f, &r, label_split);
+ if (insn_has_bdelay(relocs, split))
+ i_nop(&f);
+ else {
+ copy_handler(relocs, labels, split, split + 1, f);
+ f++;
+ split++;
+ }
+
+ /* Copy the rest of the handler. */
+ copy_handler(relocs, labels, split, p, final_handler);
+ final_len = (f - (final_handler + 32)) + (p - split);
+ }
+#endif /* CONFIG_MIPS64 */
+
+ resolve_relocs(relocs, labels);
+ printk("Synthesized TLB handler (%u instructions).\n", final_len);
+
+#ifdef DEBUG_TLB
+ {
+ int i;
+
+ for (i = 0; i < 64; i++)
+ printk("%08x\n", final_handler[i]);
+ }
+#endif
+
+ memcpy((void *)CAC_BASE, final_handler, 0x100);
+ flush_icache_range(CAC_BASE, CAC_BASE + 0x100);
+}
+
+void __init build_tlb_refill_handler(void)
+{
+ switch (current_cpu_data.cputype) {
+#ifdef CONFIG_MIPS32
+ case CPU_R2000:
+ case CPU_R3000:
+ case CPU_R3000A:
+ case CPU_R3081E:
+ case CPU_TX3912:
+ case CPU_TX3922:
+ case CPU_TX3927:
+ build_r3000_tlb_refill_handler();
+ break;
+
+ case CPU_R6000:
+ case CPU_R6000A:
+ panic("No R6000 TLB refill handler yet");
+ break;
+#endif
+
+ case CPU_R8000:
+ panic("No R8000 TLB refill handler yet");
+ break;
+
+ default:
+ build_r4000_tlb_refill_handler();
+ }
+}
--- arch/mips/mm/tlbex64-r4k.S 2004-11-21 03:05:35.000000000 +0100
+++ /dev/null 2004-08-24 19:23:08.000000000 +0200
@@ -1,136 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2000 Silicon Graphics, Inc.
- * Written by Ulf Carlsson (ulfc@engr.sgi.com)
- * Copyright (C) 2002 Maciej W. Rozycki
- */
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-
-#include <asm/asm.h>
-#include <asm/hazards.h>
-#include <asm/regdef.h>
-#include <asm/mipsregs.h>
-#include <asm/stackframe.h>
-#include <asm/war.h>
-
-#define _VMALLOC_START 0xc000000000000000
-
- .macro GET_PGD, ptr
-#ifdef CONFIG_SMP
- /*
- * Fixme - this is b0rked for pgd_current outside of CKSEG0
- */
- dmfc0 \ptr, CP0_CONTEXT
- dsra \ptr, 23 # get pgd_current[cpu]
- ld \ptr, (\ptr)
-#else
- ld \ptr, pgd_current
-#endif
- .endm
-
- /*
- * After this macro runs we have a pointer to the pte of the address
- * that caused the fault in PTR. Expects register containing the
- * the pagetable root pointer as the ptr argument and c0_badvaddr
- * passed as tmp argument.
- */
- .macro LOAD_PTE2, ptr, tmp
- dsrl \tmp, (_PGDIR_SHIFT-3) # get pgd offset in bytes
- andi \tmp, ((_PTRS_PER_PGD - 1)<<3)
- daddu \ptr, \tmp # add in pgd offset
- dmfc0 \tmp, CP0_BADVADDR
- ld \ptr, (\ptr) # get pmd pointer
- dsrl \tmp, (_PMD_SHIFT-3) # get pmd offset in bytes
- andi \tmp, ((_PTRS_PER_PMD - 1)<<3)
- daddu \ptr, \tmp # add in pmd offset
- dmfc0 \tmp, CP0_XCONTEXT
- ld \ptr, (\ptr) # get pte pointer
- andi \tmp, 0xff0 # get pte offset
- daddu \ptr, \tmp
- .endm
-
- /*
- * This places the even/odd pte pair in the page table at the pte
- * entry pointed to by PTE into ENTRYLO0 and ENTRYLO1.
- */
- .macro PTE_RELOAD, pte0, pte1
- dsrl \pte0, 6 # convert to entrylo0
- dmtc0 \pte0, CP0_ENTRYLO0 # load it
- dsrl \pte1, 6 # convert to entrylo1
- dmtc0 \pte1, CP0_ENTRYLO1 # load it
- .endm
-
-
- .text
- .set noreorder
- .set mips3
-
- __INIT
-
- /*
- * TLB refill handlers for the R4000 and SB1.
- * Attention: We may only use 32 instructions / 128 bytes.
- */
- .align 5
-LEAF(except_vec1_r4k)
- .set noat
- dla k0, handle_vec1_r4k
- jr k0
- nop
-END(except_vec1_r4k)
-
-LEAF(except_vec1_sb1)
-#if BCM1250_M3_WAR
- dmfc0 k0, CP0_BADVADDR
- dmfc0 k1, CP0_ENTRYHI
- xor k0, k1
- dsrl k0, k0, _PAGE_SHIFT+1
- bnez k0, 1f
-#endif
- .set noat
- dla k0, handle_vec1_r4k
- jr k0
- nop
-
-1: eret
- nop
-END(except_vec1_sb1)
-
- __FINIT
-
- .align 5
-LEAF(handle_vec1_r4k)
- .set noat
- dmfc0 k0, CP0_BADVADDR
- bltz k0, 9f
-
- GET_PGD k1 # pointer to root of pgd
- LOAD_PTE2 k1 k0
- ld k0, 0(k1) # get even pte
- ld k1, 8(k1) # get odd pte
- PTE_RELOAD k0 k1
- mtc0_tlbw_hazard
- tlbwr
- nop
- tlbw_eret_hazard
- eret
-
-9: # handle the vmalloc range
- dli k1, _VMALLOC_START
- dsubu k0, k1
- dla k1, swapper_pg_dir # pointer to root of pgd
- LOAD_PTE2 k1 k0
- ld k0, 0(k1) # get even pte
- ld k1, 8(k1) # get odd pte
- PTE_RELOAD k0 k1
- mtc0_tlbw_hazard
- tlbwr
- nop
- tlbw_eret_hazard
- eret
-END(handle_vec1_r4k)
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-21 19:50 ` Geert Uytterhoeven
2004-11-21 20:37 ` Thiemo Seufer
@ 2004-11-21 20:43 ` Ralf Baechle
1 sibling, 0 replies; 28+ messages in thread
From: Ralf Baechle @ 2004-11-21 20:43 UTC (permalink / raw)
To: Geert Uytterhoeven; +Cc: Thiemo Seufer, Linux/MIPS Development
On Sun, Nov 21, 2004 at 08:50:30PM +0100, Geert Uytterhoeven wrote:
> On Sun, 21 Nov 2004, Thiemo Seufer wrote:
> > currently we have a large number of TLB refill handlers written in
> > hand-optimized assembly which are mostly indentical. The appended
> > patch removes them all, and adds a micro-assembler instead which
> > synthesizes the proper variant for the CPU at runtime.
>
> Woow.....
This has been the plan for quite a while already. Nowhere else than in
the TLB exception handler more details about exception handling,
pipeline structure, SMP etc. become visible and benchmarkable in that
few instructions. Copy_page / clear_page have basically been a test
how well it'd work out - it did. So from that point it was just a
question of who was going to bite the bullet and do the work and Thiemo
did. Thanks!
Ralf
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-21 20:37 ` Thiemo Seufer
@ 2004-11-22 7:01 ` Ralf Baechle
2004-11-23 0:26 ` Manish Lachwani
2004-11-24 22:24 ` Manish Lachwani
2004-11-22 14:37 ` Maciej W. Rozycki
1 sibling, 2 replies; 28+ messages in thread
From: Ralf Baechle @ 2004-11-22 7:01 UTC (permalink / raw)
To: Thiemo Seufer; +Cc: Geert Uytterhoeven, Linux/MIPS Development
On Sun, Nov 21, 2004 at 09:37:57PM +0100, Thiemo Seufer wrote:
> Aww, fatal error in the spelling module. :-)
> Updated.
The patch was looking good, so I gave it a shot on one of my machines also
and it was working fine, applied.
Thanks!
Ralf
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-21 20:37 ` Thiemo Seufer
2004-11-22 7:01 ` Ralf Baechle
@ 2004-11-22 14:37 ` Maciej W. Rozycki
2004-11-22 15:56 ` Thiemo Seufer
1 sibling, 1 reply; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-22 14:37 UTC (permalink / raw)
To: Thiemo Seufer; +Cc: Geert Uytterhoeven, Linux/MIPS Development, Ralf Baechle
On Sun, 21 Nov 2004, Thiemo Seufer wrote:
> Aww, fatal error in the spelling module. :-)
> Updated.
Great stuff! Thanks a lot. I gave it some testing on hardware available
to me and it works just fine. I've got a couple of warnings upon
building, though:
arch/mips/mm/tlbex.c:500: warning: 'i_LA' defined but not used
arch/mips/mm/tlbex.c:568: warning: 'insn_has_bdelay' defined but not used
arch/mips/mm/tlbex.c:582: warning: 'il_bltz' defined but not used
arch/mips/mm/tlbex.c:588: warning: 'il_b' defined but not used
How about marking them "attribute((unused))"? I can do that if you agree.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-22 14:37 ` Maciej W. Rozycki
@ 2004-11-22 15:56 ` Thiemo Seufer
2004-11-22 18:39 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Thiemo Seufer @ 2004-11-22 15:56 UTC (permalink / raw)
To: Maciej W. Rozycki
Cc: Geert Uytterhoeven, Linux/MIPS Development, Ralf Baechle
Maciej W. Rozycki wrote:
> On Sun, 21 Nov 2004, Thiemo Seufer wrote:
>
> > Aww, fatal error in the spelling module. :-)
> > Updated.
>
> Great stuff! Thanks a lot. I gave it some testing on hardware available
> to me and it works just fine. I've got a couple of warnings upon
> building, though:
>
> arch/mips/mm/tlbex.c:500: warning: 'i_LA' defined but not used
> arch/mips/mm/tlbex.c:568: warning: 'insn_has_bdelay' defined but not used
> arch/mips/mm/tlbex.c:582: warning: 'il_bltz' defined but not used
> arch/mips/mm/tlbex.c:588: warning: 'il_b' defined but not used
>
> How about marking them "attribute((unused))"? I can do that if you agree.
Please do so, but IIRC there's a compiler-independent Linux-specific
define which is preferable. Newest gcc just removes unused static
functions without further notice, AFAIK.
Thiemo
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-22 15:56 ` Thiemo Seufer
@ 2004-11-22 18:39 ` Maciej W. Rozycki
0 siblings, 0 replies; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-22 18:39 UTC (permalink / raw)
To: Thiemo Seufer; +Cc: Geert Uytterhoeven, Linux/MIPS Development, Ralf Baechle
On Mon, 22 Nov 2004, Thiemo Seufer wrote:
> > Great stuff! Thanks a lot. I gave it some testing on hardware available
> > to me and it works just fine. I've got a couple of warnings upon
> > building, though:
> >
> > arch/mips/mm/tlbex.c:500: warning: 'i_LA' defined but not used
> > arch/mips/mm/tlbex.c:568: warning: 'insn_has_bdelay' defined but not used
> > arch/mips/mm/tlbex.c:582: warning: 'il_bltz' defined but not used
> > arch/mips/mm/tlbex.c:588: warning: 'il_b' defined but not used
> >
> > How about marking them "attribute((unused))"? I can do that if you agree.
>
> Please do so, but IIRC there's a compiler-independent Linux-specific
It is the reverse -- there is a macro called "__attribute_used__" for use
to prevent object removal and it normally expands to "attribute((used))",
unless the compiler is old enough not to support it. Only then it expands
to "attribute((unused))", merely to kill this warning. But in this case
we don't mind such removals -- they are actually welcome and we (now) know
of that, so we just want to get rid of the warnings completely.
I don't think we support building with GCC that doesn't support this
attribute; a brief research suggests this dates back to GCC 2.7. Anyone
please feel free to correct me.
Oh yeah -- there is a macro called "ACPI_UNUSED_VAR"; to deal with
incorrect compilers I suppose... ;-)
> define which is preferable. Newest gcc just removes unused static
> functions without further notice, AFAIK.
This is with 3.4.2 -- 4.0 could be different indeed (docs suggest
otherwise, but they could lag a bit; I'm too lazy to dig change logs).
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-22 7:01 ` Ralf Baechle
@ 2004-11-23 0:26 ` Manish Lachwani
2004-11-23 0:40 ` Maciej W. Rozycki
2004-11-24 22:24 ` Manish Lachwani
1 sibling, 1 reply; 28+ messages in thread
From: Manish Lachwani @ 2004-11-23 0:26 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Thiemo Seufer, Geert Uytterhoeven, Linux/MIPS Development
Ralf Baechle wrote:
> On Sun, Nov 21, 2004 at 09:37:57PM +0100, Thiemo Seufer wrote:
>
>
>>Aww, fatal error in the spelling module. :-)
>>Updated.
>
>
> The patch was looking good, so I gave it a shot on one of my machines also
> and it was working fine, applied.
>
> Thanks!
>
> Ralf
>
Hello !
I tried out the patch on a MIPS Malta board (24Kc core). Compiled fine
and booted fine as well. On bootup, I see:
...
Synthesized TLB handler (26 instructions).
...
However, when running a native kernel make (some test, I guess), I ran
into the following:
gcc -D__KERNEL__ -I/root/2.4.19/include -Wall -Wstrict-prototypes
-Wno-trigraphs -O2 -fno-strict-aliasing -fno-common -fomit-frame-pointer
-DGEMDEBUG_TRACEBUFFER -I /root/2.4.19/include/asm/gcc -G 0
-mno-abicalls -fno-pic -pipe -mips2 -Wa,--trap -DKBUILD_BASENAME=main
-c -o init/main.o init/main.c
Data bus error, epc == 801f8ab8, ra == 80324be4
Oops in arch/mips/kernel/traps.c::do_be, line 330[#1]:
Cpu 0
$ 0 : 00000000 80000000 83fffb24 00000000
$ 4 : 83ffff04 8123e414 000000dc 00000000
$ 8 : 642e6261 00000000 7470672e 622e6261
$12 : 00007373 8117dd60 0000000a 7470672e
$16 : 8123e034 83fffb24 000004dc 0000a8f4
$20 : 81170060 00000000 83fffb24 00ff00ff
$24 : 0000001c 00000001
$28 : 83c58000 83c597f0 00000000 80324be4
Hi : 106210ce
Lo : d70a080d
epc : 801f8ab8 both_aligned+0x40/0x74 Not tainted
ra : 80324be4 csum_partial_copy_nocheck+0x44/0x64
Status: 1000fc03 KERNEL EXL IE
Cause : 0080001c
PrId : 00019360
Modules linked in:
Process as (pid: 120, threadinfo=83c58000, task=83f47850)
Stack : 00000001 a1121000 80258ee0 80258df0 000004dc 000004dc 000004dc
00000000
802a82e4 80383a14 50808000 801f9198 8114d5c8 00000000 a1121000
00000000
000004f3 810da000 000004dc 000004dc 0000106c 00000b90 81170060
0001090e
83fffb24 00ff00ff 00000b24 802a8498 81170060 00000000 8114d5c8
00000000
00000000 00000000 83c59910 0000000a 0000006c 00001000 83c59918
00001000
...
Call Trace:
[<80258ee0>] pcnet32_rx+0x38c/0x4b4
[<80258df0>] pcnet32_rx+0x29c/0x4b4
[<802a82e4>] skb_copy_and_csum_bits+0x78/0x2bc
[<801f9198>] move_128bytes+0x90/0x214
[<802a8498>] skb_copy_and_csum_bits+0x22c/0x2bc
[<80312dc4>] skb_read_and_csum_bits+0x0/0xb4
[<80312e08>] skb_read_and_csum_bits+0x44/0xb4
[<8014328c>] __do_IRQ+0x170/0x184
[<8031f494>] xdr_partial_copy_from_skb+0x190/0x1dc
[<80312eec>] csum_partial_copy_to_xdr+0x74/0x134
[<801026bc>] mipsIRQ+0x11c/0x180
[<803130d4>] udp_data_ready+0x128/0x230
[<802eb968>] udp_queue_rcv_skb+0x1e4/0x318
[<802c4e68>] ipq_kill+0x18/0xcc
[<802c513c>] ip_frag_intern+0x3c/0xe8
[<802ec054>] udp_rcv+0x158/0x418
[<802c4de4>] ip_frag_destroy+0xf8/0x164
[<802c5c98>] ip_defrag+0x140/0x214
[<802c5b80>] ip_defrag+0x28/0x214
[<802c3eac>] ip_local_deliver+0x150/0x2dc
[<802c4498>] ip_rcv+0x460/0x5cc
[<802ae0e4>] process_backlog+0xcc/0x1d0
[<802adf28>] netif_receive_skb+0x16c/0x25c
[<802a697c>] skb_release_data+0xe0/0x124
[<80313920>] xprt_write_space+0xc/0xf0
[<802ae0e4>] process_backlog+0xcc/0x1d0
[<802ae2ac>] net_rx_action+0xc4/0x1d8
[<8012aea8>] __do_softirq+0x108/0x11c
[<8012af48>] do_softirq+0x8c/0x94
[<801431bc>] __do_IRQ+0xa0/0x184
[<8014328c>] __do_IRQ+0x170/0x184
[<80143028>] irq_exit+0x4c/0x54
[<80102c84>] malta_hw0_irqdispatch+0x104/0x204
[<80102c7c>] malta_hw0_irqdispatch+0xfc/0x204
[<8013d030>] autoremove_wake_function+0x0/0x44
[<801026bc>] mipsIRQ+0x11c/0x180
[<803113fc>] call_transmit+0x68/0xd4
[<80311444>] call_transmit+0xb0/0xd4
[<801be7b4>] nfs_execute_read+0x3c/0x5c
[<801be7b4>] nfs_execute_read+0x3c/0x5c
[<801beb20>] nfs_pagein_one+0x138/0x164
[<801beb18>] nfs_pagein_one+0x130/0x164
[<801bebd0>] nfs_pagein_list+0x84/0xb0
[<801bebb4>] nfs_pagein_list+0x68/0xb0
[<801bf38c>] nfs_readpages+0xe8/0x124
[<8014e25c>] read_pages+0x1c8/0x1d0
[<8014a304>] buffered_rmqueue+0x198/0x288
[<80102b20>] mips_timer_interrupt+0x60/0xc0
[<8014a7b4>] __alloc_pages+0x3c0/0x3d0
[<8014e600>] do_page_cache_readahead+0x16c/0x204
[<8014a304>] buffered_rmqueue+0x198/0x288
[<80146564>] filemap_nopage+0x480/0x50c
[<801115e8>] r4k_flush_cache_page+0x224/0x238
[<80112408>] blast_icache32+0x6c/0xf0
[<801588a0>] do_no_page+0xe0/0x4b4
[<801579ec>] do_wp_page+0x264/0x544
[<80158ecc>] handle_mm_fault+0x148/0x20c
[<80143028>] irq_exit+0x4c/0x54
[<801083d8>] ll_timer_interrupt+0x48/0x54
[<80110774>] do_page_fault+0x1d4/0x360
[<8012fa74>] run_timer_softirq+0x10c/0x214
[<801026a4>] mipsIRQ+0x104/0x180
[<8012aea8>] __do_softirq+0x108/0x11c
[<8012af48>] do_softirq+0x8c/0x94
[<8010829c>] timer_interrupt+0x178/0x26c
[<80143028>] irq_exit+0x4c/0x54
[<801083d8>] ll_timer_interrupt+0x48/0x54
[<801138d0>] nopage_tlbl+0xf0/0x100
[<801026a4>] mipsIRQ+0x104/0x180
Code: ac880000 ac890004 8ca80018 <8ca9001c> 24a50020 24840020
ac8affe8 ac8bffec ac8cfff0
Kernel panic - not syncing: Aiee, killing interrupt handler!
I have not got a chance to look deeper into it, but just wanted to let
you folks know
Thanks
Manish Lachwani
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-23 0:26 ` Manish Lachwani
@ 2004-11-23 0:40 ` Maciej W. Rozycki
2004-11-23 2:17 ` Manish Lachwani
0 siblings, 1 reply; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-23 0:40 UTC (permalink / raw)
To: Manish Lachwani
Cc: Ralf Baechle, Thiemo Seufer, Geert Uytterhoeven,
Linux/MIPS Development
On Mon, 22 Nov 2004, Manish Lachwani wrote:
> I tried out the patch on a MIPS Malta board (24Kc core). Compiled fine
> and booted fine as well. On bootup, I see:
>
> ...
> Synthesized TLB handler (26 instructions).
> ...
This should be 21 instructions -- please get an update from the CVS tree
for a fix I applied yesterday. You run with the BCM1250 workaround
enabled.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-23 0:40 ` Maciej W. Rozycki
@ 2004-11-23 2:17 ` Manish Lachwani
2004-11-23 20:24 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Manish Lachwani @ 2004-11-23 2:17 UTC (permalink / raw)
To: Maciej W. Rozycki
Cc: Ralf Baechle, Thiemo Seufer, Geert Uytterhoeven,
Linux/MIPS Development
Maciej W. Rozycki wrote:
> On Mon, 22 Nov 2004, Manish Lachwani wrote:
>
>
>>I tried out the patch on a MIPS Malta board (24Kc core). Compiled fine
>>and booted fine as well. On bootup, I see:
>>
>>...
>>Synthesized TLB handler (26 instructions).
>>...
>
>
> This should be 21 instructions -- please get an update from the CVS tree
> for a fix I applied yesterday. You run with the BCM1250 workaround
> enabled.
>
> Maciej
Hello Maciej,
Thanks for the info. I got the latest sources from cvs. Bootlog shows:
...
Synthesized TLB handler (21 instructions).
...
However, the crash still occurs. I dont think your patch was intended to
fix the problem that I see below (resulting in crash).
I will try to debug this now.
gcc -D__KERNEL__ -I/root/2.4.19/include -Wall -Wstrict-prototypes
-Wno-trigraphs -O2 -fno-strict-aliasing -fno-common -fomit-frame-pointer
-DGEMDEBUG_TRACEBUFFER -I /root/2.4.19/include/asm/gcc -G 0
-mno-abicalls -fno-pic -pipe -mips2 -Wa,--trap -DKBUILD_BASENAME=main
-c -o init/main.o init/main.c
Data bus error, epc == 801f83b8, ra == 80323f04
Oops in arch/mips/kernel/traps.c::do_be, line 330[#1]:
Cpu 0
$ 0 : 00000000 80000000 83fffb24 00000000
$ 4 : 83ffff04 80612414 000000dc 00000000
$ 8 : 73696874 6c6f7362 20657475 72707865
$12 : 69737365 81141d60 0000000a 00006e6f
$16 : 80612034 83fffb24 000004dc 0000f45d
$20 : 81136660 00000000 83fffb24 00ff00ff
$24 : 0000001c 00000001
$28 : 8114c000 8114d6c0 00000000 80323f04
Hi : 10621125
Lo : f1a9c6d0
epc : 801f83b8 both_aligned+0x40/0x74 Not tainted
ra : 80323f04 csum_partial_copy_nocheck+0x44/0x64
Status: 1000fc03 KERNEL EXL IE
Cause : 0080001c
PrId : 00019360
Modules linked in:
Process rpciod (pid: 12, threadinfo=8114c000, task=810f7060)
Stack : 10008000 80100744 00000000 00000000 000004dc 000004dc 000004dc
00000000
802a7464 1000fc00 00000000 80383e34 80383e34 00000003 00000001
00000000
0000dcf9 83fff000 000004dc 000004dc 0000106c 00000b90 81136660
00013fb7
83fffb24 00ff00ff 00000b24 802a7618 81136660 804f3280 81114600
802a5b64
00000000 00000001 00000000 00000000 0000006c 00001000 8114d7e8
00001000
...
Call Trace:
[<80100744>] mipsIRQ+0x104/0x180
[<802a7464>] skb_copy_and_csum_bits+0x78/0x2bc
[<802a7618>] skb_copy_and_csum_bits+0x22c/0x2bc
[<802a5b64>] kfree_skbmem+0x24/0x34
[<80312144>] skb_read_and_csum_bits+0x0/0xb4
[<80312188>] skb_read_and_csum_bits+0x44/0xb4
[<8010e4b8>] __flush_dcache_page+0x90/0xa4
[<8031e814>] xdr_partial_copy_from_skb+0x190/0x1dc
[<8031226c>] csum_partial_copy_to_xdr+0x74/0x134
[<80312454>] udp_data_ready+0x128/0x230
...
From the disassembly:
801f8378 <both_aligned>:
801f8378: 00064142 srl t0,a2,0x5
801f837c: 1100001b beqz t0,801f83ec <cleanup_both_aligned>
801f8380: 30d8001f andi t8,a2,0x1f
801f8384: cca00060 pref 0x0,96(a1)
801f8388: cc810060 pref 0x1,96(a0)
801f838c: 00000000 nop
801f8390: 8ca80000 lw t0,0(a1)
801f8394: 8ca90004 lw t1,4(a1)
801f8398: 8caa0008 lw t2,8(a1)
801f839c: 8cab000c lw t3,12(a1)
801f83a0: 24c6ffe0 addiu a2,a2,-32
801f83a4: 8cac0010 lw t4,16(a1)
801f83a8: 8caf0014 lw t7,20(a1)
801f83ac: ac880000 sw t0,0(a0)
801f83b0: ac890004 sw t1,4(a0)
801f83b4: 8ca80018 lw t0,24(a1)
801f83b8: 8ca9001c lw t1,28(a1)
801f83bc: 24a50020 addiu a1,a1,32
801f83c0: 24840020 addiu a0,a0,32
801f83c4: ac8affe8 sw t2,-24(a0)
801f83c8: ac8bffec sw t3,-20(a0)
801f83cc: ac8cfff0 sw t4,-16(a0)
801f83d0: ac8ffff4 sw t7,-12(a0)
801f83d4: ac88fff8 sw t0,-8(a0)
801f83d8: ac89fffc sw t1,-4(a0)
801f83dc: cca00100 pref 0x0,256(a1)
801f83e0: cc810100 pref 0x1,256(a0)
801f83e4: 14d8ffea bne a2,t8,801f8390 <both_aligned+0x18>
801f83e8: 00000000 nop
Thanks
Manish Lachwani
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-23 2:17 ` Manish Lachwani
@ 2004-11-23 20:24 ` Maciej W. Rozycki
2004-11-23 23:41 ` Manish Lachwani
` (2 more replies)
0 siblings, 3 replies; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-23 20:24 UTC (permalink / raw)
To: Manish Lachwani
Cc: Ralf Baechle, Thiemo Seufer, Geert Uytterhoeven,
Linux/MIPS Development
On Mon, 22 Nov 2004, Manish Lachwani wrote:
> However, the crash still occurs. I dont think your patch was intended to
> fix the problem that I see below (resulting in crash).
Certainly, it wasn't, but it couldn't have hurt, either.
> Data bus error, epc == 801f83b8, ra == 80323f04
The reason are cp0 hazards, likely leading to an incorrect mapping. Try
the following patch; already applied to the mainline as obviously correct.
Maciej
patch-mips-2.6.10-rc1-20041112-mips-tlb-ehb-0
diff -up --recursive --new-file linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c
--- linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c 2004-11-23 19:52:53.000000000 +0000
+++ linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c 2004-11-23 19:58:31.000000000 +0000
@@ -448,7 +448,8 @@ L_LA(_split)
#define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off)
#define i_move(buf, a, b) i_ADDU(buf, a, 0, b)
#define i_nop(buf) i_sll(buf, 0, 0, 0)
-#define i_ssnop(buf) i_sll(buf, 0, 2, 1)
+#define i_ssnop(buf) i_sll(buf, 0, 0, 1)
+#define i_ehb(buf) i_sll(buf, 0, 0, 3)
#if CONFIG_MIPS64
static __init int in_compat_space_p(long addr)
@@ -799,12 +800,12 @@ static __init void build_tlb_write_rando
default:
/*
* Others are assumed to have one cycle mtc0 hazard,
- * and one cycle tlbwr hazard.
+ * and one cycle tlbwr hazard or to understand ehb.
* XXX: This might be overly general.
*/
- i_nop(p);
+ i_ehb(p);
i_tlbwr(p);
- i_nop(p);
+ i_ehb(p);
break;
}
}
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-23 20:24 ` Maciej W. Rozycki
@ 2004-11-23 23:41 ` Manish Lachwani
2004-11-24 1:40 ` Thiemo Seufer
2004-11-24 2:16 ` Thiemo Seufer
2 siblings, 0 replies; 28+ messages in thread
From: Manish Lachwani @ 2004-11-23 23:41 UTC (permalink / raw)
To: Maciej W. Rozycki
Cc: Ralf Baechle, Thiemo Seufer, Geert Uytterhoeven,
Linux/MIPS Development
Hi Maciej,
Maciej W. Rozycki wrote:
> On Mon, 22 Nov 2004, Manish Lachwani wrote:
>
>
>>However, the crash still occurs. I dont think your patch was intended to
>>fix the problem that I see below (resulting in crash).
>
>
> Certainly, it wasn't, but it couldn't have hurt, either.
I never said that your patch could have hurt ;)
>
>
>>Data bus error, epc == 801f83b8, ra == 80323f04
>
>
> The reason are cp0 hazards, likely leading to an incorrect mapping. Try
> the following patch; already applied to the mainline as obviously correct.
I did sync with the latest CVS sometime back and have been trying it out
on the MIPS Malta 24Kc. Looks stable upto now ...
Thanks
Manish Lachwani
>
> Maciej
>
> patch-mips-2.6.10-rc1-20041112-mips-tlb-ehb-0
> diff -up --recursive --new-file linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c
> --- linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c 2004-11-23 19:52:53.000000000 +0000
> +++ linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c 2004-11-23 19:58:31.000000000 +0000
> @@ -448,7 +448,8 @@ L_LA(_split)
> #define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off)
> #define i_move(buf, a, b) i_ADDU(buf, a, 0, b)
> #define i_nop(buf) i_sll(buf, 0, 0, 0)
> -#define i_ssnop(buf) i_sll(buf, 0, 2, 1)
> +#define i_ssnop(buf) i_sll(buf, 0, 0, 1)
> +#define i_ehb(buf) i_sll(buf, 0, 0, 3)
>
> #if CONFIG_MIPS64
> static __init int in_compat_space_p(long addr)
> @@ -799,12 +800,12 @@ static __init void build_tlb_write_rando
> default:
> /*
> * Others are assumed to have one cycle mtc0 hazard,
> - * and one cycle tlbwr hazard.
> + * and one cycle tlbwr hazard or to understand ehb.
> * XXX: This might be overly general.
> */
> - i_nop(p);
> + i_ehb(p);
> i_tlbwr(p);
> - i_nop(p);
> + i_ehb(p);
> break;
> }
> }
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-23 20:24 ` Maciej W. Rozycki
2004-11-23 23:41 ` Manish Lachwani
@ 2004-11-24 1:40 ` Thiemo Seufer
2004-11-24 9:44 ` Ralf Baechle
2004-11-24 2:16 ` Thiemo Seufer
2 siblings, 1 reply; 28+ messages in thread
From: Thiemo Seufer @ 2004-11-24 1:40 UTC (permalink / raw)
To: Maciej W. Rozycki
Cc: Manish Lachwani, Ralf Baechle, Geert Uytterhoeven,
Linux/MIPS Development
Maciej W. Rozycki wrote:
> On Mon, 22 Nov 2004, Manish Lachwani wrote:
>
> > However, the crash still occurs. I dont think your patch was intended to
> > fix the problem that I see below (resulting in crash).
>
> Certainly, it wasn't, but it couldn't have hurt, either.
>
> > Data bus error, epc == 801f83b8, ra == 80323f04
>
> The reason are cp0 hazards, likely leading to an incorrect mapping. Try
> the following patch; already applied to the mainline as obviously correct.
[snip]
> @@ -799,12 +800,12 @@ static __init void build_tlb_write_rando
> default:
> /*
> * Others are assumed to have one cycle mtc0 hazard,
> - * and one cycle tlbwr hazard.
> + * and one cycle tlbwr hazard or to understand ehb.
> * XXX: This might be overly general.
> */
> - i_nop(p);
> + i_ehb(p);
> i_tlbwr(p);
> - i_nop(p);
> + i_ehb(p);
> break;
Does r24k really need both delays? If not, it should get its own case.
Probably it should be separated even if it is identical, the code above
is nothing but a guess based on preexisting code.
Thiemo
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-23 20:24 ` Maciej W. Rozycki
2004-11-23 23:41 ` Manish Lachwani
2004-11-24 1:40 ` Thiemo Seufer
@ 2004-11-24 2:16 ` Thiemo Seufer
2004-11-24 8:52 ` Ralf Baechle
2 siblings, 1 reply; 28+ messages in thread
From: Thiemo Seufer @ 2004-11-24 2:16 UTC (permalink / raw)
To: Maciej W. Rozycki
Cc: Manish Lachwani, Ralf Baechle, Geert Uytterhoeven,
Linux/MIPS Development
Maciej W. Rozycki wrote:
[snip]
> patch-mips-2.6.10-rc1-20041112-mips-tlb-ehb-0
> diff -up --recursive --new-file linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c
> --- linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c 2004-11-23 19:52:53.000000000 +0000
> +++ linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c 2004-11-23 19:58:31.000000000 +0000
> @@ -448,7 +448,8 @@ L_LA(_split)
> #define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off)
> #define i_move(buf, a, b) i_ADDU(buf, a, 0, b)
> #define i_nop(buf) i_sll(buf, 0, 0, 0)
> -#define i_ssnop(buf) i_sll(buf, 0, 2, 1)
> +#define i_ssnop(buf) i_sll(buf, 0, 0, 1)
Just FYI, I took the ssnop definition from _ssnop in
include/asm-mips/hazards.h, which is different from SSNOP in
include/asm-mips/asm.h. I hope the difference is not meant to
mean more than a typo. :-)
Thiemo
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 2:16 ` Thiemo Seufer
@ 2004-11-24 8:52 ` Ralf Baechle
0 siblings, 0 replies; 28+ messages in thread
From: Ralf Baechle @ 2004-11-24 8:52 UTC (permalink / raw)
To: Thiemo Seufer
Cc: Maciej W. Rozycki, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, Nov 24, 2004 at 03:16:44AM +0100, Thiemo Seufer wrote:
> > -#define i_ssnop(buf) i_sll(buf, 0, 2, 1)
> > +#define i_ssnop(buf) i_sll(buf, 0, 0, 1)
>
> Just FYI, I took the ssnop definition from _ssnop in
> include/asm-mips/hazards.h, which is different from SSNOP in
> include/asm-mips/asm.h. I hope the difference is not meant to
> mean more than a typo. :-)
It was a typo. As a shift instruction it may have kept semantics that
way but not as a superscalar nop ...
Ralf
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 1:40 ` Thiemo Seufer
@ 2004-11-24 9:44 ` Ralf Baechle
2004-11-24 15:04 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Ralf Baechle @ 2004-11-24 9:44 UTC (permalink / raw)
To: Thiemo Seufer
Cc: Maciej W. Rozycki, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, Nov 24, 2004 at 02:40:57AM +0100, Thiemo Seufer wrote:
> > default:
> > /*
> > * Others are assumed to have one cycle mtc0 hazard,
> > - * and one cycle tlbwr hazard.
> > + * and one cycle tlbwr hazard or to understand ehb.
> > * XXX: This might be overly general.
> > */
> > - i_nop(p);
> > + i_ehb(p);
> > i_tlbwr(p);
> > - i_nop(p);
> > + i_ehb(p);
> > break;
>
> Does r24k really need both delays? If not, it should get its own case.
> Probably it should be separated even if it is identical, the code above
> is nothing but a guess based on preexisting code.
I would suggest to default to EHB only for architecture revision 2. For
any pre-V2 processor the outcome of a default case is basically luck and
so I would suggest to just panic and force people to read their CPU
manual.
Ralf
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 9:44 ` Ralf Baechle
@ 2004-11-24 15:04 ` Maciej W. Rozycki
2004-11-24 21:46 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-24 15:04 UTC (permalink / raw)
To: Ralf Baechle
Cc: Thiemo Seufer, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, 24 Nov 2004, Ralf Baechle wrote:
> > > default:
> > > /*
> > > * Others are assumed to have one cycle mtc0 hazard,
> > > - * and one cycle tlbwr hazard.
> > > + * and one cycle tlbwr hazard or to understand ehb.
> > > * XXX: This might be overly general.
> > > */
> > > - i_nop(p);
> > > + i_ehb(p);
> > > i_tlbwr(p);
> > > - i_nop(p);
> > > + i_ehb(p);
> > > break;
> >
> > Does r24k really need both delays? If not, it should get its own case.
Good point -- "eret" is a hazard barrier, too, so the second "ehb" is not
needed. For any release 2 implementation, actually.
> > Probably it should be separated even if it is identical, the code above
> > is nothing but a guess based on preexisting code.
>
> I would suggest to default to EHB only for architecture revision 2. For
> any pre-V2 processor the outcome of a default case is basically luck and
> so I would suggest to just panic and force people to read their CPU
> manual.
Agreed. We should probably verify these few "traditional" CPUs to be
handled explicitly ourselves, though, as there is no one else to look
after them.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 15:04 ` Maciej W. Rozycki
@ 2004-11-24 21:46 ` Maciej W. Rozycki
2004-11-24 22:12 ` Ralf Baechle
0 siblings, 1 reply; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-24 21:46 UTC (permalink / raw)
To: Ralf Baechle
Cc: Thiemo Seufer, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, 24 Nov 2004, Maciej W. Rozycki wrote:
> Agreed. We should probably verify these few "traditional" CPUs to be
> handled explicitly ourselves, though, as there is no one else to look
> after them.
Here's my proposal. It doesn't handle MIPS*R2 processors implicitly yet
as that asks for a clean implementation of MIPS architecture
determination. I'll do that in a separate step and adjust this code
afterwards. For now it should be OK. Agreed?
Note, these panic()s really beg for early printk() support -- but doesn't
everyone have it already? ;-)
Maciej
patch-mips-2.6.10-rc1-20041112-mips-tlbwr-0
diff -up --recursive --new-file linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c
--- linux-mips-2.6.10-rc1-20041112.macro/arch/mips/mm/tlbex.c Tue Nov 23 20:55:14 2004
+++ linux-mips-2.6.10-rc1-20041112/arch/mips/mm/tlbex.c Wed Nov 24 20:15:35 2004
@@ -761,10 +761,22 @@ static __init void build_tlb_write_rando
case CPU_R4600:
case CPU_R4700:
+ case CPU_R5000:
+ case CPU_5KC:
i_nop(p);
i_tlbwr(p);
break;
+ case CPU_R10000:
+ case CPU_R12000:
+ case CPU_4KC:
+ case CPU_SB1:
+ case CPU_4KSC:
+ case CPU_20KC:
+ case CPU_25KF:
+ i_tlbwr(p);
+ break;
+
case CPU_NEVADA:
i_nop(p); /* QED specifies 2 nops hazard */
/*
@@ -776,6 +788,12 @@ static __init void build_tlb_write_rando
l_tlbwr_hazard(l, *p);
break;
+ case CPU_4KEC:
+ case CPU_24K:
+ i_ehb(p);
+ i_tlbwr(p);
+ break;
+
case CPU_RM9000:
/*
* When the JTLB is updated by tlbwi or tlbwr, a subsequent
@@ -794,21 +812,9 @@ static __init void build_tlb_write_rando
i_ssnop(p);
break;
- case CPU_R10000:
- case CPU_R12000:
- case CPU_SB1:
- i_tlbwr(p);
- break;
-
default:
- /*
- * Others are assumed to have one cycle mtc0 hazard,
- * and one cycle tlbwr hazard or to understand ehb.
- * XXX: This might be overly general.
- */
- i_ehb(p);
- i_tlbwr(p);
- i_ehb(p);
+ panic("No TLB refill handler yet (CPU type: %d)",
+ current_cpu_data.cputype);
break;
}
}
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 21:46 ` Maciej W. Rozycki
@ 2004-11-24 22:12 ` Ralf Baechle
2004-11-24 22:39 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Ralf Baechle @ 2004-11-24 22:12 UTC (permalink / raw)
To: Maciej W. Rozycki
Cc: Thiemo Seufer, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, Nov 24, 2004 at 09:46:05PM +0000, Maciej W. Rozycki wrote:
> Here's my proposal. It doesn't handle MIPS*R2 processors implicitly yet
> as that asks for a clean implementation of MIPS architecture
> determination. I'll do that in a separate step and adjust this code
> afterwards. For now it should be OK. Agreed?
Sure, go ahead.
> Note, these panic()s really beg for early printk() support -- but doesn't
> everyone have it already? ;-)
It's so easy to implement with serial console. Best thing since sliced
bread :-)
Ralf
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-22 7:01 ` Ralf Baechle
2004-11-23 0:26 ` Manish Lachwani
@ 2004-11-24 22:24 ` Manish Lachwani
1 sibling, 0 replies; 28+ messages in thread
From: Manish Lachwani @ 2004-11-24 22:24 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Thiemo Seufer, Geert Uytterhoeven, Linux/MIPS Development
Ralf Baechle wrote:
> On Sun, Nov 21, 2004 at 09:37:57PM +0100, Thiemo Seufer wrote:
>
>
>>Aww, fatal error in the spelling module. :-)
>>Updated.
>
>
> The patch was looking good, so I gave it a shot on one of my machines also
> and it was working fine, applied.
>
> Thanks!
>
> Ralf
>
Hello !
FYI, I have also tried this patch on Broadcom Sibyte (SWARM board) and
PMC Rm79XX (Rm9000 core on Ocelot-3) and it has worked well.
Thanks
Manish Lachwani
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 22:12 ` Ralf Baechle
@ 2004-11-24 22:39 ` Maciej W. Rozycki
2004-11-24 22:45 ` Ralf Baechle
0 siblings, 1 reply; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-24 22:39 UTC (permalink / raw)
To: Ralf Baechle
Cc: Thiemo Seufer, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, 24 Nov 2004, Ralf Baechle wrote:
> > Note, these panic()s really beg for early printk() support -- but doesn't
> > everyone have it already? ;-)
>
> It's so easy to implement with serial console. Best thing since sliced
> bread :-)
Yep, and some systems have an appropriate console output callback in the
firmware making it trivial.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 22:39 ` Maciej W. Rozycki
@ 2004-11-24 22:45 ` Ralf Baechle
2004-11-24 23:13 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Ralf Baechle @ 2004-11-24 22:45 UTC (permalink / raw)
To: Maciej W. Rozycki
Cc: Thiemo Seufer, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, Nov 24, 2004 at 10:39:56PM +0000, Maciej W. Rozycki wrote:
> > It's so easy to implement with serial console. Best thing since sliced
> > bread :-)
>
> Yep, and some systems have an appropriate console output callback in the
> firmware making it trivial.
Which unfortunately is becoming unusable fairly soon on many systems.
IP27: ARC is dead after the first TLB flush. IP22: dead after the
external L2 controller was enabled etc. On average I'm less than
pleased with firmware usability even for simple stuff such as printing ...
Ralf
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH] Synthesize TLB refill handler at runtime
2004-11-24 22:45 ` Ralf Baechle
@ 2004-11-24 23:13 ` Maciej W. Rozycki
0 siblings, 0 replies; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-24 23:13 UTC (permalink / raw)
To: Ralf Baechle
Cc: Thiemo Seufer, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
On Wed, 24 Nov 2004, Ralf Baechle wrote:
> Which unfortunately is becoming unusable fairly soon on many systems.
> IP27: ARC is dead after the first TLB flush. IP22: dead after the
> external L2 controller was enabled etc. On average I'm less than
> pleased with firmware usability even for simple stuff such as printing ...
Broken firmware... For DECstations it works until we wipe out RAM
reserved by the firmware, right before running `init'. Which is not
firmware's fault, of course, and which I plan to get fixed eventually to
be able to access and set firmware environment variables from the
userland, to do a proper halt and reboot (with a command), etc. I've not
decided whether to rip it away completely or to leave it as a
configuration option for these memory starved. You can have 4MB of RAM on
some DECstations, you know.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* RE: [PATCH] Synthesize TLB refill handler at runtime
@ 2004-11-29 19:14 Brad Larson
2004-11-29 19:47 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Brad Larson @ 2004-11-29 19:14 UTC (permalink / raw)
To: 'Ralf Baechle', Maciej W. Rozycki
Cc: Thiemo Seufer, Manish Lachwani, Geert Uytterhoeven,
Linux/MIPS Development
-----Original Message-----
From: linux-mips-bounce@linux-mips.org
[mailto:linux-mips-bounce@linux-mips.org]On Behalf Of Ralf Baechle
Sent: Wednesday, November 24, 2004 2:46 PM
To: Maciej W. Rozycki
Cc: Thiemo Seufer; Manish Lachwani; Geert Uytterhoeven; Linux/MIPS
Development
Subject: Re: [PATCH] Synthesize TLB refill handler at runtime
On Wed, Nov 24, 2004 at 10:39:56PM +0000, Maciej W. Rozycki wrote:
> > It's so easy to implement with serial console. Best thing since sliced
> > bread :-)
>
> Yep, and some systems have an appropriate console output callback in the
> firmware making it trivial.
Which unfortunately is becoming unusable fairly soon on many systems.
IP27: ARC is dead after the first TLB flush. IP22: dead after the
external L2 controller was enabled etc. On average I'm less than
pleased with firmware usability even for simple stuff such as printing ...
Ralf
Real, not-demo, 32-bit systems are the majority and will typically put the io up at 4GB and let it grow down and have the memory grow up from zero. There is a natural dividing line, minimal TLB usage, and straightforward access to the first 512MB of memory unmapped. Which of course means moving the boot device at bfc00000 up high as well as getting things like discovery at b4000000 out of the way as well.
So, for firmware callbacks to do printf the kernel would need to restore the mappings the firmware needed before handoff. Also firmware usually carves out some memory above 1MB for its drivers so either the firmware has to stop servicing drivers if the kernel stomps on this memory or the kernel needs to get the memory ranges available at handoff which I recall doing with netbsd since it accepted non-contiguous memory.
--Brad
^ permalink raw reply [flat|nested] 28+ messages in thread
* RE: [PATCH] Synthesize TLB refill handler at runtime
2004-11-29 19:14 Brad Larson
@ 2004-11-29 19:47 ` Maciej W. Rozycki
0 siblings, 0 replies; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-29 19:47 UTC (permalink / raw)
To: Brad Larson
Cc: 'Ralf Baechle', Thiemo Seufer, Manish Lachwani,
Geert Uytterhoeven, Linux/MIPS Development
On Mon, 29 Nov 2004, Brad Larson wrote:
> Real, not-demo, 32-bit systems are the majority and will typically put
> the io up at 4GB and let it grow down and have the memory grow up from
> zero. There is a natural dividing line, minimal TLB usage, and
> straightforward access to the first 512MB of memory unmapped. Which of
> course means moving the boot device at bfc00000 up high as well as
> getting things like discovery at b4000000 out of the way as well.
So?
> So, for firmware callbacks to do printf the kernel would need to restore
> the mappings the firmware needed before handoff. Also firmware usually
If it needs to access anything beyond KSEG0/1 and XKPHYS is unavailable,
then setting up a single wired TLB entry, using a large page if hardware
is scattered (you can have 256MB pages, you know; if you don't implement
that and your largest page size does not cover the whole area, you can
switch mappings), upon entry and removing it upon exit is trivial and
should just work. Firmware callbacks are not meant to be common -- the
performance loss from an invalidated TLB entry will be negligible; you can
restore the original TLB entry, too.
> carves out some memory above 1MB for its drivers so either the firmware
> has to stop servicing drivers if the kernel stomps on this memory or the
> kernel needs to get the memory ranges available at handoff which I
> recall doing with netbsd since it accepted non-contiguous memory.
Everything needed for that is already present (it's been there for a
couple of years already) -- just let Linux know in some way which areas of
RAM are available for general use and which ones are best left untouched.
It's being done for other systems. There is a dedicated "ROM data" tag
available for that even in addition to the generic "reserved" one.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* RE: [PATCH] Synthesize TLB refill handler at runtime
@ 2004-11-29 19:57 Brad Larson
2004-11-29 20:20 ` Maciej W. Rozycki
0 siblings, 1 reply; 28+ messages in thread
From: Brad Larson @ 2004-11-29 19:57 UTC (permalink / raw)
To: 'Maciej W. Rozycki'
Cc: 'Ralf Baechle', Thiemo Seufer, Manish Lachwani,
Geert Uytterhoeven, Linux/MIPS Development
hmm, the original email left the impression that its desirable for the firmware to be available after kernel handoff. Looks like the current status is fine for those who need it.
--Brad
-----Original Message-----
From: macro@blysk.ds.pg.gda.pl [mailto:macro@blysk.ds.pg.gda.pl]On
Behalf Of Maciej W. Rozycki
Sent: Monday, November 29, 2004 11:47 AM
To: Brad Larson
Cc: 'Ralf Baechle'; Thiemo Seufer; Manish Lachwani; Geert Uytterhoeven;
Linux/MIPS Development
Subject: RE: [PATCH] Synthesize TLB refill handler at runtime
On Mon, 29 Nov 2004, Brad Larson wrote:
> Real, not-demo, 32-bit systems are the majority and will typically put
> the io up at 4GB and let it grow down and have the memory grow up from
> zero. There is a natural dividing line, minimal TLB usage, and
> straightforward access to the first 512MB of memory unmapped. Which of
> course means moving the boot device at bfc00000 up high as well as
> getting things like discovery at b4000000 out of the way as well.
So?
> So, for firmware callbacks to do printf the kernel would need to restore
> the mappings the firmware needed before handoff. Also firmware usually
If it needs to access anything beyond KSEG0/1 and XKPHYS is unavailable,
then setting up a single wired TLB entry, using a large page if hardware
is scattered (you can have 256MB pages, you know; if you don't implement
that and your largest page size does not cover the whole area, you can
switch mappings), upon entry and removing it upon exit is trivial and
should just work. Firmware callbacks are not meant to be common -- the
performance loss from an invalidated TLB entry will be negligible; you can
restore the original TLB entry, too.
> carves out some memory above 1MB for its drivers so either the firmware
> has to stop servicing drivers if the kernel stomps on this memory or the
> kernel needs to get the memory ranges available at handoff which I
> recall doing with netbsd since it accepted non-contiguous memory.
Everything needed for that is already present (it's been there for a
couple of years already) -- just let Linux know in some way which areas of
RAM are available for general use and which ones are best left untouched.
It's being done for other systems. There is a dedicated "ROM data" tag
available for that even in addition to the generic "reserved" one.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
* RE: [PATCH] Synthesize TLB refill handler at runtime
2004-11-29 19:57 Brad Larson
@ 2004-11-29 20:20 ` Maciej W. Rozycki
0 siblings, 0 replies; 28+ messages in thread
From: Maciej W. Rozycki @ 2004-11-29 20:20 UTC (permalink / raw)
To: Brad Larson
Cc: 'Ralf Baechle', Thiemo Seufer, Manish Lachwani,
Geert Uytterhoeven, Linux/MIPS Development
On Mon, 29 Nov 2004, Brad Larson wrote:
> hmm, the original email left the impression that its desirable for the
The impression was right -- of course as long as the firmware actually
has something useful to offer.
> firmware to be available after kernel handoff. Looks like the current
> status is fine for those who need it.
It depends on what you want to achieve. For console output (early
printk) support, it's usually only needed till the real console driver is
registered, which is just a handful of lines to be printed. For debugging
you may want to support console I/O via the firmware during a normal
system use, but then performance is not that important. Other uses may
include calls to functions for access to firmware configuration, like
environment variables you'd otherwise access from the firmware's operator
interface, or fancy ways of doing a reboot. These are not
performance-critical, either, so doing some sort of TLB reconfiguration
within the firmware for the duration of callbacks would be acceptable.
Maciej
^ permalink raw reply [flat|nested] 28+ messages in thread
end of thread, other threads:[~2004-11-29 20:20 UTC | newest]
Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-11-21 17:02 [PATCH] Synthesize TLB refill handler at runtime Thiemo Seufer
2004-11-21 19:50 ` Geert Uytterhoeven
2004-11-21 20:37 ` Thiemo Seufer
2004-11-22 7:01 ` Ralf Baechle
2004-11-23 0:26 ` Manish Lachwani
2004-11-23 0:40 ` Maciej W. Rozycki
2004-11-23 2:17 ` Manish Lachwani
2004-11-23 20:24 ` Maciej W. Rozycki
2004-11-23 23:41 ` Manish Lachwani
2004-11-24 1:40 ` Thiemo Seufer
2004-11-24 9:44 ` Ralf Baechle
2004-11-24 15:04 ` Maciej W. Rozycki
2004-11-24 21:46 ` Maciej W. Rozycki
2004-11-24 22:12 ` Ralf Baechle
2004-11-24 22:39 ` Maciej W. Rozycki
2004-11-24 22:45 ` Ralf Baechle
2004-11-24 23:13 ` Maciej W. Rozycki
2004-11-24 2:16 ` Thiemo Seufer
2004-11-24 8:52 ` Ralf Baechle
2004-11-24 22:24 ` Manish Lachwani
2004-11-22 14:37 ` Maciej W. Rozycki
2004-11-22 15:56 ` Thiemo Seufer
2004-11-22 18:39 ` Maciej W. Rozycki
2004-11-21 20:43 ` Ralf Baechle
-- strict thread matches above, loose matches on Subject: below --
2004-11-29 19:14 Brad Larson
2004-11-29 19:47 ` Maciej W. Rozycki
2004-11-29 19:57 Brad Larson
2004-11-29 20:20 ` Maciej W. Rozycki
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.