* [PATCH v3] MIPS: Optimise TLB handlers for MIPS32/64 R2 cores.
@ 2013-01-04 17:59 Steven J. Hill
2013-01-04 18:14 ` David Daney
0 siblings, 1 reply; 4+ messages in thread
From: Steven J. Hill @ 2013-01-04 17:59 UTC (permalink / raw)
To: linux-mips; +Cc: Steven J. Hill, ralf, jchandra, ddaney.cavm
From: "Steven J. Hill" <sjhill@mips.com>
The EXT and INS instructions can be used to decrease code size and
thus speed up TLB handlers on MIPS32R2 and MIPS64R2 cores.
Signed-off-by: Steven J. Hill <sjhill@mips.com>
---
arch/mips/include/asm/uasm.h | 5 +++++
arch/mips/mm/tlbex.c | 26 ++++++++++++++++++++++++++
arch/mips/mm/uasm.c | 4 +++-
3 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index 7e0bf17..e7cacce 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -77,6 +77,7 @@ Ip_u1u2s3(_bne);
Ip_u2s3u1(_cache);
Ip_u2u1s3(_daddiu);
Ip_u3u1u2(_daddu);
+Ip_u2u1msbu3(_dext);
Ip_u2u1msbu3(_dins);
Ip_u2u1msbu3(_dinsm);
Ip_u1u2u3(_dmfc0);
@@ -151,6 +152,8 @@ static inline void __uasminit uasm_l##lb(struct uasm_label **lab, u32 *addr) \
#ifdef CONFIG_64BIT
# define UASM_i_ADDIU(buf, rs, rt, val) uasm_i_daddiu(buf, rs, rt, val)
# define UASM_i_ADDU(buf, rs, rt, rd) uasm_i_daddu(buf, rs, rt, rd)
+# define UASM_i_EXT(buf, rs, rt, msbd, lsb) uasm_i_dext(buf, rs, rt, msbd, lsb)
+# define UASM_i_INS(buf, rs, rt, msbd, lsb) uasm_i_dins(buf, rs, rt, msbd, lsb)
# define UASM_i_LL(buf, rs, rt, off) uasm_i_lld(buf, rs, rt, off)
# define UASM_i_LW(buf, rs, rt, off) uasm_i_ld(buf, rs, rt, off)
# define UASM_i_LWX(buf, rs, rt, rd) uasm_i_ldx(buf, rs, rt, rd)
@@ -167,6 +170,8 @@ static inline void __uasminit uasm_l##lb(struct uasm_label **lab, u32 *addr) \
#else
# define UASM_i_ADDIU(buf, rs, rt, val) uasm_i_addiu(buf, rs, rt, val)
# define UASM_i_ADDU(buf, rs, rt, rd) uasm_i_addu(buf, rs, rt, rd)
+# define UASM_i_EXT(buf, rs, rt, msbd, lsb) uasm_i_ext(buf, rs, rt, msbd, lsb)
+# define UASM_i_INS(buf, rs, rt, msbd, lsb) uasm_i_ins(buf, rs, rt, msbd, lsb)
# define UASM_i_LL(buf, rs, rt, off) uasm_i_ll(buf, rs, rt, off)
# define UASM_i_LW(buf, rs, rt, off) uasm_i_lw(buf, rs, rt, off)
# define UASM_i_LWX(buf, rs, rt, rd) uasm_i_lwx(buf, rs, rt, rd)
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 074d659..5681623 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -977,6 +977,13 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr)
#endif
uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */
uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr);
+
+ if (cpu_has_mips_r2) {
+ uasm_i_ext(p, tmp, tmp, PGDIR_SHIFT, (32 - PGDIR_SHIFT));
+ uasm_i_ins(p, ptr, tmp, PGD_T_LOG2, (32 - PGDIR_SHIFT));
+ return;
+ }
+
uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */
uasm_i_sll(p, tmp, tmp, PGD_T_LOG2);
uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */
@@ -1012,6 +1019,25 @@ static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx)
static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr)
{
+ if (cpu_has_mips_r2) {
+ /* PTE ptr offset is obtained from BadVAddr */
+ UASM_i_MFC0(p, tmp, C0_BADVADDR);
+ UASM_i_LW(p, ptr, 0, ptr);
+ UASM_i_EXT(p, tmp, tmp, (PAGE_SHIFT + 1),
+#ifdef CONFIG_64BIT
+ (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1));
+#else
+ (PGDIR_SHIFT - PAGE_SHIFT - 1));
+#endif
+ UASM_i_INS(p, ptr, tmp, (PTE_T_LOG2 + 1),
+#ifdef CONFIG_64BIT
+ (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1));
+#else
+ (PGDIR_SHIFT - PAGE_SHIFT - 1));
+#endif
+ return;
+ }
+
/*
* Bug workaround for the Nevada. It seems as if under certain
* circumstances the move from cp0_context might produce a
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 39b8910..5bfb75a 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -60,7 +60,7 @@ enum opcode {
insn_invalid,
insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1,
insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
- insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm,
+ insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dext, insn_dins, insn_dinsm,
insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret,
insn_ext, insn_ins, insn_j, insn_jal, insn_jr, insn_ld, insn_ldx,
@@ -103,6 +103,7 @@ static struct insn insn_table[] __uasminitdata = {
{ insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
+ { insn_dext, M(spec3_op, 0, 0, 0, 0, dext_op), RS | RT | RD | RE},
{ insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE },
{ insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE },
{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
@@ -435,6 +436,7 @@ I_0(_tlbwi)
I_0(_tlbwr)
I_u3u1u2(_xor)
I_u2u1u3(_xori)
+I_u2u1msbdu3(_dext)
I_u2u1msbu3(_dins);
I_u2u1msb32u3(_dinsm);
I_u1(_syscall);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v3] MIPS: Optimise TLB handlers for MIPS32/64 R2 cores.
2013-01-04 17:59 [PATCH v3] MIPS: Optimise TLB handlers for MIPS32/64 R2 cores Steven J. Hill
@ 2013-01-04 18:14 ` David Daney
2013-01-04 18:24 ` Hill, Steven
0 siblings, 1 reply; 4+ messages in thread
From: David Daney @ 2013-01-04 18:14 UTC (permalink / raw)
To: Steven J. Hill; +Cc: linux-mips, ralf, jchandra
On 01/04/2013 09:59 AM, Steven J. Hill wrote:
> From: "Steven J. Hill" <sjhill@mips.com>
>
> The EXT and INS instructions can be used to decrease code size and
> thus speed up TLB handlers on MIPS32R2 and MIPS64R2 cores.
>
> Signed-off-by: Steven J. Hill <sjhill@mips.com>
[...]
> +#ifdef CONFIG_64BIT
> + (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1));
> +#else
> + (PGDIR_SHIFT - PAGE_SHIFT - 1));
> +#endif
> + UASM_i_INS(p, ptr, tmp, (PTE_T_LOG2 + 1),
As far as I can tell, (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1) and
(PGDIR_SHIFT - PAGE_SHIFT - 1) are the same thing. So why the two cases?
Can you give an example of where they might differ?
David Daney
^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [PATCH v3] MIPS: Optimise TLB handlers for MIPS32/64 R2 cores.
2013-01-04 18:14 ` David Daney
@ 2013-01-04 18:24 ` Hill, Steven
2013-01-04 19:51 ` Jayachandran C.
0 siblings, 1 reply; 4+ messages in thread
From: Hill, Steven @ 2013-01-04 18:24 UTC (permalink / raw)
To: David Daney
Cc: linux-mips@linux-mips.org, ralf@linux-mips.org,
jchandra@broadcom.com
>> +#ifdef CONFIG_64BIT
>> + (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1));
>> +#else
>> + (PGDIR_SHIFT - PAGE_SHIFT - 1));
>> +#endif
>> + UASM_i_INS(p, ptr, tmp, (PTE_T_LOG2 + 1),
>
> As far as I can tell, (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1) and
> (PGDIR_SHIFT - PAGE_SHIFT - 1) are the same thing. So why the two cases?
>
>Can you give an example of where they might differ?
>
David,
Actually, no, I cannot. The calculation was given to me by 'jchandra'. I do not have 64-bit R2 hardware, let alone the Broadcom platform; he said it worked on his platform, so I took it from him as is. So does this patch work on Cavium platforms using both calculation methods? It would be nice if 'jchandra' could chime in, but he may be on holiday or something.
-Steve
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v3] MIPS: Optimise TLB handlers for MIPS32/64 R2 cores.
2013-01-04 18:24 ` Hill, Steven
@ 2013-01-04 19:51 ` Jayachandran C.
0 siblings, 0 replies; 4+ messages in thread
From: Jayachandran C. @ 2013-01-04 19:51 UTC (permalink / raw)
To: Hill, Steven; +Cc: David Daney, linux-mips@linux-mips.org, ralf@linux-mips.org
On Fri, Jan 04, 2013 at 06:24:54PM +0000, Hill, Steven wrote:
> >> +#ifdef CONFIG_64BIT
> >> + (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1));
> >> +#else
> >> + (PGDIR_SHIFT - PAGE_SHIFT - 1));
> >> +#endif
> >> + UASM_i_INS(p, ptr, tmp, (PTE_T_LOG2 + 1),
> >
> > As far as I can tell, (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1) and
> > (PGDIR_SHIFT - PAGE_SHIFT - 1) are the same thing. So why the two cases?
> >
> >Can you give an example of where they might differ?
> >
> David,
>
> Actually, no, I cannot. The calculation was given to me by 'jchandra'. I do not have 64-bit R2 hardware, let alone the Broadcom platform; he said it worked on his platform, so I took it from him as is. So does this patch work on Cavium platforms using both calculation methods? It would be nice if 'jchandra' could chime in, but he may be on holiday or something.
This does not really need hardware. On 64-bit, with 16K pages, the expansion of
the (PGDIR_SHIFT - PAGE_SHIFT - 1) form of the macro argument is (from tlbex.i):
uasm_i_dext(p, tmp, tmp, 14 +1, ((14 + (14 + 0 - 3)) + (14 + 0 - 3))-14 -1);
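This evaluates to 21, which is obviously wrong: it should be 10, which is what (PAGE_SHIFT - PTE_ORDER - PTE_T_LOG2 - 1) gives here (14 - 0 - 3 - 1). So the two expressions are not equivalent on 64-bit.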
I had sent the generated tlb handler which showed the incorrect size to sjhill,
but that probably got lost in the new year holiday mails.
JC.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2013-01-04 19:50 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-04 17:59 [PATCH v3] MIPS: Optimise TLB handlers for MIPS32/64 R2 cores Steven J. Hill
2013-01-04 18:14 ` David Daney
2013-01-04 18:24 ` Hill, Steven
2013-01-04 19:51 ` Jayachandran C.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox