* [PATCH 0/2] MIPS: Use Octeon BBIT instructions in TLB handlers.
@ 2010-12-20 23:54 David Daney
2010-12-20 23:54 ` [PATCH 1/2] MIPS: Declare uasm bbit0 and bbit1 functions David Daney
2010-12-20 23:54 ` [PATCH 2/2] MIPS: Use BBIT instructions in TLB handlers David Daney
0 siblings, 2 replies; 5+ messages in thread
From: David Daney @ 2010-12-20 23:54 UTC (permalink / raw)
To: linux-mips, ralf; +Cc: David Daney
Octeon has instructions that conditionally branch based on the value
of any single bit in any register. We use these to reduce the number
of instructions in the generated TLB handlers.
This set applies on top of the recent KScratch patch set.
David Daney (2):
MIPS: Declare uasm bbit0 and bbit1 functions.
MIPS: Use BBIT instructions in TLB handlers
arch/mips/include/asm/uasm.h | 2 +
arch/mips/mm/tlbex.c | 119 +++++++++++++++++++++++++++++++----------
2 files changed, 92 insertions(+), 29 deletions(-)
--
1.7.2.3
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] MIPS: Declare uasm bbit0 and bbit1 functions.
2010-12-20 23:54 [PATCH 0/2] MIPS: Use Octeon BBIT instructions in TLB handlers David Daney
@ 2010-12-20 23:54 ` David Daney
2010-12-28 16:59 ` Ralf Baechle
2010-12-20 23:54 ` [PATCH 2/2] MIPS: Use BBIT instructions in TLB handlers David Daney
1 sibling, 1 reply; 5+ messages in thread
From: David Daney @ 2010-12-20 23:54 UTC (permalink / raw)
To: linux-mips, ralf; +Cc: David Daney
These are already defined, but declaring them allows them to be used
outside of uasm.c.
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
arch/mips/include/asm/uasm.h | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index 99dae68..d361df3 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -117,6 +117,8 @@ Ip_u2u1u3(_xori);
Ip_u2u1msbu3(_dins);
Ip_u2u1msbu3(_dinsm);
Ip_u1(_syscall);
+Ip_u1u2s3(_bbit0);
+Ip_u1u2s3(_bbit1);
/* Handle labels. */
struct uasm_label {
--
1.7.2.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] MIPS: Use BBIT instructions in TLB handlers
2010-12-20 23:54 [PATCH 0/2] MIPS: Use Octeon BBIT instructions in TLB handlers David Daney
2010-12-20 23:54 ` [PATCH 1/2] MIPS: Declare uasm bbit0 and bbit1 functions David Daney
@ 2010-12-20 23:54 ` David Daney
2010-12-28 17:00 ` Ralf Baechle
1 sibling, 1 reply; 5+ messages in thread
From: David Daney @ 2010-12-20 23:54 UTC (permalink / raw)
To: linux-mips, ralf; +Cc: David Daney
If the CPU supports BBIT0 and BBIT1, use them in TLB handlers as they
are more efficient than an AND followed by a branch and then
restoring the clobbered register.
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
arch/mips/mm/tlbex.c | 119 +++++++++++++++++++++++++++++++++++++------------
1 files changed, 90 insertions(+), 29 deletions(-)
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index cec0e1b..601f4c2 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -65,6 +65,18 @@ static inline int __maybe_unused r10000_llsc_war(void)
return R10000_LLSC_WAR;
}
+static int use_bbit_insns(void)
+{
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON:
+ case CPU_CAVIUM_OCTEON_PLUS:
+ case CPU_CAVIUM_OCTEON2:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
/*
* Found by experiment: At least some revisions of the 4kc throw under
* some circumstances a machine check exception, triggered by invalid
@@ -507,8 +519,12 @@ build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp,
unsigned int pmd, int lid)
{
UASM_i_LW(p, tmp, 0, pmd);
- uasm_i_andi(p, tmp, tmp, _PAGE_HUGE);
- uasm_il_bnez(p, r, tmp, lid);
+ if (use_bbit_insns()) {
+ uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid);
+ } else {
+ uasm_i_andi(p, tmp, tmp, _PAGE_HUGE);
+ uasm_il_bnez(p, r, tmp, lid);
+ }
}
static __cpuinit void build_huge_update_entries(u32 **p,
@@ -1183,14 +1199,20 @@ build_pte_present(u32 **p, struct uasm_reloc **r,
unsigned int pte, unsigned int ptr, enum label_id lid)
{
if (kernel_uses_smartmips_rixi) {
- uasm_i_andi(p, pte, pte, _PAGE_PRESENT);
- uasm_il_beqz(p, r, pte, lid);
+ if (use_bbit_insns()) {
+ uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
+ uasm_i_nop(p);
+ } else {
+ uasm_i_andi(p, pte, pte, _PAGE_PRESENT);
+ uasm_il_beqz(p, r, pte, lid);
+ iPTE_LW(p, pte, ptr);
+ }
} else {
uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ);
uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ);
uasm_il_bnez(p, r, pte, lid);
+ iPTE_LW(p, pte, ptr);
}
- iPTE_LW(p, pte, ptr);
}
/* Make PTE valid, store result in PTR. */
@@ -1211,10 +1233,17 @@ static void __cpuinit
build_pte_writable(u32 **p, struct uasm_reloc **r,
unsigned int pte, unsigned int ptr, enum label_id lid)
{
- uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
- uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
- uasm_il_bnez(p, r, pte, lid);
- iPTE_LW(p, pte, ptr);
+ if (use_bbit_insns()) {
+ uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid);
+ uasm_i_nop(p);
+ uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid);
+ uasm_i_nop(p);
+ } else {
+ uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
+ uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE);
+ uasm_il_bnez(p, r, pte, lid);
+ iPTE_LW(p, pte, ptr);
+ }
}
/* Make PTE writable, update software status bits as well, then store
@@ -1238,9 +1267,14 @@ static void __cpuinit
build_pte_modifiable(u32 **p, struct uasm_reloc **r,
unsigned int pte, unsigned int ptr, enum label_id lid)
{
- uasm_i_andi(p, pte, pte, _PAGE_WRITE);
- uasm_il_beqz(p, r, pte, lid);
- iPTE_LW(p, pte, ptr);
+ if (use_bbit_insns()) {
+ uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid);
+ uasm_i_nop(p);
+ } else {
+ uasm_i_andi(p, pte, pte, _PAGE_WRITE);
+ uasm_il_beqz(p, r, pte, lid);
+ iPTE_LW(p, pte, ptr);
+ }
}
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
@@ -1485,14 +1519,23 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
* If the page is not _PAGE_VALID, RI or XI could not
* have triggered it. Skip the expensive test..
*/
- uasm_i_andi(&p, K0, K0, _PAGE_VALID);
- uasm_il_beqz(&p, &r, K0, label_tlbl_goaround1);
+ if (use_bbit_insns()) {
+ uasm_il_bbit0(&p, &r, K0, ilog2(_PAGE_VALID),
+ label_tlbl_goaround1);
+ } else {
+ uasm_i_andi(&p, K0, K0, _PAGE_VALID);
+ uasm_il_beqz(&p, &r, K0, label_tlbl_goaround1);
+ }
uasm_i_nop(&p);
uasm_i_tlbr(&p);
/* Examine entrylo 0 or 1 based on ptr. */
- uasm_i_andi(&p, K0, K1, sizeof(pte_t));
- uasm_i_beqz(&p, K0, 8);
+ if (use_bbit_insns()) {
+ uasm_i_bbit0(&p, K1, ilog2(sizeof(pte_t)), 8);
+ } else {
+ uasm_i_andi(&p, K0, K1, sizeof(pte_t));
+ uasm_i_beqz(&p, K0, 8);
+ }
UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/
UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */
@@ -1500,12 +1543,18 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
* If the entryLo (now in K0) is valid (bit 1), RI or
* XI must have triggered it.
*/
- uasm_i_andi(&p, K0, K0, 2);
- uasm_il_bnez(&p, &r, K0, label_nopage_tlbl);
-
- uasm_l_tlbl_goaround1(&l, p);
- /* Reload the PTE value */
- iPTE_LW(&p, K0, K1);
+ if (use_bbit_insns()) {
+ uasm_il_bbit1(&p, &r, K0, 1, label_nopage_tlbl);
+ /* Reload the PTE value */
+ iPTE_LW(&p, K0, K1);
+ uasm_l_tlbl_goaround1(&l, p);
+ } else {
+ uasm_i_andi(&p, K0, K0, 2);
+ uasm_il_bnez(&p, &r, K0, label_nopage_tlbl);
+ uasm_l_tlbl_goaround1(&l, p);
+ /* Reload the PTE value */
+ iPTE_LW(&p, K0, K1);
+ }
}
build_make_valid(&p, &r, K0, K1);
build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1);
@@ -1525,23 +1574,35 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
* If the page is not _PAGE_VALID, RI or XI could not
* have triggered it. Skip the expensive test..
*/
- uasm_i_andi(&p, K0, K0, _PAGE_VALID);
- uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2);
+ if (use_bbit_insns()) {
+ uasm_il_bbit0(&p, &r, K0, ilog2(_PAGE_VALID),
+ label_tlbl_goaround2);
+ } else {
+ uasm_i_andi(&p, K0, K0, _PAGE_VALID);
+ uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2);
+ }
uasm_i_nop(&p);
uasm_i_tlbr(&p);
/* Examine entrylo 0 or 1 based on ptr. */
- uasm_i_andi(&p, K0, K1, sizeof(pte_t));
- uasm_i_beqz(&p, K0, 8);
-
+ if (use_bbit_insns()) {
+ uasm_i_bbit0(&p, K1, ilog2(sizeof(pte_t)), 8);
+ } else {
+ uasm_i_andi(&p, K0, K1, sizeof(pte_t));
+ uasm_i_beqz(&p, K0, 8);
+ }
UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/
UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */
/*
* If the entryLo (now in K0) is valid (bit 1), RI or
* XI must have triggered it.
*/
- uasm_i_andi(&p, K0, K0, 2);
- uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2);
+ if (use_bbit_insns()) {
+ uasm_il_bbit0(&p, &r, K0, 1, label_tlbl_goaround2);
+ } else {
+ uasm_i_andi(&p, K0, K0, 2);
+ uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2);
+ }
/* Reload the PTE value */
iPTE_LW(&p, K0, K1);
--
1.7.2.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] MIPS: Declare uasm bbit0 and bbit1 functions.
2010-12-20 23:54 ` [PATCH 1/2] MIPS: Declare uasm bbit0 and bbit1 functions David Daney
@ 2010-12-28 16:59 ` Ralf Baechle
0 siblings, 0 replies; 5+ messages in thread
From: Ralf Baechle @ 2010-12-28 16:59 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Dec 20, 2010 at 03:54:49PM -0800, David Daney wrote:
> these are already defined, but declaring them allow them to be used
> outside of uasm.c.
>
> Signed-off-by: David Daney <ddaney@caviumnetworks.com>
> ---
> arch/mips/include/asm/uasm.h | 2 ++
Queued for 2.6.38 - but a few of your patches to uasm.h only apply with
fuzz so I'm wondering if I'm missing a patch or what your patches were
created against.
Thanks,
Ralf
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2/2] MIPS: Use BBIT instructions in TLB handlers
2010-12-20 23:54 ` [PATCH 2/2] MIPS: Use BBIT instructions in TLB handlers David Daney
@ 2010-12-28 17:00 ` Ralf Baechle
0 siblings, 0 replies; 5+ messages in thread
From: Ralf Baechle @ 2010-12-28 17:00 UTC (permalink / raw)
To: David Daney; +Cc: linux-mips
On Mon, Dec 20, 2010 at 03:54:50PM -0800, David Daney wrote:
> If the CPU supports BBIT0 and BBIT1, use them in TLB handlers as they
> are more efficient than an AND followed by an branch and then
> restoring the clobbered register.
Queued for 2.6.38. Thanks,
Ralf
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-12-28 17:00 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-12-20 23:54 [PATCH 0/2] MIPS: Use Octeon BBIT instructions in TLB handlers David Daney
2010-12-20 23:54 ` [PATCH 1/2] MIPS: Declare uasm bbit0 and bbit1 functions David Daney
2010-12-28 16:59 ` Ralf Baechle
2010-12-20 23:54 ` [PATCH 2/2] MIPS: Use BBIT instructions in TLB handlers David Daney
2010-12-28 17:00 ` Ralf Baechle
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.