* [Qemu-devel] [PATCH v2 01/11] target-mips: optimize gen_save_pc()
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
@ 2008-11-08 19:16 ` Aurelien Jarno
2008-11-08 19:17 ` [Qemu-devel] [PATCH v2 02/11] target-mips: optimize gen_op_addr_add() (1/2) Aurelien Jarno
` (9 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:16 UTC (permalink / raw)
To: qemu-devel
We obviously don't need to use a temporary variable to write PC.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 6 +-----
1 files changed, 1 insertions(+), 5 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 9a197ee..dcd8094 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -827,11 +827,7 @@ OP_CONDZ(ltz, TCG_COND_LT);
static inline void gen_save_pc(target_ulong pc)
{
- TCGv r_tmp = tcg_temp_new(TCG_TYPE_TL);
-
- tcg_gen_movi_tl(r_tmp, pc);
- tcg_gen_mov_tl(cpu_PC, r_tmp);
- tcg_temp_free(r_tmp);
+ tcg_gen_movi_tl(cpu_PC, pc);
}
static inline void save_cpu_state (DisasContext *ctx, int do_save_pc)
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 02/11] target-mips: optimize gen_op_addr_add() (1/2)
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
2008-11-08 19:16 ` [Qemu-devel] [PATCH v2 01/11] target-mips: optimize gen_save_pc() Aurelien Jarno
@ 2008-11-08 19:17 ` Aurelien Jarno
2008-11-08 19:17 ` [Qemu-devel] [PATCH v2 03/11] target-mips: optimize gen_op_addr_add() (2/2) Aurelien Jarno
` (8 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:17 UTC (permalink / raw)
To: qemu-devel
The user mode can be tested at translation time using ctx->hflags.
This simplifies gen_op_addr_add().
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 17 +++++++----------
1 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index dcd8094..f78bfde 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -894,7 +894,7 @@ generate_exception (DisasContext *ctx, int excp)
}
/* Addresses computation */
-static inline void gen_op_addr_add (TCGv t0, TCGv t1)
+static inline void gen_op_addr_add (DisasContext *ctx, TCGv t0, TCGv t1)
{
tcg_gen_add_tl(t0, t0, t1);
@@ -902,19 +902,16 @@ static inline void gen_op_addr_add (TCGv t0, TCGv t1)
/* For compatibility with 32-bit code, data reference in user mode
with Status_UX = 0 should be casted to 32-bit and sign extended.
See the MIPS64 PRA manual, section 4.10. */
- {
+ if ((ctx->hflags & MIPS_HFLAG_KSU) == MIPS_HFLAG_UM) {
int l1 = gen_new_label();
- TCGv r_tmp = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp = tcg_temp_new(TCG_TYPE_I32);
- tcg_gen_ld_i32(r_tmp, cpu_env, offsetof(CPUState, hflags));
- tcg_gen_andi_i32(r_tmp, r_tmp, MIPS_HFLAG_KSU);
- tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp, MIPS_HFLAG_UM, l1);
tcg_gen_ld_i32(r_tmp, cpu_env, offsetof(CPUState, CP0_Status));
tcg_gen_andi_i32(r_tmp, r_tmp, (1 << CP0St_UX));
tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp, 0, l1);
- tcg_temp_free(r_tmp);
tcg_gen_ext32s_i64(t0, t0);
gen_set_label(l1);
+ tcg_temp_free(r_tmp);
}
#endif
}
@@ -1070,7 +1067,7 @@ static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
} else {
gen_load_gpr(t0, base);
tcg_gen_movi_tl(t1, offset);
- gen_op_addr_add(t0, t1);
+ gen_op_addr_add(ctx, t0, t1);
}
/* Don't do NOP if destination is zero: we must perform the actual
memory access. */
@@ -1235,7 +1232,7 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
gen_load_gpr(t0, base);
tcg_gen_movi_tl(t1, offset);
- gen_op_addr_add(t0, t1);
+ gen_op_addr_add(ctx, t0, t1);
tcg_temp_free(t1);
}
/* Don't do NOP if destination is zero: we must perform the actual
@@ -7369,7 +7366,7 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
} else {
gen_load_gpr(t0, base);
gen_load_gpr(t1, index);
- gen_op_addr_add(t0, t1);
+ gen_op_addr_add(ctx, t0, t1);
}
/* Don't do NOP if destination is zero: we must perform the actual
memory access. */
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 03/11] target-mips: optimize gen_op_addr_add() (2/2)
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
2008-11-08 19:16 ` [Qemu-devel] [PATCH v2 01/11] target-mips: optimize gen_save_pc() Aurelien Jarno
2008-11-08 19:17 ` [Qemu-devel] [PATCH v2 02/11] target-mips: optimize gen_op_addr_add() (1/2) Aurelien Jarno
@ 2008-11-08 19:17 ` Aurelien Jarno
2008-11-08 19:18 ` [Qemu-devel] [PATCH v2 04/11] target-mips: convert bitfield ops to TCG Aurelien Jarno
` (7 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:17 UTC (permalink / raw)
To: qemu-devel
Instead of dynamically generating different code depending on the UX
flag, add a new flag in ctx->hflags to generate different code.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/cpu.h | 13 +++++++------
target-mips/exec.h | 5 ++++-
target-mips/translate.c | 11 ++---------
3 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index a27ffd3..d686f8e 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -411,7 +411,7 @@ struct CPUMIPSState {
int error_code;
uint32_t hflags; /* CPU State */
/* TMASK defines different execution modes */
-#define MIPS_HFLAG_TMASK 0x01FF
+#define MIPS_HFLAG_TMASK 0x03FF
#define MIPS_HFLAG_MODE 0x0007 /* execution modes */
/* The KSU flags must be the lowest bits in hflags. The flag order
must be the same as defined for CP0 Status. This allows to use
@@ -430,15 +430,16 @@ struct CPUMIPSState {
and RSQRT.D. */
#define MIPS_HFLAG_COP1X 0x0080 /* COP1X instructions enabled */
#define MIPS_HFLAG_RE 0x0100 /* Reversed endianness */
+#define MIPS_HFLAG_UX 0x0200 /* 64-bit user mode */
/* If translation is interrupted between the branch instruction and
* the delay slot, record what type of branch it is so that we can
* resume translation properly. It might be possible to reduce
* this from three bits to two. */
-#define MIPS_HFLAG_BMASK 0x0e00
-#define MIPS_HFLAG_B 0x0200 /* Unconditional branch */
-#define MIPS_HFLAG_BC 0x0400 /* Conditional branch */
-#define MIPS_HFLAG_BL 0x0600 /* Likely branch */
-#define MIPS_HFLAG_BR 0x0800 /* branch to register (can't link TB) */
+#define MIPS_HFLAG_BMASK 0x1C00
+#define MIPS_HFLAG_B 0x0400 /* Unconditional branch */
+#define MIPS_HFLAG_BC 0x0800 /* Conditional branch */
+#define MIPS_HFLAG_BL 0x0C00 /* Likely branch */
+#define MIPS_HFLAG_BR 0x1000 /* branch to register (can't link TB) */
target_ulong btarget; /* Jump / branch target */
int bcond; /* Branch condition (if needed) */
diff --git a/target-mips/exec.h b/target-mips/exec.h
index 28bf466..5d3e356 100644
--- a/target-mips/exec.h
+++ b/target-mips/exec.h
@@ -66,7 +66,8 @@ static inline int cpu_halted(CPUState *env)
static inline void compute_hflags(CPUState *env)
{
env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 |
- MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU);
+ MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU |
+ MIPS_HFLAG_UX);
if (!(env->CP0_Status & (1 << CP0St_EXL)) &&
!(env->CP0_Status & (1 << CP0St_ERL)) &&
!(env->hflags & MIPS_HFLAG_DM)) {
@@ -77,6 +78,8 @@ static inline void compute_hflags(CPUState *env)
(env->CP0_Status & (1 << CP0St_PX)) ||
(env->CP0_Status & (1 << CP0St_UX)))
env->hflags |= MIPS_HFLAG_64;
+ if (env->CP0_Status & (1 << CP0St_UX))
+ env->hflags |= MIPS_HFLAG_UX;
#endif
if ((env->CP0_Status & (1 << CP0St_CU0)) ||
!(env->hflags & MIPS_HFLAG_KSU))
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f78bfde..af01f73 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -902,16 +902,9 @@ static inline void gen_op_addr_add (DisasContext *ctx, TCGv t0, TCGv t1)
/* For compatibility with 32-bit code, data reference in user mode
with Status_UX = 0 should be casted to 32-bit and sign extended.
See the MIPS64 PRA manual, section 4.10. */
- if ((ctx->hflags & MIPS_HFLAG_KSU) == MIPS_HFLAG_UM) {
- int l1 = gen_new_label();
- TCGv r_tmp = tcg_temp_new(TCG_TYPE_I32);
-
- tcg_gen_ld_i32(r_tmp, cpu_env, offsetof(CPUState, CP0_Status));
- tcg_gen_andi_i32(r_tmp, r_tmp, (1 << CP0St_UX));
- tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp, 0, l1);
+ if (((ctx->hflags & MIPS_HFLAG_KSU) == MIPS_HFLAG_UM) &&
+ (ctx->hflags & MIPS_HFLAG_UX)) {
tcg_gen_ext32s_i64(t0, t0);
- gen_set_label(l1);
- tcg_temp_free(r_tmp);
}
#endif
}
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 04/11] target-mips: convert bitfield ops to TCG
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (2 preceding siblings ...)
2008-11-08 19:17 ` [Qemu-devel] [PATCH v2 03/11] target-mips: optimize gen_op_addr_add() (2/2) Aurelien Jarno
@ 2008-11-08 19:18 ` Aurelien Jarno
2008-11-08 19:19 ` [Qemu-devel] [PATCH v2 05/11] target-mips: convert bit shuffle " Aurelien Jarno
` (6 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:18 UTC (permalink / raw)
To: qemu-devel
Bitfield operations can be written with very few TCG instructions
(between 2 and 5), so it is worth converting them to TCG.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/helper.h | 6 +----
target-mips/op_helper.c | 26 +---------------------
target-mips/translate.c | 55 +++++++++++++++++++++++++++++++++-------------
3 files changed, 41 insertions(+), 46 deletions(-)
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 525ccbb..5926921 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -270,13 +270,9 @@ DEF_HELPER(target_ulong, do_rdhwr_ccres, (void))
DEF_HELPER(void, do_pmon, (int function))
DEF_HELPER(void, do_wait, (void))
-/* Bitfield operations. */
-DEF_HELPER(target_ulong, do_ext, (target_ulong t1, uint32_t pos, uint32_t size))
-DEF_HELPER(target_ulong, do_ins, (target_ulong t0, target_ulong t1, uint32_t pos, uint32_t size))
+/* Bit shuffle operations. */
DEF_HELPER(target_ulong, do_wsbh, (target_ulong t1))
#ifdef TARGET_MIPS64
-DEF_HELPER(target_ulong, do_dext, (target_ulong t1, uint32_t pos, uint32_t size))
-DEF_HELPER(target_ulong, do_dins, (target_ulong t0, target_ulong t1, uint32_t pos, uint32_t size))
DEF_HELPER(target_ulong, do_dsbh, (target_ulong t1))
DEF_HELPER(target_ulong, do_dshd, (target_ulong t1))
#endif
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 3744728..b642593 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -1781,37 +1781,13 @@ target_ulong do_rdhwr_ccres(void)
return 0;
}
-/* Bitfield operations. */
-target_ulong do_ext(target_ulong t1, uint32_t pos, uint32_t size)
-{
- return (int32_t)((t1 >> pos) & ((size < 32) ? ((1 << size) - 1) : ~0));
-}
-
-target_ulong do_ins(target_ulong t0, target_ulong t1, uint32_t pos, uint32_t size)
-{
- target_ulong mask = ((size < 32) ? ((1 << size) - 1) : ~0) << pos;
-
- return (int32_t)((t0 & ~mask) | ((t1 << pos) & mask));
-}
-
+/* Bit shuffle operations. */
target_ulong do_wsbh(target_ulong t1)
{
return (int32_t)(((t1 << 8) & ~0x00FF00FF) | ((t1 >> 8) & 0x00FF00FF));
}
#if defined(TARGET_MIPS64)
-target_ulong do_dext(target_ulong t1, uint32_t pos, uint32_t size)
-{
- return (t1 >> pos) & ((size < 64) ? ((1ULL << size) - 1) : ~0ULL);
-}
-
-target_ulong do_dins(target_ulong t0, target_ulong t1, uint32_t pos, uint32_t size)
-{
- target_ulong mask = ((size < 64) ? ((1ULL << size) - 1) : ~0ULL) << pos;
-
- return (t0 & ~mask) | ((t1 << pos) & mask);
-}
-
target_ulong do_dsbh(target_ulong t1)
{
return ((t1 << 8) & ~0x00FF00FF00FF00FFULL) | ((t1 >> 8) & 0x00FF00FF00FF00FFULL);
diff --git a/target-mips/translate.c b/target-mips/translate.c
index af01f73..d74d2cb 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -2682,57 +2682,80 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt,
int rs, int lsb, int msb)
{
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
- TCGv t1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
+ TCGv t1 = tcg_temp_new(TCG_TYPE_TL);
+ target_ulong mask;
gen_load_gpr(t1, rs);
switch (opc) {
case OPC_EXT:
if (lsb + msb > 31)
goto fail;
- tcg_gen_helper_1_1ii(do_ext, t0, t1, lsb, msb + 1);
+ tcg_gen_shri_tl(t0, t1, lsb);
+ if (msb != 31) {
+ tcg_gen_andi_tl(t0, t0, (1 << (msb + 1)) - 1);
+ } else {
+ tcg_gen_ext32s_tl(t0, t0);
+ }
break;
#if defined(TARGET_MIPS64)
case OPC_DEXTM:
- if (lsb + msb > 63)
- goto fail;
- tcg_gen_helper_1_1ii(do_dext, t0, t1, lsb, msb + 1 + 32);
+ tcg_gen_shri_tl(t0, t1, lsb);
+ if (msb != 31) {
+ tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1 + 32)) - 1);
+ }
break;
case OPC_DEXTU:
- if (lsb + msb > 63)
- goto fail;
- tcg_gen_helper_1_1ii(do_dext, t0, t1, lsb + 32, msb + 1);
+ tcg_gen_shri_tl(t0, t1, lsb + 32);
+ tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
break;
case OPC_DEXT:
- if (lsb + msb > 63)
- goto fail;
- tcg_gen_helper_1_1ii(do_dext, t0, t1, lsb, msb + 1);
+ tcg_gen_shri_tl(t0, t1, lsb);
+ tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
break;
#endif
case OPC_INS:
if (lsb > msb)
goto fail;
+ mask = ((msb - lsb + 1 < 32) ? ((1 << (msb - lsb + 1)) - 1) : ~0) << lsb;
gen_load_gpr(t0, rt);
- tcg_gen_helper_1_2ii(do_ins, t0, t0, t1, lsb, msb - lsb + 1);
+ tcg_gen_andi_tl(t0, t0, ~mask);
+ tcg_gen_shli_tl(t1, t1, lsb);
+ tcg_gen_andi_tl(t1, t1, mask);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_ext32s_tl(t0, t0);
break;
#if defined(TARGET_MIPS64)
case OPC_DINSM:
if (lsb > msb)
goto fail;
+ mask = ((msb - lsb + 1 + 32 < 64) ? ((1ULL << (msb - lsb + 1 + 32)) - 1) : ~0ULL) << lsb;
gen_load_gpr(t0, rt);
- tcg_gen_helper_1_2ii(do_dins, t0, t0, t1, lsb, msb - lsb + 1 + 32);
+ tcg_gen_andi_tl(t0, t0, ~mask);
+ tcg_gen_shli_tl(t1, t1, lsb);
+ tcg_gen_andi_tl(t1, t1, mask);
+ tcg_gen_or_tl(t0, t0, t1);
break;
case OPC_DINSU:
if (lsb > msb)
goto fail;
+ mask = ((1ULL << (msb - lsb + 1)) - 1) << lsb;
gen_load_gpr(t0, rt);
- tcg_gen_helper_1_2ii(do_dins, t0, t0, t1, lsb + 32, msb - lsb + 1);
+ tcg_gen_andi_tl(t0, t0, ~mask);
+ tcg_gen_shli_tl(t1, t1, lsb + 32);
+ tcg_gen_andi_tl(t1, t1, mask);
+ tcg_gen_or_tl(t0, t0, t1);
break;
case OPC_DINS:
if (lsb > msb)
goto fail;
gen_load_gpr(t0, rt);
- tcg_gen_helper_1_2ii(do_dins, t0, t0, t1, lsb, msb - lsb + 1);
+ mask = ((1ULL << (msb - lsb + 1)) - 1) << lsb;
+ gen_load_gpr(t0, rt);
+ tcg_gen_andi_tl(t0, t0, ~mask);
+ tcg_gen_shli_tl(t1, t1, lsb);
+ tcg_gen_andi_tl(t1, t1, mask);
+ tcg_gen_or_tl(t0, t0, t1);
break;
#endif
default:
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 05/11] target-mips: convert bit shuffle ops to TCG
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (3 preceding siblings ...)
2008-11-08 19:18 ` [Qemu-devel] [PATCH v2 04/11] target-mips: convert bitfield ops to TCG Aurelien Jarno
@ 2008-11-08 19:19 ` Aurelien Jarno
2008-11-08 19:19 ` [Qemu-devel] [PATCH v2 06/11] target-mips: optimize gen_arith()/gen_arith_imm() Aurelien Jarno
` (5 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:19 UTC (permalink / raw)
To: qemu-devel
Bit shuffle operations can be written with very few TCG instructions
(between 5 and 8), so it is worth converting them to TCG.
This patch also moves all bit shuffle generation code to a separate
function in order to have a cleaner exception code path; that is, it
doesn't store back the TCG register to the target register after the
exception, as the TCG register doesn't exist anymore.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/helper.h | 7 ---
target-mips/op_helper.c | 19 --------
target-mips/translate.c | 106 +++++++++++++++++++++++++----------------------
3 files changed, 56 insertions(+), 76 deletions(-)
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 5926921..f67c82a 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -269,10 +269,3 @@ DEF_HELPER(target_ulong, do_rdhwr_cc, (void))
DEF_HELPER(target_ulong, do_rdhwr_ccres, (void))
DEF_HELPER(void, do_pmon, (int function))
DEF_HELPER(void, do_wait, (void))
-
-/* Bit shuffle operations. */
-DEF_HELPER(target_ulong, do_wsbh, (target_ulong t1))
-#ifdef TARGET_MIPS64
-DEF_HELPER(target_ulong, do_dsbh, (target_ulong t1))
-DEF_HELPER(target_ulong, do_dshd, (target_ulong t1))
-#endif
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index b642593..3fe62fb 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -1781,25 +1781,6 @@ target_ulong do_rdhwr_ccres(void)
return 0;
}
-/* Bit shuffle operations. */
-target_ulong do_wsbh(target_ulong t1)
-{
- return (int32_t)(((t1 << 8) & ~0x00FF00FF) | ((t1 >> 8) & 0x00FF00FF));
-}
-
-#if defined(TARGET_MIPS64)
-target_ulong do_dsbh(target_ulong t1)
-{
- return ((t1 << 8) & ~0x00FF00FF00FF00FFULL) | ((t1 >> 8) & 0x00FF00FF00FF00FFULL);
-}
-
-target_ulong do_dshd(target_ulong t1)
-{
- t1 = ((t1 << 16) & ~0x0000FFFF0000FFFFULL) | ((t1 >> 16) & 0x0000FFFF0000FFFFULL);
- return (t1 << 32) | (t1 >> 32);
-}
-#endif
-
void do_pmon (int function)
{
function /= 2;
diff --git a/target-mips/translate.c b/target-mips/translate.c
index d74d2cb..e89d59e 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -2771,6 +2771,60 @@ fail:
tcg_temp_free(t1);
}
+static void gen_bshfl (DisasContext *ctx, uint32_t op2, int rt, int rd)
+{
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
+ TCGv t1 = tcg_temp_new(TCG_TYPE_TL);
+
+ gen_load_gpr(t1, rt);
+ switch (op2) {
+ case OPC_WSBH:
+ tcg_gen_shri_tl(t0, t1, 8);
+ tcg_gen_andi_tl(t0, t0, 0x00FF00FF);
+ tcg_gen_shli_tl(t1, t1, 8);
+ tcg_gen_andi_tl(t1, t1, ~0x00FF00FF);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_ext32s_tl(t0, t0);
+ break;
+ case OPC_SEB:
+ tcg_gen_ext8s_tl(t0, t1);
+ break;
+ case OPC_SEH:
+ tcg_gen_ext16s_tl(t0, t1);
+ break;
+#if defined(TARGET_MIPS64)
+ case OPC_DSBH:
+ gen_load_gpr(t1, rt);
+ tcg_gen_shri_tl(t0, t1, 8);
+ tcg_gen_andi_tl(t0, t0, 0x00FF00FF00FF00FFULL);
+ tcg_gen_shli_tl(t1, t1, 8);
+ tcg_gen_andi_tl(t1, t1, ~0x00FF00FF00FF00FFULL);
+ tcg_gen_or_tl(t0, t0, t1);
+ break;
+ case OPC_DSHD:
+ gen_load_gpr(t1, rt);
+ tcg_gen_shri_tl(t0, t1, 16);
+ tcg_gen_andi_tl(t0, t0, 0x0000FFFF0000FFFFULL);
+ tcg_gen_shli_tl(t1, t1, 16);
+ tcg_gen_andi_tl(t1, t1, ~0x0000FFFF0000FFFFULL);
+ tcg_gen_or_tl(t1, t0, t1);
+ tcg_gen_shri_tl(t0, t1, 32);
+ tcg_gen_shli_tl(t1, t1, 32);
+ tcg_gen_or_tl(t0, t0, t1);
+ break;
+#endif
+ default:
+ MIPS_INVAL("bsfhl");
+ generate_exception(ctx, EXCP_RI);
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ return;
+ }
+ gen_store_gpr(t0, rd);
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+}
+
#ifndef CONFIG_USER_ONLY
/* CP0 (MMU and control) */
static inline void gen_mfc0_load32 (TCGv t, target_ulong off)
@@ -7953,34 +8007,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
case OPC_BSHFL:
check_insn(env, ctx, ISA_MIPS32R2);
op2 = MASK_BSHFL(ctx->opcode);
- {
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
- TCGv t1 = tcg_temp_local_new(TCG_TYPE_TL);
-
- switch (op2) {
- case OPC_WSBH:
- gen_load_gpr(t1, rt);
- tcg_gen_helper_1_1(do_wsbh, t0, t1);
- gen_store_gpr(t0, rd);
- break;
- case OPC_SEB:
- gen_load_gpr(t1, rt);
- tcg_gen_ext8s_tl(t0, t1);
- gen_store_gpr(t0, rd);
- break;
- case OPC_SEH:
- gen_load_gpr(t1, rt);
- tcg_gen_ext16s_tl(t0, t1);
- gen_store_gpr(t0, rd);
- break;
- default: /* Invalid */
- MIPS_INVAL("bshfl");
- generate_exception(ctx, EXCP_RI);
- break;
- }
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- }
+ gen_bshfl(ctx, op2, rt, rd);
break;
case OPC_RDHWR:
check_insn(env, ctx, ISA_MIPS32R2);
@@ -8056,28 +8083,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
check_insn(env, ctx, ISA_MIPS64R2);
check_mips_64(ctx);
op2 = MASK_DBSHFL(ctx->opcode);
- {
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
- TCGv t1 = tcg_temp_local_new(TCG_TYPE_TL);
-
- switch (op2) {
- case OPC_DSBH:
- gen_load_gpr(t1, rt);
- tcg_gen_helper_1_1(do_dsbh, t0, t1);
- break;
- case OPC_DSHD:
- gen_load_gpr(t1, rt);
- tcg_gen_helper_1_1(do_dshd, t0, t1);
- break;
- default: /* Invalid */
- MIPS_INVAL("dbshfl");
- generate_exception(ctx, EXCP_RI);
- break;
- }
- gen_store_gpr(t0, rd);
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- }
+ gen_bshfl(ctx, op2, rt, rd);
break;
#endif
default: /* Invalid */
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 06/11] target-mips: optimize gen_arith()/gen_arith_imm()
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (4 preceding siblings ...)
2008-11-08 19:19 ` [Qemu-devel] [PATCH v2 05/11] target-mips: convert bit shuffle " Aurelien Jarno
@ 2008-11-08 19:19 ` Aurelien Jarno
2008-11-08 19:20 ` [Qemu-devel] [PATCH v2 07/11] target-mips: optimize gen_muldiv() Aurelien Jarno
` (4 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:19 UTC (permalink / raw)
To: qemu-devel
Optimize code generation in gen_arith()/gen_arith_imm():
- Don't do sign extension when the value is already guaranteed to be
sign extended (otherwise, results are marked as UNPREDICTABLE).
- When the value is sign extended, compare the value to 0 instead of
testing bit 31/63.
- Temp variables are valid up to and *including* the brcond instruction.
Use them instead of temp local variables.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 78 +++++++++++++++++++---------------------------
1 files changed, 32 insertions(+), 46 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index e89d59e..dffdb82 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1333,7 +1333,7 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
switch (opc) {
case OPC_ADDI:
{
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_TL);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_TL);
int l1 = gen_new_label();
@@ -1341,24 +1341,21 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
tcg_gen_ext32s_tl(r_tmp1, t0);
tcg_gen_addi_tl(t0, r_tmp1, uimm);
- tcg_gen_xori_tl(r_tmp1, r_tmp1, uimm);
- tcg_gen_xori_tl(r_tmp1, r_tmp1, -1);
+ tcg_gen_xori_tl(r_tmp1, r_tmp1, ~uimm);
tcg_gen_xori_tl(r_tmp2, t0, uimm);
tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
- tcg_gen_shri_tl(r_tmp1, r_tmp1, 31);
- tcg_gen_brcondi_tl(TCG_COND_EQ, r_tmp1, 0, l1);
- tcg_temp_free(r_tmp1);
+ tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
/* operands of same sign, result different sign */
generate_exception(ctx, EXCP_OVERFLOW);
gen_set_label(l1);
+ tcg_temp_free(r_tmp1);
tcg_gen_ext32s_tl(t0, t0);
}
opn = "addi";
break;
case OPC_ADDIU:
- tcg_gen_ext32s_tl(t0, t0);
tcg_gen_addi_tl(t0, t0, uimm);
tcg_gen_ext32s_tl(t0, t0);
opn = "addiu";
@@ -1366,7 +1363,7 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
#if defined(TARGET_MIPS64)
case OPC_DADDI:
{
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_TL);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_TL);
int l1 = gen_new_label();
@@ -1374,17 +1371,15 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
tcg_gen_mov_tl(r_tmp1, t0);
tcg_gen_addi_tl(t0, t0, uimm);
- tcg_gen_xori_tl(r_tmp1, r_tmp1, uimm);
- tcg_gen_xori_tl(r_tmp1, r_tmp1, -1);
+ tcg_gen_xori_tl(r_tmp1, r_tmp1, ~uimm);
tcg_gen_xori_tl(r_tmp2, t0, uimm);
tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
- tcg_gen_shri_tl(r_tmp1, r_tmp1, 63);
- tcg_gen_brcondi_tl(TCG_COND_EQ, r_tmp1, 0, l1);
- tcg_temp_free(r_tmp1);
+ tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
/* operands of same sign, result different sign */
generate_exception(ctx, EXCP_OVERFLOW);
gen_set_label(l1);
+ tcg_temp_free(r_tmp1);
}
opn = "daddi";
break;
@@ -1417,7 +1412,6 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
opn = "lui";
break;
case OPC_SLL:
- tcg_gen_ext32u_tl(t0, t0);
tcg_gen_shli_tl(t0, t0, uimm);
tcg_gen_ext32s_tl(t0, t0);
opn = "sll";
@@ -1425,15 +1419,17 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
case OPC_SRA:
tcg_gen_ext32s_tl(t0, t0);
tcg_gen_sari_tl(t0, t0, uimm);
- tcg_gen_ext32s_tl(t0, t0);
opn = "sra";
break;
case OPC_SRL:
switch ((ctx->opcode >> 21) & 0x1f) {
case 0:
- tcg_gen_ext32u_tl(t0, t0);
- tcg_gen_shri_tl(t0, t0, uimm);
- tcg_gen_ext32s_tl(t0, t0);
+ if (uimm != 0) {
+ tcg_gen_ext32u_tl(t0, t0);
+ tcg_gen_shri_tl(t0, t0, uimm);
+ } else {
+ tcg_gen_ext32s_tl(t0, t0);
+ }
opn = "srl";
break;
case 1:
@@ -1449,9 +1445,12 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
}
opn = "rotr";
} else {
- tcg_gen_ext32u_tl(t0, t0);
- tcg_gen_shri_tl(t0, t0, uimm);
- tcg_gen_ext32s_tl(t0, t0);
+ if (uimm != 0) {
+ tcg_gen_ext32u_tl(t0, t0);
+ tcg_gen_shri_tl(t0, t0, uimm);
+ } else {
+ tcg_gen_ext32s_tl(t0, t0);
+ }
opn = "srl";
}
break;
@@ -1562,7 +1561,7 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
switch (opc) {
case OPC_ADD:
{
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_TL);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_TL);
int l1 = gen_new_label();
@@ -1576,27 +1575,24 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
tcg_gen_xor_tl(r_tmp2, t0, t1);
tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
- tcg_gen_shri_tl(r_tmp1, r_tmp1, 31);
- tcg_gen_brcondi_tl(TCG_COND_EQ, r_tmp1, 0, l1);
- tcg_temp_free(r_tmp1);
+ tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
/* operands of same sign, result different sign */
generate_exception(ctx, EXCP_OVERFLOW);
gen_set_label(l1);
+ tcg_temp_free(r_tmp1);
tcg_gen_ext32s_tl(t0, t0);
}
opn = "add";
break;
case OPC_ADDU:
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_ext32s_tl(t0, t0);
opn = "addu";
break;
case OPC_SUB:
{
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_TL);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_TL);
int l1 = gen_new_label();
@@ -1609,20 +1605,17 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
tcg_gen_xor_tl(r_tmp1, r_tmp1, t0);
tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
- tcg_gen_shri_tl(r_tmp1, r_tmp1, 31);
- tcg_gen_brcondi_tl(TCG_COND_EQ, r_tmp1, 0, l1);
- tcg_temp_free(r_tmp1);
+ tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
/* operands of different sign, first operand and result different sign */
generate_exception(ctx, EXCP_OVERFLOW);
gen_set_label(l1);
+ tcg_temp_free(r_tmp1);
tcg_gen_ext32s_tl(t0, t0);
}
opn = "sub";
break;
case OPC_SUBU:
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_sub_tl(t0, t0, t1);
tcg_gen_ext32s_tl(t0, t0);
opn = "subu";
@@ -1630,7 +1623,7 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
#if defined(TARGET_MIPS64)
case OPC_DADD:
{
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_TL);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_TL);
int l1 = gen_new_label();
@@ -1643,12 +1636,11 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
tcg_gen_xor_tl(r_tmp2, t0, t1);
tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
- tcg_gen_shri_tl(r_tmp1, r_tmp1, 63);
- tcg_gen_brcondi_tl(TCG_COND_EQ, r_tmp1, 0, l1);
- tcg_temp_free(r_tmp1);
+ tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
/* operands of same sign, result different sign */
generate_exception(ctx, EXCP_OVERFLOW);
gen_set_label(l1);
+ tcg_temp_free(r_tmp1);
}
opn = "dadd";
break;
@@ -1658,7 +1650,7 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
break;
case OPC_DSUB:
{
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_TL);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_TL);
int l1 = gen_new_label();
@@ -1670,12 +1662,11 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
tcg_gen_xor_tl(r_tmp1, r_tmp1, t0);
tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
- tcg_gen_shri_tl(r_tmp1, r_tmp1, 63);
- tcg_gen_brcondi_tl(TCG_COND_EQ, r_tmp1, 0, l1);
- tcg_temp_free(r_tmp1);
+ tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
/* operands of different sign, first operand and result different sign */
generate_exception(ctx, EXCP_OVERFLOW);
gen_set_label(l1);
+ tcg_temp_free(r_tmp1);
}
opn = "dsub";
break;
@@ -1710,8 +1701,6 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
opn = "xor";
break;
case OPC_MUL:
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_mul_tl(t0, t0, t1);
tcg_gen_ext32s_tl(t0, t0);
opn = "mul";
@@ -1737,8 +1726,6 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
opn = "movz";
goto print;
case OPC_SLLV:
- tcg_gen_ext32u_tl(t0, t0);
- tcg_gen_ext32u_tl(t1, t1);
tcg_gen_andi_tl(t0, t0, 0x1f);
tcg_gen_shl_tl(t0, t1, t0);
tcg_gen_ext32s_tl(t0, t0);
@@ -1748,7 +1735,6 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
tcg_gen_ext32s_tl(t1, t1);
tcg_gen_andi_tl(t0, t0, 0x1f);
tcg_gen_sar_tl(t0, t1, t0);
- tcg_gen_ext32s_tl(t0, t0);
opn = "srav";
break;
case OPC_SRLV:
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 07/11] target-mips: optimize gen_muldiv()
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (5 preceding siblings ...)
2008-11-08 19:19 ` [Qemu-devel] [PATCH v2 06/11] target-mips: optimize gen_arith()/gen_arith_imm() Aurelien Jarno
@ 2008-11-08 19:20 ` Aurelien Jarno
2008-11-08 19:20 ` [Qemu-devel] [PATCH v2 08/11] target-mips: optimize gen_farith() Aurelien Jarno
` (3 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:20 UTC (permalink / raw)
To: qemu-devel
Optimize code generation in gen_muldiv():
- Don't do sign extension when the value is already guaranteed to be
sign extended (otherwise, results are marked as UNPREDICTABLE).
- Access the LO, HI registers directly instead of writing them through
a temporary variable.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 162 ++++++++++++++---------------------------------
1 files changed, 47 insertions(+), 115 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index dffdb82..e6978d6 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -604,27 +604,7 @@ static inline void gen_store_gpr (TCGv t, int reg)
tcg_gen_mov_tl(cpu_gpr[reg], t);
}
-/* Moves to/from HI and LO registers. */
-static inline void gen_load_HI (TCGv t, int reg)
-{
- tcg_gen_mov_tl(t, cpu_HI[reg]);
-}
-
-static inline void gen_store_HI (TCGv t, int reg)
-{
- tcg_gen_mov_tl(cpu_HI[reg], t);
-}
-
-static inline void gen_load_LO (TCGv t, int reg)
-{
- tcg_gen_mov_tl(t, cpu_LO[reg]);
-}
-
-static inline void gen_store_LO (TCGv t, int reg)
-{
- tcg_gen_mov_tl(cpu_LO[reg], t);
-}
-
+/* Moves to/from ACX register. */
static inline void gen_load_ACX (TCGv t, int reg)
{
tcg_gen_mov_tl(t, cpu_ACX[reg]);
@@ -1856,23 +1836,23 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
}
switch (opc) {
case OPC_MFHI:
- gen_load_HI(t0, 0);
+ tcg_gen_mov_tl(t0, cpu_HI[0]);
gen_store_gpr(t0, reg);
opn = "mfhi";
break;
case OPC_MFLO:
- gen_load_LO(t0, 0);
+ tcg_gen_mov_tl(t0, cpu_LO[0]);
gen_store_gpr(t0, reg);
opn = "mflo";
break;
case OPC_MTHI:
gen_load_gpr(t0, reg);
- gen_store_HI(t0, 0);
+ tcg_gen_mov_tl(cpu_HI[0], t0);
opn = "mthi";
break;
case OPC_MTLO:
gen_load_gpr(t0, reg);
- gen_store_LO(t0, 0);
+ tcg_gen_mov_tl(cpu_LO[0], t0);
opn = "mtlo";
break;
default:
@@ -1899,27 +1879,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
{
int l1 = gen_new_label();
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
{
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp3 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_ext_tl_i64(r_tmp1, t0);
- tcg_gen_ext_tl_i64(r_tmp2, t1);
- tcg_gen_div_i64(r_tmp3, r_tmp1, r_tmp2);
- tcg_gen_rem_i64(r_tmp2, r_tmp1, r_tmp2);
- tcg_gen_trunc_i64_tl(t0, r_tmp3);
- tcg_gen_trunc_i64_tl(t1, r_tmp2);
+ int l2 = gen_new_label();
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_I32);
+
+ tcg_gen_trunc_tl_i32(r_tmp1, t0);
+ tcg_gen_trunc_tl_i32(r_tmp2, t1);
+ tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
+ tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_movi_tl(cpu_HI[0], 0);
+ tcg_gen_br(l1);
+ gen_set_label(l2);
+ tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
+ tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
+ tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+ tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
tcg_temp_free(r_tmp3);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
}
gen_set_label(l1);
}
@@ -1940,13 +1921,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_trunc_tl_i32(r_tmp2, t1);
tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
- tcg_gen_ext_i32_tl(t0, r_tmp3);
- tcg_gen_ext_i32_tl(t1, r_tmp1);
+ tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+ tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
tcg_temp_free(r_tmp3);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
}
gen_set_label(l1);
}
@@ -1957,8 +1936,6 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
@@ -1967,10 +1944,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "mult";
break;
@@ -1989,10 +1964,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "multu";
break;
@@ -2007,24 +1980,12 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
- {
- tcg_gen_movi_tl(t1, 0);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
- tcg_gen_br(l1);
- }
+ tcg_gen_mov_tl(cpu_LO[0], t0);
+ tcg_gen_movi_tl(cpu_HI[0], 0);
+ tcg_gen_br(l1);
gen_set_label(l2);
- {
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_div_i64(r_tmp1, t0, t1);
- tcg_gen_rem_i64(r_tmp2, t0, t1);
- gen_store_LO(r_tmp1, 0);
- gen_store_HI(r_tmp2, 0);
- tcg_temp_free(r_tmp1);
- tcg_temp_free(r_tmp2);
- }
+ tcg_gen_div_i64(cpu_LO[0], t0, t1);
+ tcg_gen_rem_i64(cpu_HI[0], t0, t1);
}
gen_set_label(l1);
}
@@ -2035,17 +1996,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
int l1 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
- {
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_divu_i64(r_tmp1, t0, t1);
- tcg_gen_remu_i64(r_tmp2, t0, t1);
- tcg_temp_free(r_tmp1);
- tcg_temp_free(r_tmp2);
- gen_store_LO(r_tmp1, 0);
- gen_store_HI(r_tmp2, 0);
- }
+ tcg_gen_divu_i64(cpu_LO[0], t0, t1);
+ tcg_gen_remu_i64(cpu_HI[0], t0, t1);
gen_set_label(l1);
}
opn = "ddivu";
@@ -2064,24 +2016,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "madd";
break;
@@ -2095,19 +2041,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_extu_tl_i64(r_tmp1, t0);
tcg_gen_extu_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "maddu";
break;
@@ -2116,24 +2058,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "msub";
break;
@@ -2147,19 +2083,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_extu_tl_i64(r_tmp1, t0);
tcg_gen_extu_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "msubu";
break;
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 08/11] target-mips: optimize gen_farith()
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (6 preceding siblings ...)
2008-11-08 19:20 ` [Qemu-devel] [PATCH v2 07/11] target-mips: optimize gen_muldiv() Aurelien Jarno
@ 2008-11-08 19:20 ` Aurelien Jarno
2008-11-08 19:21 ` [Qemu-devel] [PATCH v2 09/11] target-mips: optimize movc*() Aurelien Jarno
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:20 UTC (permalink / raw)
To: qemu-devel
Optimize code generation in gen_farith():
- Temp variables are valid up to and *including* the brcond instruction.
Use them instead of temp local variables.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 24 ++++++++++++------------
1 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index e6978d6..0afcb05 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -6314,32 +6314,32 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
case FOP(18, 16):
{
int l1 = gen_new_label();
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
gen_load_gpr(t0, ft);
tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_temp_free(t0);
gen_load_fpr32(fp0, fs);
gen_store_fpr32(fp0, fd);
tcg_temp_free(fp0);
gen_set_label(l1);
+ tcg_temp_free(t0);
}
opn = "movz.s";
break;
case FOP(19, 16):
{
int l1 = gen_new_label();
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
gen_load_gpr(t0, ft);
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
- tcg_temp_free(t0);
gen_load_fpr32(fp0, fs);
gen_store_fpr32(fp0, fd);
tcg_temp_free(fp0);
gen_set_label(l1);
+ tcg_temp_free(t0);
}
opn = "movn.s";
break;
@@ -6733,32 +6733,32 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
case FOP(18, 17):
{
int l1 = gen_new_label();
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I64);
gen_load_gpr(t0, ft);
tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_temp_free(t0);
gen_load_fpr64(ctx, fp0, fs);
gen_store_fpr64(ctx, fp0, fd);
tcg_temp_free(fp0);
gen_set_label(l1);
+ tcg_temp_free(t0);
}
opn = "movz.d";
break;
case FOP(19, 17):
{
int l1 = gen_new_label();
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I64);
gen_load_gpr(t0, ft);
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
- tcg_temp_free(t0);
gen_load_fpr64(ctx, fp0, fs);
gen_store_fpr64(ctx, fp0, fd);
tcg_temp_free(fp0);
gen_set_label(l1);
+ tcg_temp_free(t0);
}
opn = "movn.d";
break;
@@ -7068,13 +7068,12 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
check_cp1_64bitmode(ctx);
{
int l1 = gen_new_label();
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
TCGv fph0 = tcg_temp_local_new(TCG_TYPE_I32);
gen_load_gpr(t0, ft);
tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_temp_free(t0);
gen_load_fpr32(fp0, fs);
gen_load_fpr32h(fph0, fs);
gen_store_fpr32(fp0, fd);
@@ -7082,6 +7081,7 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
tcg_temp_free(fp0);
tcg_temp_free(fph0);
gen_set_label(l1);
+ tcg_temp_free(t0);
}
opn = "movz.ps";
break;
@@ -7089,13 +7089,12 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
check_cp1_64bitmode(ctx);
{
int l1 = gen_new_label();
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
TCGv fph0 = tcg_temp_local_new(TCG_TYPE_I32);
gen_load_gpr(t0, ft);
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
- tcg_temp_free(t0);
gen_load_fpr32(fp0, fs);
gen_load_fpr32h(fph0, fs);
gen_store_fpr32(fp0, fd);
@@ -7103,6 +7102,7 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
tcg_temp_free(fp0);
tcg_temp_free(fph0);
gen_set_label(l1);
+ tcg_temp_free(t0);
}
opn = "movn.ps";
break;
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 09/11] target-mips: optimize movc*()
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (7 preceding siblings ...)
2008-11-08 19:20 ` [Qemu-devel] [PATCH v2 08/11] target-mips: optimize gen_farith() Aurelien Jarno
@ 2008-11-08 19:21 ` Aurelien Jarno
2008-11-08 19:21 ` [Qemu-devel] [PATCH 10/11] target-mips: gen_compute_branch1() Aurelien Jarno
2008-11-08 19:22 ` [Qemu-devel] [PATCH v2 11/11] target-mips: fix temporary variable freeing in op_ldst_##insn() Aurelien Jarno
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:21 UTC (permalink / raw)
To: qemu-devel
Optimize code generation in gen_movc*():
- Temp variables are valid up to and *including* the brcond instruction.
Use them instead of temp local variables.
- Avoid using temporary variables to transfer values.
- Access fpu_fcr31 directly in gen_movcf_ps().
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 81 +++++++++++++++++++----------------------------
1 files changed, 33 insertions(+), 48 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 0afcb05..2be15f6 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -5930,8 +5930,7 @@ static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
uint32_t ccbit;
TCGCond cond;
TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
- TCGv t1 = tcg_temp_local_new(TCG_TYPE_TL);
- TCGv r_tmp = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp = tcg_temp_new(TCG_TYPE_I32);
if (cc)
ccbit = 1 << (24 + cc);
@@ -5943,14 +5942,9 @@ static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
cond = TCG_COND_NE;
gen_load_gpr(t0, rd);
- gen_load_gpr(t1, rs);
tcg_gen_andi_i32(r_tmp, fpu_fcr31, ccbit);
tcg_gen_brcondi_i32(cond, r_tmp, 0, l1);
- tcg_temp_free(r_tmp);
-
- tcg_gen_mov_tl(t0, t1);
- tcg_temp_free(t1);
-
+ gen_load_gpr(t0, rs);
gen_set_label(l1);
gen_store_gpr(t0, rd);
tcg_temp_free(t0);
@@ -5960,9 +5954,8 @@ static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
{
uint32_t ccbit;
int cond;
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
- TCGv fp1 = tcg_temp_local_new(TCG_TYPE_I32);
int l1 = gen_new_label();
if (cc)
@@ -5975,25 +5968,21 @@ static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
else
cond = TCG_COND_NE;
- gen_load_fpr32(fp0, fs);
- gen_load_fpr32(fp1, fd);
+ gen_load_fpr32(fp0, fd);
tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
- tcg_gen_mov_i32(fp1, fp0);
- tcg_temp_free(fp0);
+ gen_load_fpr32(fp0, fs);
gen_set_label(l1);
- tcg_temp_free(r_tmp1);
- gen_store_fpr32(fp1, fd);
- tcg_temp_free(fp1);
+ gen_store_fpr32(fp0, fd);
+ tcg_temp_free(fp0);
}
static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int tf)
{
uint32_t ccbit;
int cond;
- TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I64);
- TCGv fp1 = tcg_temp_local_new(TCG_TYPE_I64);
int l1 = gen_new_label();
if (cc)
@@ -6006,57 +5995,53 @@ static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int t
else
cond = TCG_COND_NE;
- gen_load_fpr64(ctx, fp0, fs);
- gen_load_fpr64(ctx, fp1, fd);
+ gen_load_fpr64(ctx, fp0, fd);
tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
- tcg_gen_mov_i64(fp1, fp0);
- tcg_temp_free(fp0);
+ gen_load_fpr64(ctx, fp0, fs);
gen_set_label(l1);
- tcg_temp_free(r_tmp1);
- gen_store_fpr64(ctx, fp1, fd);
- tcg_temp_free(fp1);
+ gen_store_fpr64(ctx, fp0, fd);
+ tcg_temp_free(fp0);
}
static inline void gen_movcf_ps (int fs, int fd, int cc, int tf)
{
+ uint32_t ccbit1, ccbit2;
int cond;
TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
- TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
TCGv fp0 = tcg_temp_local_new(TCG_TYPE_I32);
- TCGv fph0 = tcg_temp_local_new(TCG_TYPE_I32);
- TCGv fp1 = tcg_temp_local_new(TCG_TYPE_I32);
- TCGv fph1 = tcg_temp_local_new(TCG_TYPE_I32);
int l1 = gen_new_label();
int l2 = gen_new_label();
+ if (cc) {
+ ccbit1 = 1 << (24 + cc);
+ ccbit2 = 1 << (25 + cc);
+ } else {
+ ccbit1 = 1 << 23;
+ ccbit2 = 1 << 25;
+ }
+
if (tf)
cond = TCG_COND_EQ;
else
cond = TCG_COND_NE;
+ gen_load_fpr32(fp0, fd);
+ tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit1);
+ tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
gen_load_fpr32(fp0, fs);
- gen_load_fpr32h(fph0, fs);
- gen_load_fpr32(fp1, fd);
- gen_load_fpr32h(fph1, fd);
- get_fp_cond(r_tmp1);
- tcg_gen_shri_i32(r_tmp1, r_tmp1, cc);
- tcg_gen_andi_i32(r_tmp2, r_tmp1, 0x1);
- tcg_gen_brcondi_i32(cond, r_tmp2, 0, l1);
- tcg_gen_mov_i32(fp1, fp0);
- tcg_temp_free(fp0);
gen_set_label(l1);
- tcg_gen_andi_i32(r_tmp2, r_tmp1, 0x2);
- tcg_gen_brcondi_i32(cond, r_tmp2, 0, l2);
- tcg_gen_mov_i32(fph1, fph0);
- tcg_temp_free(fph0);
+ gen_store_fpr32(fp0, fd);
+
+ gen_load_fpr32h(fp0, fd);
+ tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit2);
+ tcg_gen_brcondi_i32(cond, r_tmp1, 0, l2);
+ gen_load_fpr32h(fp0, fs);
gen_set_label(l2);
+ gen_store_fpr32h(fp0, fd);
+
tcg_temp_free(r_tmp1);
- tcg_temp_free(r_tmp2);
- gen_store_fpr32(fp1, fd);
- gen_store_fpr32h(fph1, fd);
- tcg_temp_free(fp1);
- tcg_temp_free(fph1);
+ tcg_temp_free(fp0);
}
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH 10/11] target-mips: gen_compute_branch1()
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (8 preceding siblings ...)
2008-11-08 19:21 ` [Qemu-devel] [PATCH v2 09/11] target-mips: optimize movc*() Aurelien Jarno
@ 2008-11-08 19:21 ` Aurelien Jarno
2008-11-08 19:22 ` [Qemu-devel] [PATCH v2 11/11] target-mips: fix temporary variable freeing in op_ldst_##insn() Aurelien Jarno
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:21 UTC (permalink / raw)
To: qemu-devel
Optimize code generation in gen_compute_branch1():
- Directly use I32 variables instead of converting values from _tl to
_i32 and back to _tl.
- Write the result directly to bcond instead of passing by a local
variable.
- Temp variables are valid up to and *including* the brcond instruction.
Use them instead of temp local variables.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 122 ++++++++++++++++-------------------------------
1 files changed, 41 insertions(+), 81 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 2be15f6..7ec3e2f 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -5634,8 +5634,7 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
target_ulong btarget;
const char *opn = "cp1 cond branch";
- TCGv t0 = tcg_temp_local_new(TCG_TYPE_TL);
- TCGv t1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv t0 = tcg_temp_new(TCG_TYPE_TL);
if (cc != 0)
check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32);
@@ -5647,19 +5646,14 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_not_tl(t0, t0);
- tcg_gen_movi_tl(t1, 0x1 << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0x1 << cc);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1f";
@@ -5668,19 +5662,14 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_not_tl(t0, t0);
- tcg_gen_movi_tl(t1, 0x1 << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0x1 << cc);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1fl";
@@ -5689,18 +5678,14 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_movi_tl(t1, 0x1 << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0x1 << cc);
+ tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1t";
@@ -5709,42 +5694,32 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_movi_tl(t1, 0x1 << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0x1 << cc);
+ tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1tl";
likely:
ctx->hflags |= MIPS_HFLAG_BL;
- tcg_gen_trunc_tl_i32(bcond, t0);
break;
case OPC_BC1FANY2:
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_not_tl(t0, t0);
- tcg_gen_movi_tl(t1, 0x3 << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0x3 << cc);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1any2f";
@@ -5753,18 +5728,14 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_movi_tl(t1, 0x3 << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0x3 << cc);
+ tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1any2t";
@@ -5773,19 +5744,14 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_not_tl(t0, t0);
- tcg_gen_movi_tl(t1, 0xf << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0xf << cc);
+ tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0xf << cc, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1any4f";
@@ -5794,24 +5760,19 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
{
int l1 = gen_new_label();
int l2 = gen_new_label();
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- get_fp_cond(r_tmp1);
- tcg_gen_ext_i32_tl(t0, r_tmp1);
- tcg_temp_free(r_tmp1);
- tcg_gen_movi_tl(t1, 0xf << cc);
- tcg_gen_and_tl(t0, t0, t1);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
- tcg_gen_movi_tl(t0, 0);
+ get_fp_cond(t0);
+ tcg_gen_andi_i32(t0, t0, 0xf << cc);
+ tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
+ tcg_gen_movi_i32(bcond, 0);
tcg_gen_br(l2);
gen_set_label(l1);
- tcg_gen_movi_tl(t0, 1);
+ tcg_gen_movi_i32(bcond, 1);
gen_set_label(l2);
}
opn = "bc1any4t";
not_likely:
ctx->hflags |= MIPS_HFLAG_BC;
- tcg_gen_trunc_tl_i32(bcond, t0);
break;
default:
MIPS_INVAL(opn);
@@ -5824,7 +5785,6 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
out:
tcg_temp_free(t0);
- tcg_temp_free(t1);
}
/* Coprocessor 1 (FPU) */
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH v2 11/11] target-mips: fix temporary variable freeing in op_ldst_##insn()
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
` (9 preceding siblings ...)
2008-11-08 19:21 ` [Qemu-devel] [PATCH 10/11] target-mips: gen_compute_branch1() Aurelien Jarno
@ 2008-11-08 19:22 ` Aurelien Jarno
10 siblings, 0 replies; 12+ messages in thread
From: Aurelien Jarno @ 2008-11-08 19:22 UTC (permalink / raw)
To: qemu-devel
Move tcg_temp_free() out of the conditional part to make sure
the TCG temporary variable is freed in all cases.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 7ec3e2f..0eb95ed 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1011,13 +1011,13 @@ static inline void op_ldst_##insn(TCGv t0, TCGv t1, DisasContext *ctx) \
gen_set_label(l1); \
tcg_gen_ld_tl(r_tmp, cpu_env, offsetof(CPUState, CP0_LLAddr)); \
tcg_gen_brcond_tl(TCG_COND_NE, t0, r_tmp, l2); \
- tcg_temp_free(r_tmp); \
tcg_gen_qemu_##fname(t1, t0, ctx->mem_idx); \
tcg_gen_movi_tl(t0, 1); \
tcg_gen_br(l3); \
gen_set_label(l2); \
tcg_gen_movi_tl(t0, 0); \
gen_set_label(l3); \
+ tcg_temp_free(r_tmp); \
}
OP_ST_ATOMIC(sc,st32,0x3);
#if defined(TARGET_MIPS64)
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply related [flat|nested] 12+ messages in thread