From: Aurelien Jarno <aurelien@aurel32.net>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 07/11] target-mips: optimize gen_muldiv()
Date: Sat, 8 Nov 2008 09:37:16 +0100 [thread overview]
Message-ID: <20081108083716.GI9549@volta.aurel32.net> (raw)
In-Reply-To: <20081108083118.GB9549@volta.aurel32.net>
Optimize code generation in gen_muldiv():
- Don't sign-extend values that are already guaranteed to be sign
extended (if the inputs are not sign extended, the results are
UNPREDICTABLE anyway).
- Access the LO, HI registers directly instead of writing them through
a temporary variable.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/translate.c | 162 ++++++++++++++---------------------------------
1 files changed, 47 insertions(+), 115 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f1cb7ca..544e5c8 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -604,27 +604,7 @@ static inline void gen_store_gpr (TCGv t, int reg)
tcg_gen_mov_tl(cpu_gpr[reg], t);
}
-/* Moves to/from HI and LO registers. */
-static inline void gen_load_HI (TCGv t, int reg)
-{
- tcg_gen_mov_tl(t, cpu_HI[reg]);
-}
-
-static inline void gen_store_HI (TCGv t, int reg)
-{
- tcg_gen_mov_tl(cpu_HI[reg], t);
-}
-
-static inline void gen_load_LO (TCGv t, int reg)
-{
- tcg_gen_mov_tl(t, cpu_LO[reg]);
-}
-
-static inline void gen_store_LO (TCGv t, int reg)
-{
- tcg_gen_mov_tl(cpu_LO[reg], t);
-}
-
+/* Moves to/from ACX register. */
static inline void gen_load_ACX (TCGv t, int reg)
{
tcg_gen_mov_tl(t, cpu_ACX[reg]);
@@ -1850,23 +1830,23 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
}
switch (opc) {
case OPC_MFHI:
- gen_load_HI(t0, 0);
+ tcg_gen_mov_tl(t0, cpu_HI[0]);
gen_store_gpr(t0, reg);
opn = "mfhi";
break;
case OPC_MFLO:
- gen_load_LO(t0, 0);
+ tcg_gen_mov_tl(t0, cpu_LO[0]);
gen_store_gpr(t0, reg);
opn = "mflo";
break;
case OPC_MTHI:
gen_load_gpr(t0, reg);
- gen_store_HI(t0, 0);
+ tcg_gen_mov_tl(cpu_HI[0], t0);
opn = "mthi";
break;
case OPC_MTLO:
gen_load_gpr(t0, reg);
- gen_store_LO(t0, 0);
+ tcg_gen_mov_tl(cpu_LO[0], t0);
opn = "mtlo";
break;
default:
@@ -1893,27 +1873,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
{
int l1 = gen_new_label();
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
{
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp3 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_ext_tl_i64(r_tmp1, t0);
- tcg_gen_ext_tl_i64(r_tmp2, t1);
- tcg_gen_div_i64(r_tmp3, r_tmp1, r_tmp2);
- tcg_gen_rem_i64(r_tmp2, r_tmp1, r_tmp2);
- tcg_gen_trunc_i64_tl(t0, r_tmp3);
- tcg_gen_trunc_i64_tl(t1, r_tmp2);
+ int l2 = gen_new_label();
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_I32);
+
+ tcg_gen_trunc_tl_i32(r_tmp1, t0);
+ tcg_gen_trunc_tl_i32(r_tmp2, t1);
+ tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
+ tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_movi_tl(cpu_HI[0], 0);
+ tcg_gen_br(l1);
+ gen_set_label(l2);
+ tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
+ tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
+ tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+ tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
tcg_temp_free(r_tmp3);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
}
gen_set_label(l1);
}
@@ -1934,13 +1915,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_trunc_tl_i32(r_tmp2, t1);
tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
- tcg_gen_ext_i32_tl(t0, r_tmp3);
- tcg_gen_ext_i32_tl(t1, r_tmp1);
+ tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+ tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
tcg_temp_free(r_tmp3);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
}
gen_set_label(l1);
}
@@ -1951,8 +1930,6 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
@@ -1961,10 +1938,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "mult";
break;
@@ -1983,10 +1958,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "multu";
break;
@@ -2001,24 +1974,12 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
- {
- tcg_gen_movi_tl(t1, 0);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
- tcg_gen_br(l1);
- }
+ tcg_gen_mov_tl(cpu_LO[0], t0);
+ tcg_gen_movi_tl(cpu_HI[0], 0);
+ tcg_gen_br(l1);
gen_set_label(l2);
- {
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_div_i64(r_tmp1, t0, t1);
- tcg_gen_rem_i64(r_tmp2, t0, t1);
- gen_store_LO(r_tmp1, 0);
- gen_store_HI(r_tmp2, 0);
- tcg_temp_free(r_tmp1);
- tcg_temp_free(r_tmp2);
- }
+ tcg_gen_div_i64(cpu_LO[0], t0, t1);
+ tcg_gen_rem_i64(cpu_HI[0], t0, t1);
}
gen_set_label(l1);
}
@@ -2029,17 +1990,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
int l1 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
- {
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_divu_i64(r_tmp1, t0, t1);
- tcg_gen_remu_i64(r_tmp2, t0, t1);
- tcg_temp_free(r_tmp1);
- tcg_temp_free(r_tmp2);
- gen_store_LO(r_tmp1, 0);
- gen_store_HI(r_tmp2, 0);
- }
+ tcg_gen_divu_i64(cpu_LO[0], t0, t1);
+ tcg_gen_remu_i64(cpu_HI[0], t0, t1);
gen_set_label(l1);
}
opn = "ddivu";
@@ -2058,24 +2010,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "madd";
break;
@@ -2089,19 +2035,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_extu_tl_i64(r_tmp1, t0);
tcg_gen_extu_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "maddu";
break;
@@ -2110,24 +2052,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "msub";
break;
@@ -2141,19 +2077,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_extu_tl_i64(r_tmp1, t0);
tcg_gen_extu_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "msubu";
break;
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
next prev parent reply other threads:[~2008-11-08 8:37 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-08 8:31 [Qemu-devel] [PATCH 0/11] target-mips: optimizations Aurelien Jarno
2008-11-08 8:32 ` [Qemu-devel] [PATCH 01/11] target-mips: optimize gen_save_pc() Aurelien Jarno
2008-11-08 8:32 ` [Qemu-devel] [PATCH 02/11] target-mips: optimize gen_op_addr_add() (1/2) Aurelien Jarno
2008-11-08 8:33 ` [Qemu-devel] [PATCH 03/11] target-mips: optimize gen_op_addr_add() (2/2) Aurelien Jarno
2008-11-08 8:34 ` [Qemu-devel] [PATCH 04/11] target-mips: convert bitfield ops to TCG Aurelien Jarno
2008-11-08 12:57 ` Laurent Desnogues
2008-11-08 19:13 ` Aurelien Jarno
2008-11-08 8:34 ` [Qemu-devel] [PATCH 05/11] target-mips: convert bit shuffle " Aurelien Jarno
2008-11-08 8:35 ` [Qemu-devel] [PATCH 06/11] target-mips: optimize gen_arith()/gen_arith_imm() Aurelien Jarno
2008-11-08 8:37 ` Aurelien Jarno [this message]
2008-11-08 8:37 ` [Qemu-devel] [PATCH 08/11] target-mips: optimize gen_farith() Aurelien Jarno
2008-11-08 8:38 ` [Qemu-devel] [PATCH 09/11] target-mips: optimize movc*() Aurelien Jarno
2008-11-08 8:39 ` [Qemu-devel] [PATCH 10/11] target-mips: gen_compute_branch1() Aurelien Jarno
2008-11-08 8:39 ` [Qemu-devel] [PATCH 11/11] target-mips: fix temporary variable freeing in op_ldst_##insn() Aurelien Jarno
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081108083716.GI9549@volta.aurel32.net \
--to=aurelien@aurel32.net \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.