All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH v2 07/11] target-mips: optimize gen_muldiv()
Date: Sat, 8 Nov 2008 20:20:11 +0100	[thread overview]
Message-ID: <20081108192011.GH15084@volta.aurel32.net> (raw)
In-Reply-To: <20081108191604.GA20748@volta.aurel32.net>

Optimize code generation in gen_muldiv():
- Don't sign-extend operands that the architecture already requires to be
  sign extended (if they are not, the results are UNPREDICTABLE anyway).
- Access the LO and HI registers directly instead of writing them through
  a temporary variable.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-mips/translate.c |  162 ++++++++++++++---------------------------------
 1 files changed, 47 insertions(+), 115 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index dffdb82..e6978d6 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -604,27 +604,7 @@ static inline void gen_store_gpr (TCGv t, int reg)
         tcg_gen_mov_tl(cpu_gpr[reg], t);
 }
 
-/* Moves to/from HI and LO registers.  */
-static inline void gen_load_HI (TCGv t, int reg)
-{
-    tcg_gen_mov_tl(t, cpu_HI[reg]);
-}
-
-static inline void gen_store_HI (TCGv t, int reg)
-{
-    tcg_gen_mov_tl(cpu_HI[reg], t);
-}
-
-static inline void gen_load_LO (TCGv t, int reg)
-{
-    tcg_gen_mov_tl(t, cpu_LO[reg]);
-}
-
-static inline void gen_store_LO (TCGv t, int reg)
-{
-    tcg_gen_mov_tl(cpu_LO[reg], t);
-}
-
+/* Moves to/from ACX register.  */
 static inline void gen_load_ACX (TCGv t, int reg)
 {
     tcg_gen_mov_tl(t, cpu_ACX[reg]);
@@ -1856,23 +1836,23 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
     }
     switch (opc) {
     case OPC_MFHI:
-        gen_load_HI(t0, 0);
+        tcg_gen_mov_tl(t0, cpu_HI[0]);
         gen_store_gpr(t0, reg);
         opn = "mfhi";
         break;
     case OPC_MFLO:
-        gen_load_LO(t0, 0);
+        tcg_gen_mov_tl(t0, cpu_LO[0]);
         gen_store_gpr(t0, reg);
         opn = "mflo";
         break;
     case OPC_MTHI:
         gen_load_gpr(t0, reg);
-        gen_store_HI(t0, 0);
+        tcg_gen_mov_tl(cpu_HI[0], t0);
         opn = "mthi";
         break;
     case OPC_MTLO:
         gen_load_gpr(t0, reg);
-        gen_store_LO(t0, 0);
+        tcg_gen_mov_tl(cpu_LO[0], t0);
         opn = "mtlo";
         break;
     default:
@@ -1899,27 +1879,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             int l1 = gen_new_label();
 
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
             {
-                TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
-                TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                TCGv r_tmp3 = tcg_temp_new(TCG_TYPE_I64);
-
-                tcg_gen_ext_tl_i64(r_tmp1, t0);
-                tcg_gen_ext_tl_i64(r_tmp2, t1);
-                tcg_gen_div_i64(r_tmp3, r_tmp1, r_tmp2);
-                tcg_gen_rem_i64(r_tmp2, r_tmp1, r_tmp2);
-                tcg_gen_trunc_i64_tl(t0, r_tmp3);
-                tcg_gen_trunc_i64_tl(t1, r_tmp2);
+                int l2 = gen_new_label();
+                TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+                TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
+                TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_I32);
+
+                tcg_gen_trunc_tl_i32(r_tmp1, t0);
+                tcg_gen_trunc_tl_i32(r_tmp2, t1);
+                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
+                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
+                tcg_gen_ext32s_tl(cpu_LO[0], t0);
+                tcg_gen_movi_tl(cpu_HI[0], 0);
+                tcg_gen_br(l1);
+                gen_set_label(l2);
+                tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
+                tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
+                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
                 tcg_temp_free(r_tmp1);
                 tcg_temp_free(r_tmp2);
                 tcg_temp_free(r_tmp3);
-                tcg_gen_ext32s_tl(t0, t0);
-                tcg_gen_ext32s_tl(t1, t1);
-                gen_store_LO(t0, 0);
-                gen_store_HI(t1, 0);
             }
             gen_set_label(l1);
         }
@@ -1940,13 +1921,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
                 tcg_gen_trunc_tl_i32(r_tmp2, t1);
                 tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
                 tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
-                tcg_gen_ext_i32_tl(t0, r_tmp3);
-                tcg_gen_ext_i32_tl(t1, r_tmp1);
+                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
                 tcg_temp_free(r_tmp1);
                 tcg_temp_free(r_tmp2);
                 tcg_temp_free(r_tmp3);
-                gen_store_LO(t0, 0);
-                gen_store_HI(t1, 0);
             }
             gen_set_label(l1);
         }
@@ -1957,8 +1936,6 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
             TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
 
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
             tcg_gen_ext_tl_i64(r_tmp1, t0);
             tcg_gen_ext_tl_i64(r_tmp2, t1);
             tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
@@ -1967,10 +1944,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
             tcg_gen_trunc_i64_tl(t1, r_tmp1);
             tcg_temp_free(r_tmp1);
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
-            gen_store_LO(t0, 0);
-            gen_store_HI(t1, 0);
+            tcg_gen_ext32s_tl(cpu_LO[0], t0);
+            tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
         opn = "mult";
         break;
@@ -1989,10 +1964,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
             tcg_gen_trunc_i64_tl(t1, r_tmp1);
             tcg_temp_free(r_tmp1);
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
-            gen_store_LO(t0, 0);
-            gen_store_HI(t1, 0);
+            tcg_gen_ext32s_tl(cpu_LO[0], t0);
+            tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
         opn = "multu";
         break;
@@ -2007,24 +1980,12 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
 
                 tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
                 tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-                {
-                    tcg_gen_movi_tl(t1, 0);
-                    gen_store_LO(t0, 0);
-                    gen_store_HI(t1, 0);
-                    tcg_gen_br(l1);
-                }
+                tcg_gen_mov_tl(cpu_LO[0], t0);
+                tcg_gen_movi_tl(cpu_HI[0], 0);
+                tcg_gen_br(l1);
                 gen_set_label(l2);
-                {
-                    TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
-                    TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
-                    tcg_gen_div_i64(r_tmp1, t0, t1);
-                    tcg_gen_rem_i64(r_tmp2, t0, t1);
-                    gen_store_LO(r_tmp1, 0);
-                    gen_store_HI(r_tmp2, 0);
-                    tcg_temp_free(r_tmp1);
-                    tcg_temp_free(r_tmp2);
-                }
+                tcg_gen_div_i64(cpu_LO[0], t0, t1);
+                tcg_gen_rem_i64(cpu_HI[0], t0, t1);
             }
             gen_set_label(l1);
         }
@@ -2035,17 +1996,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             int l1 = gen_new_label();
 
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
-                TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
-                tcg_gen_divu_i64(r_tmp1, t0, t1);
-                tcg_gen_remu_i64(r_tmp2, t0, t1);
-                tcg_temp_free(r_tmp1);
-                tcg_temp_free(r_tmp2);
-                gen_store_LO(r_tmp1, 0);
-                gen_store_HI(r_tmp2, 0);
-            }
+            tcg_gen_divu_i64(cpu_LO[0], t0, t1);
+            tcg_gen_remu_i64(cpu_HI[0], t0, t1);
             gen_set_label(l1);
         }
         opn = "ddivu";
@@ -2064,24 +2016,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
             TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
 
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
             tcg_gen_ext_tl_i64(r_tmp1, t0);
             tcg_gen_ext_tl_i64(r_tmp2, t1);
             tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            gen_load_LO(t0, 0);
-            gen_load_HI(t1, 0);
-            tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
             tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
             tcg_temp_free(r_tmp2);
             tcg_gen_trunc_i64_tl(t0, r_tmp1);
             tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
             tcg_gen_trunc_i64_tl(t1, r_tmp1);
             tcg_temp_free(r_tmp1);
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
-            gen_store_LO(t0, 0);
-            gen_store_HI(t1, 0);
+            tcg_gen_ext32s_tl(cpu_LO[0], t0);
+            tcg_gen_ext32s_tl(cpu_LO[1], t1);
         }
         opn = "madd";
         break;
@@ -2095,19 +2041,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             tcg_gen_extu_tl_i64(r_tmp1, t0);
             tcg_gen_extu_tl_i64(r_tmp2, t1);
             tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            gen_load_LO(t0, 0);
-            gen_load_HI(t1, 0);
-            tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
             tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
             tcg_temp_free(r_tmp2);
             tcg_gen_trunc_i64_tl(t0, r_tmp1);
             tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
             tcg_gen_trunc_i64_tl(t1, r_tmp1);
             tcg_temp_free(r_tmp1);
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
-            gen_store_LO(t0, 0);
-            gen_store_HI(t1, 0);
+            tcg_gen_ext32s_tl(cpu_LO[0], t0);
+            tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
         opn = "maddu";
         break;
@@ -2116,24 +2058,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
             TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
 
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
             tcg_gen_ext_tl_i64(r_tmp1, t0);
             tcg_gen_ext_tl_i64(r_tmp2, t1);
             tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            gen_load_LO(t0, 0);
-            gen_load_HI(t1, 0);
-            tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
             tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
             tcg_temp_free(r_tmp2);
             tcg_gen_trunc_i64_tl(t0, r_tmp1);
             tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
             tcg_gen_trunc_i64_tl(t1, r_tmp1);
             tcg_temp_free(r_tmp1);
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
-            gen_store_LO(t0, 0);
-            gen_store_HI(t1, 0);
+            tcg_gen_ext32s_tl(cpu_LO[0], t0);
+            tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
         opn = "msub";
         break;
@@ -2147,19 +2083,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             tcg_gen_extu_tl_i64(r_tmp1, t0);
             tcg_gen_extu_tl_i64(r_tmp2, t1);
             tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            gen_load_LO(t0, 0);
-            gen_load_HI(t1, 0);
-            tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
             tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
             tcg_temp_free(r_tmp2);
             tcg_gen_trunc_i64_tl(t0, r_tmp1);
             tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
             tcg_gen_trunc_i64_tl(t1, r_tmp1);
             tcg_temp_free(r_tmp1);
-            tcg_gen_ext32s_tl(t0, t0);
-            tcg_gen_ext32s_tl(t1, t1);
-            gen_store_LO(t0, 0);
-            gen_store_HI(t1, 0);
+            tcg_gen_ext32s_tl(cpu_LO[0], t0);
+            tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
         opn = "msubu";
         break;
-- 
1.5.6.5


-- 
  .''`.  Aurelien Jarno	            | GPG: 1024D/F1BCDB73
 : :' :  Debian developer           | Electrical Engineer
 `. `'   aurel32@debian.org         | aurelien@aurel32.net
   `-    people.debian.org/~aurel32 | www.aurel32.net

  parent reply	other threads:[~2008-11-08 19:20 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-08 19:16 [Qemu-devel] [PATCH v2 0/11] target-mips: optimizations Aurelien Jarno
2008-11-08 19:16 ` [Qemu-devel] [PATCH v2 01/11] target-mips: optimize gen_save_pc() Aurelien Jarno
2008-11-08 19:17 ` [Qemu-devel] [PATCH v2 02/11] target-mips: optimize gen_op_addr_add() (1/2) Aurelien Jarno
2008-11-08 19:17 ` [Qemu-devel] [PATCH v2 03/11] target-mips: optimize gen_op_addr_add() (2/2) Aurelien Jarno
2008-11-08 19:18 ` [Qemu-devel] [PATCH v2 04/11] target-mips: convert bitfield ops to TCG Aurelien Jarno
2008-11-08 19:19 ` [Qemu-devel] [PATCH v2 05/11] target-mips: convert bit shuffle " Aurelien Jarno
2008-11-08 19:19 ` [Qemu-devel] [PATCH v2 06/11] target-mips: optimize gen_arith()/gen_arith_imm() Aurelien Jarno
2008-11-08 19:20 ` Aurelien Jarno [this message]
2008-11-08 19:20 ` [Qemu-devel] [PATCH v2 08/11] target-mips: optimize gen_farith() Aurelien Jarno
2008-11-08 19:21 ` [Qemu-devel] [PATCH v2 09/11] target-mips: optimize movc*() Aurelien Jarno
2008-11-08 19:21 ` [Qemu-devel] [PATCH 10/11] target-mips: gen_compute_branch1() Aurelien Jarno
2008-11-08 19:22 ` [Qemu-devel] [PATCH v2 11/11] target-mips: fix temporary variable freeing in op_ldst_##insn() Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081108192011.GH15084@volta.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.