qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: blauwirbel@gmail.com, aurelien@aurel32.net
Subject: [Qemu-devel] [PATCH 10/14] tcg-sparc: Hoist common argument loads in tcg_out_op
Date: Mon, 17 Mar 2014 11:37:52 -0700	[thread overview]
Message-ID: <1395081476-6038-11-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1395081476-6038-1-git-send-email-rth@twiddle.net>

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/sparc/tcg-target.c | 123 +++++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 65 deletions(-)

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index b70d9b5..88676da 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -1136,49 +1136,56 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
 #endif /* CONFIG_SOFTMMU */
 }
 
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
-                              const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+                       const TCGArg args[TCG_MAX_OP_ARGS],
+                       const int const_args[TCG_MAX_OP_ARGS])
 {
-    int c;
+    TCGArg a0, a1, a2;
+    int c, c2;
+
+    /* Hoist the loads of the most common arguments.  */
+    a0 = args[0];
+    a1 = args[1];
+    a2 = args[2];
+    c2 = const_args[2];
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        if (check_fit_ptr(args[0], 13)) {
+        if (check_fit_ptr(a0, 13)) {
             tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
-            tcg_out_movi_imm13(s, TCG_REG_O0, args[0]);
+            tcg_out_movi_imm13(s, TCG_REG_O0, a0);
         } else {
-            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, args[0] & ~0x3ff);
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
             tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
-            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0,
-                           args[0] & 0x3ff, ARITH_OR);
+            tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
         }
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
             uint32_t old_insn = *(uint32_t *)s->code_ptr;
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+            s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
             /* Make sure to preserve links during retranslation.  */
             tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1)));
         } else {
             /* indirect jump method */
-            tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + args[0]));
+            tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0));
             tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL);
         }
         tcg_out_nop(s);
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
         break;
     case INDEX_op_call:
         if (const_args[0]) {
-            tcg_out_calli(s, args[0]);
+            tcg_out_calli(s, a0);
         } else {
-            tcg_out_arithi(s, TCG_REG_O7, args[0], 0, JMPL);
+            tcg_out_arithi(s, TCG_REG_O7, a0, 0, JMPL);
         }
         /* delay slot */
         tcg_out_nop(s);
         break;
     case INDEX_op_br:
-        tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]);
+        tcg_out_bpcc(s, COND_A, BPCC_PT, a0);
         tcg_out_nop(s);
         break;
 
@@ -1187,30 +1194,30 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         glue(glue(case INDEX_op_, x), _i64)
 
     OP_32_64(ld8u):
-        tcg_out_ldst(s, args[0], args[1], args[2], LDUB);
+        tcg_out_ldst(s, a0, a1, a2, LDUB);
         break;
     OP_32_64(ld8s):
-        tcg_out_ldst(s, args[0], args[1], args[2], LDSB);
+        tcg_out_ldst(s, a0, a1, a2, LDSB);
         break;
     OP_32_64(ld16u):
-        tcg_out_ldst(s, args[0], args[1], args[2], LDUH);
+        tcg_out_ldst(s, a0, a1, a2, LDUH);
         break;
     OP_32_64(ld16s):
-        tcg_out_ldst(s, args[0], args[1], args[2], LDSH);
+        tcg_out_ldst(s, a0, a1, a2, LDSH);
         break;
     case INDEX_op_ld_i32:
     case INDEX_op_ld32u_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDUW);
+        tcg_out_ldst(s, a0, a1, a2, LDUW);
         break;
     OP_32_64(st8):
-        tcg_out_ldst(s, args[0], args[1], args[2], STB);
+        tcg_out_ldst(s, a0, a1, a2, STB);
         break;
     OP_32_64(st16):
-        tcg_out_ldst(s, args[0], args[1], args[2], STH);
+        tcg_out_ldst(s, a0, a1, a2, STH);
         break;
     case INDEX_op_st_i32:
     case INDEX_op_st32_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], STW);
+        tcg_out_ldst(s, a0, a1, a2, STW);
         break;
     OP_32_64(add):
         c = ARITH_ADD;
@@ -1237,7 +1244,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         c = SHIFT_SLL;
     do_shift32:
         /* Limit immediate shift count lest we create an illegal insn.  */
-        tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c);
+        tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
         break;
     case INDEX_op_shr_i32:
         c = SHIFT_SRL;
@@ -1257,34 +1264,29 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 	goto gen_arith1;
 
     case INDEX_op_div_i32:
-        tcg_out_div32(s, args[0], args[1], args[2], const_args[2], 0);
+        tcg_out_div32(s, a0, a1, a2, c2, 0);
         break;
     case INDEX_op_divu_i32:
-        tcg_out_div32(s, args[0], args[1], args[2], const_args[2], 1);
+        tcg_out_div32(s, a0, a1, a2, c2, 1);
         break;
 
     case INDEX_op_brcond_i32:
-        tcg_out_brcond_i32(s, args[2], args[0], args[1], const_args[1],
-                           args[3]);
+        tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], args[3]);
         break;
     case INDEX_op_setcond_i32:
-        tcg_out_setcond_i32(s, args[3], args[0], args[1],
-                            args[2], const_args[2]);
+        tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
         break;
     case INDEX_op_movcond_i32:
-        tcg_out_movcond_i32(s, args[5], args[0], args[1],
-                            args[2], const_args[2], args[3], const_args[3]);
+        tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
         break;
 
     case INDEX_op_add2_i32:
-        tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
-                        args[4], const_args[4], args[5], const_args[5],
-                        ARITH_ADDCC, ARITH_ADDX);
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], const_args[4],
+                        args[5], const_args[5], ARITH_ADDCC, ARITH_ADDX);
         break;
     case INDEX_op_sub2_i32:
-        tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
-                        args[4], const_args[4], args[5], const_args[5],
-                        ARITH_SUBCC, ARITH_SUBX);
+        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], const_args[4],
+                        args[5], const_args[5], ARITH_SUBCC, ARITH_SUBX);
         break;
     case INDEX_op_mulu2_i32:
         c = ARITH_UMUL;
@@ -1294,41 +1296,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     do_mul2:
         /* The 32-bit multiply insns produce a full 64-bit result.  If the
            destination register can hold it, we can avoid the slower RDY.  */
-        tcg_out_arithc(s, args[0], args[2], args[3], const_args[3], c);
-        if (SPARC64 || args[0] <= TCG_REG_O7) {
-            tcg_out_arithi(s, args[1], args[0], 32, SHIFT_SRLX);
+        tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
+        if (SPARC64 || a0 <= TCG_REG_O7) {
+            tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
         } else {
-            tcg_out_rdy(s, args[1]);
+            tcg_out_rdy(s, a1);
         }
         break;
 
     case INDEX_op_qemu_ld_i32:
-        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], false);
+        tcg_out_qemu_ld(s, a0, a1, a2, args[3], false);
         break;
     case INDEX_op_qemu_ld_i64:
-        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3], true);
+        tcg_out_qemu_ld(s, a0, a1, a2, args[3], true);
         break;
     case INDEX_op_qemu_st_i32:
-        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]);
-        break;
     case INDEX_op_qemu_st_i64:
-        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]);
+        tcg_out_qemu_st(s, a0, a1, a2, args[3]);
         break;
 
     case INDEX_op_ld32s_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDSW);
+        tcg_out_ldst(s, a0, a1, a2, LDSW);
         break;
     case INDEX_op_ld_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDX);
+        tcg_out_ldst(s, a0, a1, a2, LDX);
         break;
     case INDEX_op_st_i64:
-        tcg_out_ldst(s, args[0], args[1], args[2], STX);
+        tcg_out_ldst(s, a0, a1, a2, STX);
         break;
     case INDEX_op_shl_i64:
         c = SHIFT_SLLX;
     do_shift64:
         /* Limit immediate shift count lest we create an illegal insn.  */
-        tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c);
+        tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
         break;
     case INDEX_op_shr_i64:
         c = SHIFT_SRLX;
@@ -1346,38 +1346,31 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         c = ARITH_UDIVX;
         goto gen_arith;
     case INDEX_op_ext32s_i64:
-        tcg_out_arithi(s, args[0], args[1], 0, SHIFT_SRA);
+        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
         break;
     case INDEX_op_ext32u_i64:
-        tcg_out_arithi(s, args[0], args[1], 0, SHIFT_SRL);
+        tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
         break;
     case INDEX_op_trunc_i32:
-        if (args[2] == 0) {
-            tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]);
-        } else {
-            tcg_out_arithi(s, args[0], args[1], args[2], SHIFT_SRLX);
-        }
+        tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX);
         break;
 
     case INDEX_op_brcond_i64:
-        tcg_out_brcond_i64(s, args[2], args[0], args[1], const_args[1],
-                           args[3]);
+        tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], args[3]);
         break;
     case INDEX_op_setcond_i64:
-        tcg_out_setcond_i64(s, args[3], args[0], args[1],
-                            args[2], const_args[2]);
+        tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
         break;
     case INDEX_op_movcond_i64:
-        tcg_out_movcond_i64(s, args[5], args[0], args[1],
-                            args[2], const_args[2], args[3], const_args[3]);
+        tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
         break;
 
     gen_arith:
-        tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
+        tcg_out_arithc(s, a0, a1, a2, c2, c);
         break;
 
     gen_arith1:
-	tcg_out_arithc(s, args[0], TCG_REG_G0, args[1], const_args[1], c);
+	tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
 	break;
 
     case INDEX_op_mov_i64:
-- 
1.8.5.3

  parent reply	other threads:[~2014-03-17 18:38 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-17 18:37 [Qemu-devel] [PATCH 00/14] tcg/sparc v8plus code generation Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 01/14] tcg: Fix missed pointer size != TCG_TARGET_REG_BITS changes Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 02/14] tcg: Add INDEX_op_trunc_i32 Richard Henderson
2014-03-21 23:35   ` Stuart Brady
2014-03-21 23:39     ` Stuart Brady
2014-03-17 18:37 ` [Qemu-devel] [PATCH 03/14] tcg-sparc: Remove most uses of TCG_TARGET_REG_BITS Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 04/14] tcg-sparc: Support trunc_i32 Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 05/14] tcg-sparc: Use 64-bit registers with sparcv8plus Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 06/14] tcg-sparc: Use the RETURN instruction Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 07/14] tcg-sparc: Implement muls2_i32 Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 08/14] tcg-sparc: Tidy check_fit_* tests Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 09/14] tcg-sparc: Don't handle mov/movi in tcg_out_op Richard Henderson
2014-03-17 18:37 ` Richard Henderson [this message]
2014-03-17 18:37 ` [Qemu-devel] [PATCH 11/14] tcg-sparc: Fixup function argument types Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 12/14] tcg-sparc: Fix small 32-bit movi Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 13/14] tcg-sparc: Fix 32-bit constant arguments tests Richard Henderson
2014-03-22  9:48   ` Stuart Brady
2014-03-22 17:08     ` Richard Henderson
2014-03-17 18:37 ` [Qemu-devel] [PATCH 14/14] tcg-sparc: Accept stores of zero Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1395081476-6038-11-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aurelien@aurel32.net \
    --cc=blauwirbel@gmail.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).