All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul Brook <paul@codesourcery.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] TCG native 32->64 concatenation
Date: Sun, 7 Sep 2008 17:53:26 +0100	[thread overview]
Message-ID: <200809071753.27384.paul@codesourcery.com> (raw)

The patch below adds a new concat_i32_i64 TCG op.  This allows a pair of 
32-bit values to be efficiently combined to form a 64-bit value.  I've 
converted all the cases I could find to use this, and tested the arm code on 
both 32 and 64-bit hosts.

This touches bits of code that I can't easily test well, so I'd appreciate 
another pair of eyes looking over it before I commit.

Signed-off-by: Paul Brook <paul@codesourcery.com>

Index: target-sh4/translate.c
===================================================================
--- target-sh4/translate.c	(revision 5178)
+++ target-sh4/translate.c	(working copy)
@@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t
 static inline void gen_load_fpr64(TCGv t, int reg)
 {
     TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32);
 
     tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg]));
-    tcg_gen_extu_i32_i64(t, tmp1);
-    tcg_gen_shli_i64(t, t, 32);
-    tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1]));
-    tcg_gen_extu_i32_i64(tmp2, tmp1);
+    tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1]));
+    tcg_gen_concat_i32_i64(t, tmp2, tmp1); /* fregs[reg] is the high half */
     tcg_temp_free(tmp1);
-    tcg_gen_or_i64(t, t, tmp2);
     tcg_temp_free(tmp2);
 }
 
Index: target-ppc/translate.c
===================================================================
--- target-ppc/translate.c	(revision 5178)
+++ target-ppc/translate.c	(working copy)
@@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64
 #if defined(TARGET_PPC64)
     tcg_gen_mov_i64(t, cpu_gpr[reg]);
 #else
-    tcg_gen_extu_i32_i64(t, cpu_gprh[reg]);
-    tcg_gen_shli_i64(t, t, 32);
-    TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64);
-    tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]);
-    tcg_gen_or_i64(t, t, tmp);
-    tcg_temp_free(tmp);
+    tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]);
 #endif
 }
 
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5178)
+++ target-mips/translate.c	(working copy)
@@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas
         tcg_gen_ld_i64(t, current_fpu, 8 * reg);
     } else {
         TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
-        TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
+        TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32);
 
         tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 * FP_ENDIAN_IDX);
-        tcg_gen_extu_i32_i64(t, r_tmp1);
-        tcg_gen_shli_i64(t, t, 32);
-        tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 * FP_ENDIAN_IDX);
-        tcg_gen_extu_i32_i64(r_tmp2, r_tmp1);
-        tcg_gen_or_i64(t, t, r_tmp2);
+        tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 * FP_ENDIAN_IDX);
+        tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1);
         tcg_temp_free(r_tmp1);
         tcg_temp_free(r_tmp2);
     }
@@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct
     case FOP(38, 16):
         check_cp1_64bitmode(ctx);
         {
-            TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64);
-            TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64);
+            TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
             TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32);
             TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32);
 
             gen_load_fpr32(fp32_0, fs);
             gen_load_fpr32(fp32_1, ft);
-            tcg_gen_extu_i32_i64(fp64_0, fp32_0);
-            tcg_gen_extu_i32_i64(fp64_1, fp32_1);
-            tcg_temp_free(fp32_0);
+            tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
             tcg_temp_free(fp32_1);
-            tcg_gen_shli_i64(fp64_1, fp64_1, 32);
-            tcg_gen_or_i64(fp64_0, fp64_0, fp64_1);
-            tcg_temp_free(fp64_1);
-            gen_store_fpr64(ctx, fp64_0, fd);
-            tcg_temp_free(fp64_0);
+            tcg_temp_free(fp32_0);
+            gen_store_fpr64(ctx, fp64, fd);
+            tcg_temp_free(fp64);
         }
         opn = "cvt.ps.s";
         break;
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5178)
+++ tcg/tcg-op.h	(working copy)
@@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T
 }
 #endif
 
+static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high)
+{
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(dest, low);
+    tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+    TCGv tmp = tcg_temp_new (TCG_TYPE_I64);
+    /* This extension is only needed for type correctness.
+       We may be able to do better given target specific information.  */
+    tcg_gen_extu_i32_i64(tmp, high);
+    tcg_gen_shli_i64(tmp, tmp, 32);
+    tcg_gen_extu_i32_i64(dest, low);
+    tcg_gen_or_i64(dest, dest, tmp); /* dest = (high << 32) | low */
+    tcg_temp_free(tmp);
+#endif
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
Index: tcg/README
===================================================================
--- tcg/README	(revision 5178)
+++ tcg/README	(working copy)
@@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d
 * trunc_i64_i32 t0, t1
 Truncate t1 (64 bit) to t0 (32 bit)
 
+* concat_i32_i64 t0, t1, t2
+Construct t0 (64 bit) taking the low half from t1 (32 bit) and the high half
+from t2 (32 bit).
+
 ********* Load/Store
 
 * ld_i32/i64 t0, t1, offset
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5178)
+++ target-arm/translate.c	(working copy)
@@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in
 
 static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
 {
-    tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]);
-    tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]);
-    tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
-    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+    tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]); /* wRn = T1:T0 */
     iwmmxt_store_reg(cpu_V0, rn);
 }
 
@@ -4663,14 +4660,11 @@ static int disas_neon_data_insn(CPUState
                     } else {
                         tmp = neon_load_reg(rm + pass, 0);
                         gen_neon_shift_narrow(size, tmp, tmp2, q, u);
-                        tcg_gen_extu_i32_i64(cpu_V0, tmp);
+                        tmp3 = neon_load_reg(rm + pass, 1);
+                        gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
+                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
                         dead_tmp(tmp);
-                        tmp = neon_load_reg(rm + pass, 1);
-                        gen_neon_shift_narrow(size, tmp, tmp2, q, u);
-                        tcg_gen_extu_i32_i64(cpu_V1, tmp);
-                        dead_tmp(tmp);
-                        tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
-                        tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+                        dead_tmp(tmp3);
                     }
                     tmp = new_tmp();
                     if (op == 8 && !u) {
@@ -5600,7 +5594,7 @@ static void gen_addq_lo(DisasContext *s,
     TCGv tmp;
     TCGv tmp2;
 
-    /* Load 64-bit value rd:rn.  */
+    /* Load value and extend to 64 bits.  */
     tmp = tcg_temp_new(TCG_TYPE_I64);
     tmp2 = load_reg(s, rlow);
     tcg_gen_extu_i32_i64(tmp, tmp2);
@@ -5612,19 +5606,16 @@ static void gen_addq_lo(DisasContext *s,
 static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
 {
     TCGv tmp;
-    TCGv tmp2;
+    TCGv tmpl;
+    TCGv tmph;
 
     /* Load 64-bit value rd:rn.  */
+    tmpl = load_reg(s, rlow);
+    tmph = load_reg(s, rhigh);
     tmp = tcg_temp_new(TCG_TYPE_I64);
-    tmp2 = load_reg(s, rhigh);
-    tcg_gen_extu_i32_i64(tmp, tmp2);
-    dead_tmp(tmp2);
-    tcg_gen_shli_i64(tmp, tmp, 32);
-    tcg_gen_add_i64(val, val, tmp);
-
-    tmp2 = load_reg(s, rlow);
-    tcg_gen_extu_i32_i64(tmp, tmp2);
-    dead_tmp(tmp2);
+    tcg_gen_concat_i32_i64(tmp, tmpl, tmph); /* tmp = rhigh:rlow */
+    dead_tmp(tmpl);
+    dead_tmp(tmph);
     tcg_gen_add_i64(val, val, tmp);
 }
 
 

             reply	other threads:[~2008-09-07 16:53 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-07 16:53 Paul Brook [this message]
2008-09-07 18:15 ` [Qemu-devel] TCG native 32->64 concatenation Blue Swirl
2008-09-07 18:43   ` Paul Brook
2008-09-14 17:03 ` Aurelien Jarno
2008-09-15 23:16 ` andrzej zaborowski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200809071753.27384.paul@codesourcery.com \
    --to=paul@codesourcery.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.