* [Qemu-devel] TCG native 32->64 concatenation
@ 2008-09-07 16:53 Paul Brook
2008-09-07 18:15 ` Blue Swirl
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Paul Brook @ 2008-09-07 16:53 UTC (permalink / raw)
To: qemu-devel
The patch below adds a new concat_i32_i64 TCG op. This allows a pair of
32-bit values to be efficiently combined to form a 64-bit value. I've
converted all the cases I could find to use this, and tested the arm code on
both 32 and 64-bit hosts.
This touches bits of code that I can't easily test well, so I'd appreciate
another pair of eyes looking over it before I commit.
Signed-off-by: Paul Brook <paul@codesourcery.com>
Index: target-sh4/translate.c
===================================================================
--- target-sh4/translate.c (revision 5178)
+++ target-sh4/translate.c (working copy)
@@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t
static inline void gen_load_fpr64(TCGv t, int reg)
{
TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32);
- TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+ TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32);
tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg]));
- tcg_gen_extu_i32_i64(t, tmp1);
- tcg_gen_shli_i64(t, t, 32);
- tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1]));
- tcg_gen_extu_i32_i64(tmp2, tmp1);
+ tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1]));
+ tcg_gen_concat_i32_i64(t, tmp2, tmp1);
tcg_temp_free(tmp1);
- tcg_gen_or_i64(t, t, tmp2);
tcg_temp_free(tmp2);
}
Index: target-ppc/translate.c
===================================================================
--- target-ppc/translate.c (revision 5178)
+++ target-ppc/translate.c (working copy)
@@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64
#if defined(TARGET_PPC64)
tcg_gen_mov_i64(t, cpu_gpr[reg]);
#else
- tcg_gen_extu_i32_i64(t, cpu_gprh[reg]);
- tcg_gen_shli_i64(t, t, 32);
- TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64);
- tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]);
- tcg_gen_or_i64(t, t, tmp);
- tcg_temp_free(tmp);
+ tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]);
#endif
}
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c (revision 5178)
+++ target-mips/translate.c (working copy)
@@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas
tcg_gen_ld_i64(t, current_fpu, 8 * reg);
} else {
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
+ TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32);
tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 *
FP_ENDIAN_IDX);
- tcg_gen_extu_i32_i64(t, r_tmp1);
- tcg_gen_shli_i64(t, t, 32);
- tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 *
FP_ENDIAN_IDX);
- tcg_gen_extu_i32_i64(r_tmp2, r_tmp1);
- tcg_gen_or_i64(t, t, r_tmp2);
+ tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 *
FP_ENDIAN_IDX);
+ tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
}
@@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct
case FOP(38, 16):
check_cp1_64bitmode(ctx);
{
- TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64);
- TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64);
+ TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32);
TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32);
gen_load_fpr32(fp32_0, fs);
gen_load_fpr32(fp32_1, ft);
- tcg_gen_extu_i32_i64(fp64_0, fp32_0);
- tcg_gen_extu_i32_i64(fp64_1, fp32_1);
- tcg_temp_free(fp32_0);
+ tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
tcg_temp_free(fp32_1);
- tcg_gen_shli_i64(fp64_1, fp64_1, 32);
- tcg_gen_or_i64(fp64_0, fp64_0, fp64_1);
- tcg_temp_free(fp64_1);
- gen_store_fpr64(ctx, fp64_0, fd);
- tcg_temp_free(fp64_0);
+ tcg_temp_free(fp32_0);
+ gen_store_fpr64(ctx, fp64, fd);
+ tcg_temp_free(fp64);
}
opn = "cvt.ps.s";
break;
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h (revision 5178)
+++ tcg/tcg-op.h (working copy)
@@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T
}
#endif
+static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high)
+{
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(dest, low);
+ tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+ TCGv tmp = tcg_temp_new (TCG_TYPE_I64);
+ /* This extension is only needed for type correctness.
+ We may be able to do better given target specific information. */
+ tcg_gen_extu_i32_i64(tmp, high);
+ tcg_gen_shli_i64(tmp, tmp, 32);
+ tcg_gen_extu_i32_i64(dest, low);
+ tcg_gen_or_i64(dest, dest, tmp);
+ tcg_temp_free(tmp);
+#endif
+}
+
/***************************************/
/* QEMU specific operations. Their type depend on the QEMU CPU
type. */
Index: tcg/README
===================================================================
--- tcg/README (revision 5178)
+++ tcg/README (working copy)
@@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d
* trunc_i64_i32 t0, t1
Truncate t1 (64 bit) to t0 (32 bit)
+* concat_i32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
+from t2 (32 bit).
+
********* Load/Store
* ld_i32/i64 t0, t1, offset
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c (revision 5178)
+++ target-arm/translate.c (working copy)
@@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in
static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
{
- tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]);
- tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+ tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[0]);
iwmmxt_store_reg(cpu_V0, rn);
}
@@ -4663,14 +4660,11 @@ static int disas_neon_data_insn(CPUState
} else {
tmp = neon_load_reg(rm + pass, 0);
gen_neon_shift_narrow(size, tmp, tmp2, q, u);
- tcg_gen_extu_i32_i64(cpu_V0, tmp);
+ tmp3 = neon_load_reg(rm + pass, 1);
+ gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
+ tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
dead_tmp(tmp);
- tmp = neon_load_reg(rm + pass, 1);
- gen_neon_shift_narrow(size, tmp, tmp2, q, u);
- tcg_gen_extu_i32_i64(cpu_V1, tmp);
- dead_tmp(tmp);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+ dead_tmp(tmp3);
}
tmp = new_tmp();
if (op == 8 && !u) {
@@ -5600,7 +5594,7 @@ static void gen_addq_lo(DisasContext *s,
TCGv tmp;
TCGv tmp2;
- /* Load 64-bit value rd:rn. */
+ /* Load value and extend to 64 bits. */
tmp = tcg_temp_new(TCG_TYPE_I64);
tmp2 = load_reg(s, rlow);
tcg_gen_extu_i32_i64(tmp, tmp2);
@@ -5612,19 +5606,16 @@ static void gen_addq_lo(DisasContext *s,
static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
{
TCGv tmp;
- TCGv tmp2;
+ TCGv tmpl;
+ TCGv tmph;
/* Load 64-bit value rd:rn. */
+ tmpl = load_reg(s, rlow);
+ tmph = load_reg(s, rhigh);
tmp = tcg_temp_new(TCG_TYPE_I64);
- tmp2 = load_reg(s, rhigh);
- tcg_gen_extu_i32_i64(tmp, tmp2);
- dead_tmp(tmp2);
- tcg_gen_shli_i64(tmp, tmp, 32);
- tcg_gen_add_i64(val, val, tmp);
-
- tmp2 = load_reg(s, rlow);
- tcg_gen_extu_i32_i64(tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
+ dead_tmp(tmpl);
+ dead_tmp(tmph);
tcg_gen_add_i64(val, val, tmp);
}
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] TCG native 32->64 concatenation
2008-09-07 16:53 [Qemu-devel] TCG native 32->64 concatenation Paul Brook
@ 2008-09-07 18:15 ` Blue Swirl
2008-09-07 18:43 ` Paul Brook
2008-09-14 17:03 ` Aurelien Jarno
2008-09-15 23:16 ` andrzej zaborowski
2 siblings, 1 reply; 5+ messages in thread
From: Blue Swirl @ 2008-09-07 18:15 UTC (permalink / raw)
To: qemu-devel
On 9/7/08, Paul Brook <paul@codesourcery.com> wrote:
> The patch below adds a new concat_i32_i64 TCG op. This allows a pair of
> 32-bit values to be efficiently combined to form a 64-bit value. I've
> converted all the cases I could find to use this, and tested the arm code on
> both 32 and 64-bit hosts.
Sparc's helper_pack64 does the same thing; I used it to reduce
register pressure on i386 for 64 bit stores.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] TCG native 32->64 concatenation
2008-09-07 18:15 ` Blue Swirl
@ 2008-09-07 18:43 ` Paul Brook
0 siblings, 0 replies; 5+ messages in thread
From: Paul Brook @ 2008-09-07 18:43 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
On Sunday 07 September 2008, Blue Swirl wrote:
> On 9/7/08, Paul Brook <paul@codesourcery.com> wrote:
> > The patch below adds a new concat_i32_i64 TCG op. This allows a pair of
> > 32-bit values to be efficiently combined to form a 64-bit value. I've
> > converted all the cases I could find to use this, and tested the arm
> > code on both 32 and 64-bit hosts.
>
> Sparc's helper_pack64 does the same thing, I used it to reduce
> register pressure on i386 for 64 bit stores.
They're slightly different because helper_pack64 takes target_ulong arguments.
Replacing helper_pack64 with concat_i32_i64 would need explicit truncation on
64-bit targets. The current TCG implementation will work if you use the wrong
register type, however I don't want targets relying on that.
Paul
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] TCG native 32->64 concatenation
2008-09-07 16:53 [Qemu-devel] TCG native 32->64 concatenation Paul Brook
2008-09-07 18:15 ` Blue Swirl
@ 2008-09-14 17:03 ` Aurelien Jarno
2008-09-15 23:16 ` andrzej zaborowski
2 siblings, 0 replies; 5+ messages in thread
From: Aurelien Jarno @ 2008-09-14 17:03 UTC (permalink / raw)
To: Paul Brook; +Cc: qemu-devel
On Sun, Sep 07, 2008 at 05:53:26PM +0100, Paul Brook wrote:
> The patch below adds a new concat_i32_i64 TCG op. This allows a pair of
> 32-bit values to be efficiently combined to form a 64-bit value. I've
> converted all the cases I could find to use this, and tested the arm code on
> both 32 and 64-bit hosts.
>
> This touches bits of code that I can't easily test well, so I'd appreciate
> another pair of eyes looking over it before I commit.
The patch looks ok.
> Signed-off-by: Paul Brook <paul@codesourcery.com>
Acked-by: Aurelien Jarno <aurelien@aurel32.net>
> Index: target-sh4/translate.c
> ===================================================================
> --- target-sh4/translate.c (revision 5178)
> +++ target-sh4/translate.c (working copy)
> @@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t
> static inline void gen_load_fpr64(TCGv t, int reg)
> {
> TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32);
> - TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32);
>
> tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg]));
> - tcg_gen_extu_i32_i64(t, tmp1);
> - tcg_gen_shli_i64(t, t, 32);
> - tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1]));
> - tcg_gen_extu_i32_i64(tmp2, tmp1);
> + tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1]));
> + tcg_gen_concat_i32_i64(t, tmp2, tmp1);
> tcg_temp_free(tmp1);
> - tcg_gen_or_i64(t, t, tmp2);
> tcg_temp_free(tmp2);
> }
>
> Index: target-ppc/translate.c
> ===================================================================
> --- target-ppc/translate.c (revision 5178)
> +++ target-ppc/translate.c (working copy)
> @@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64
> #if defined(TARGET_PPC64)
> tcg_gen_mov_i64(t, cpu_gpr[reg]);
> #else
> - tcg_gen_extu_i32_i64(t, cpu_gprh[reg]);
> - tcg_gen_shli_i64(t, t, 32);
> - TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64);
> - tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]);
> - tcg_gen_or_i64(t, t, tmp);
> - tcg_temp_free(tmp);
> + tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]);
> #endif
> }
>
> Index: target-mips/translate.c
> ===================================================================
> --- target-mips/translate.c (revision 5178)
> +++ target-mips/translate.c (working copy)
> @@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas
> tcg_gen_ld_i64(t, current_fpu, 8 * reg);
> } else {
> TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
> - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32);
>
> tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 *
> FP_ENDIAN_IDX);
> - tcg_gen_extu_i32_i64(t, r_tmp1);
> - tcg_gen_shli_i64(t, t, 32);
> - tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 *
> FP_ENDIAN_IDX);
> - tcg_gen_extu_i32_i64(r_tmp2, r_tmp1);
> - tcg_gen_or_i64(t, t, r_tmp2);
> + tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 *
> FP_ENDIAN_IDX);
> + tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1);
> tcg_temp_free(r_tmp1);
> tcg_temp_free(r_tmp2);
> }
> @@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct
> case FOP(38, 16):
> check_cp1_64bitmode(ctx);
> {
> - TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64);
> - TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
> TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32);
> TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32);
>
> gen_load_fpr32(fp32_0, fs);
> gen_load_fpr32(fp32_1, ft);
> - tcg_gen_extu_i32_i64(fp64_0, fp32_0);
> - tcg_gen_extu_i32_i64(fp64_1, fp32_1);
> - tcg_temp_free(fp32_0);
> + tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
> tcg_temp_free(fp32_1);
> - tcg_gen_shli_i64(fp64_1, fp64_1, 32);
> - tcg_gen_or_i64(fp64_0, fp64_0, fp64_1);
> - tcg_temp_free(fp64_1);
> - gen_store_fpr64(ctx, fp64_0, fd);
> - tcg_temp_free(fp64_0);
> + tcg_temp_free(fp32_0);
> + gen_store_fpr64(ctx, fp64, fd);
> + tcg_temp_free(fp64);
> }
> opn = "cvt.ps.s";
> break;
> Index: tcg/tcg-op.h
> ===================================================================
> --- tcg/tcg-op.h (revision 5178)
> +++ tcg/tcg-op.h (working copy)
> @@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T
> }
> #endif
>
> +static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high)
> +{
> +#if TCG_TARGET_REG_BITS == 32
> + tcg_gen_mov_i32(dest, low);
> + tcg_gen_mov_i32(TCGV_HIGH(dest), high);
> +#else
> + TCGv tmp = tcg_temp_new (TCG_TYPE_I64);
> + /* This extension is only needed for type correctness.
> + We may be able to do better given target specific information. */
> + tcg_gen_extu_i32_i64(tmp, high);
> + tcg_gen_shli_i64(tmp, tmp, 32);
> + tcg_gen_extu_i32_i64(dest, low);
> + tcg_gen_or_i64(dest, dest, tmp);
> + tcg_temp_free(tmp);
> +#endif
> +}
> +
> /***************************************/
> /* QEMU specific operations. Their type depend on the QEMU CPU
> type. */
> Index: tcg/README
> ===================================================================
> --- tcg/README (revision 5178)
> +++ tcg/README (working copy)
> @@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d
> * trunc_i64_i32 t0, t1
> Truncate t1 (64 bit) to t0 (32 bit)
>
> +* concat_i32_i64 t0, t1, t2
> +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
> +from t2 (32 bit).
> +
> ********* Load/Store
>
> * ld_i32/i64 t0, t1, offset
> Index: target-arm/translate.c
> ===================================================================
> --- target-arm/translate.c (revision 5178)
> +++ target-arm/translate.c (working copy)
> @@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in
>
> static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
> {
> - tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]);
> - tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]);
> - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
> - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
> + tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[0]);
> iwmmxt_store_reg(cpu_V0, rn);
> }
>
> @@ -4663,14 +4660,11 @@ static int disas_neon_data_insn(CPUState
> } else {
> tmp = neon_load_reg(rm + pass, 0);
> gen_neon_shift_narrow(size, tmp, tmp2, q, u);
> - tcg_gen_extu_i32_i64(cpu_V0, tmp);
> + tmp3 = neon_load_reg(rm + pass, 1);
> + gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
> + tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
> dead_tmp(tmp);
> - tmp = neon_load_reg(rm + pass, 1);
> - gen_neon_shift_narrow(size, tmp, tmp2, q, u);
> - tcg_gen_extu_i32_i64(cpu_V1, tmp);
> - dead_tmp(tmp);
> - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
> - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
> + dead_tmp(tmp3);
> }
> tmp = new_tmp();
> if (op == 8 && !u) {
> @@ -5600,7 +5594,7 @@ static void gen_addq_lo(DisasContext *s,
> TCGv tmp;
> TCGv tmp2;
>
> - /* Load 64-bit value rd:rn. */
> + /* Load value and extend to 64 bits. */
> tmp = tcg_temp_new(TCG_TYPE_I64);
> tmp2 = load_reg(s, rlow);
> tcg_gen_extu_i32_i64(tmp, tmp2);
> @@ -5612,19 +5606,16 @@ static void gen_addq_lo(DisasContext *s,
> static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
> {
> TCGv tmp;
> - TCGv tmp2;
> + TCGv tmpl;
> + TCGv tmph;
>
> /* Load 64-bit value rd:rn. */
> + tmpl = load_reg(s, rlow);
> + tmph = load_reg(s, rhigh);
> tmp = tcg_temp_new(TCG_TYPE_I64);
> - tmp2 = load_reg(s, rhigh);
> - tcg_gen_extu_i32_i64(tmp, tmp2);
> - dead_tmp(tmp2);
> - tcg_gen_shli_i64(tmp, tmp, 32);
> - tcg_gen_add_i64(val, val, tmp);
> -
> - tmp2 = load_reg(s, rlow);
> - tcg_gen_extu_i32_i64(tmp, tmp2);
> - dead_tmp(tmp2);
> + tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
> + dead_tmp(tmpl);
> + dead_tmp(tmph);
> tcg_gen_add_i64(val, val, tmp);
> }
>
>
>
>
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] TCG native 32->64 concatenation
2008-09-07 16:53 [Qemu-devel] TCG native 32->64 concatenation Paul Brook
2008-09-07 18:15 ` Blue Swirl
2008-09-14 17:03 ` Aurelien Jarno
@ 2008-09-15 23:16 ` andrzej zaborowski
2 siblings, 0 replies; 5+ messages in thread
From: andrzej zaborowski @ 2008-09-15 23:16 UTC (permalink / raw)
To: qemu-devel
2008/9/7 Paul Brook <paul@codesourcery.com>:
> The patch below adds a new concat_i32_i64 TCG op. This allows a pair of
> 32-bit values to be efficiently combined to form a 64-bit value. I've
> converted all the cases I could find to use this, and tested the arm code on
> both 32 and 64-bit hosts.
>
> This touches bits of code that I can't easily test well, so I'd appreciate
> another pair of eyes looking over it before I commit.
>
> Signed-off-by: Paul Brook <paul@codesourcery.com>
>
> Index: target-sh4/translate.c
> ===================================================================
> --- target-sh4/translate.c (revision 5178)
> +++ target-sh4/translate.c (working copy)
> @@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t
> static inline void gen_load_fpr64(TCGv t, int reg)
> {
> TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32);
> - TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32);
>
> tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg]));
> - tcg_gen_extu_i32_i64(t, tmp1);
> - tcg_gen_shli_i64(t, t, 32);
> - tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1]));
> - tcg_gen_extu_i32_i64(tmp2, tmp1);
> + tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1]));
> + tcg_gen_concat_i32_i64(t, tmp2, tmp1);
> tcg_temp_free(tmp1);
> - tcg_gen_or_i64(t, t, tmp2);
> tcg_temp_free(tmp2);
> }
>
> Index: target-ppc/translate.c
> ===================================================================
> --- target-ppc/translate.c (revision 5178)
> +++ target-ppc/translate.c (working copy)
> @@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64
> #if defined(TARGET_PPC64)
> tcg_gen_mov_i64(t, cpu_gpr[reg]);
> #else
> - tcg_gen_extu_i32_i64(t, cpu_gprh[reg]);
> - tcg_gen_shli_i64(t, t, 32);
> - TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64);
> - tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]);
> - tcg_gen_or_i64(t, t, tmp);
> - tcg_temp_free(tmp);
> + tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]);
> #endif
> }
>
> Index: target-mips/translate.c
> ===================================================================
> --- target-mips/translate.c (revision 5178)
> +++ target-mips/translate.c (working copy)
> @@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas
> tcg_gen_ld_i64(t, current_fpu, 8 * reg);
> } else {
> TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
> - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32);
>
> tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 *
> FP_ENDIAN_IDX);
> - tcg_gen_extu_i32_i64(t, r_tmp1);
> - tcg_gen_shli_i64(t, t, 32);
> - tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 *
> FP_ENDIAN_IDX);
> - tcg_gen_extu_i32_i64(r_tmp2, r_tmp1);
> - tcg_gen_or_i64(t, t, r_tmp2);
> + tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 *
> FP_ENDIAN_IDX);
> + tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1);
> tcg_temp_free(r_tmp1);
> tcg_temp_free(r_tmp2);
> }
> @@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct
> case FOP(38, 16):
> check_cp1_64bitmode(ctx);
> {
> - TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64);
> - TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
> TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32);
> TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32);
>
> gen_load_fpr32(fp32_0, fs);
> gen_load_fpr32(fp32_1, ft);
> - tcg_gen_extu_i32_i64(fp64_0, fp32_0);
> - tcg_gen_extu_i32_i64(fp64_1, fp32_1);
> - tcg_temp_free(fp32_0);
> + tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
> tcg_temp_free(fp32_1);
> - tcg_gen_shli_i64(fp64_1, fp64_1, 32);
> - tcg_gen_or_i64(fp64_0, fp64_0, fp64_1);
> - tcg_temp_free(fp64_1);
> - gen_store_fpr64(ctx, fp64_0, fd);
> - tcg_temp_free(fp64_0);
> + tcg_temp_free(fp32_0);
> + gen_store_fpr64(ctx, fp64, fd);
> + tcg_temp_free(fp64);
> }
> opn = "cvt.ps.s";
> break;
> Index: tcg/tcg-op.h
> ===================================================================
> --- tcg/tcg-op.h (revision 5178)
> +++ tcg/tcg-op.h (working copy)
> @@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T
> }
> #endif
>
> +static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high)
> +{
> +#if TCG_TARGET_REG_BITS == 32
> + tcg_gen_mov_i32(dest, low);
> + tcg_gen_mov_i32(TCGV_HIGH(dest), high);
> +#else
> + TCGv tmp = tcg_temp_new (TCG_TYPE_I64);
> + /* This extension is only needed for type correctness.
> + We may be able to do better given target specific information. */
> + tcg_gen_extu_i32_i64(tmp, high);
> + tcg_gen_shli_i64(tmp, tmp, 32);
> + tcg_gen_extu_i32_i64(dest, low);
> + tcg_gen_or_i64(dest, dest, tmp);
> + tcg_temp_free(tmp);
> +#endif
> +}
> +
> /***************************************/
> /* QEMU specific operations. Their type depend on the QEMU CPU
> type. */
> Index: tcg/README
> ===================================================================
> --- tcg/README (revision 5178)
> +++ tcg/README (working copy)
> @@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d
> * trunc_i64_i32 t0, t1
> Truncate t1 (64 bit) to t0 (32 bit)
>
> +* concat_i32_i64 t0, t1, t2
> +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
> +from t2 (32 bit).
> +
> ********* Load/Store
>
> * ld_i32/i64 t0, t1, offset
> Index: target-arm/translate.c
> ===================================================================
> --- target-arm/translate.c (revision 5178)
> +++ target-arm/translate.c (working copy)
> @@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in
>
> static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
> {
> - tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]);
> - tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]);
> - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
> - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
> + tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[0]);
Oh, I think this was supposed to use T0 and T1 instead of duplicating
T0, so changing this to
+ tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]);
would fix an old bug.
Cheers
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-09-15 23:16 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-07 16:53 [Qemu-devel] TCG native 32->64 concatenation Paul Brook
2008-09-07 18:15 ` Blue Swirl
2008-09-07 18:43 ` Paul Brook
2008-09-14 17:03 ` Aurelien Jarno
2008-09-15 23:16 ` andrzej zaborowski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).