From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KfNIs-0007cE-IG for qemu-devel@nongnu.org; Mon, 15 Sep 2008 19:16:22 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KfNIs-0007bs-2X for qemu-devel@nongnu.org; Mon, 15 Sep 2008 19:16:22 -0400 Received: from [199.232.76.173] (port=52220 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KfNIr-0007bo-Kp for qemu-devel@nongnu.org; Mon, 15 Sep 2008 19:16:21 -0400 Received: from rv-out-0708.google.com ([209.85.198.243]:21451) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1KfNIq-0005Ac-Vh for qemu-devel@nongnu.org; Mon, 15 Sep 2008 19:16:21 -0400 Received: by rv-out-0708.google.com with SMTP id f25so2291970rvb.22 for ; Mon, 15 Sep 2008 16:16:19 -0700 (PDT) Message-ID: Date: Tue, 16 Sep 2008 01:16:19 +0200 From: "andrzej zaborowski" Subject: Re: [Qemu-devel] TCG native 32->64 concatenation In-Reply-To: <200809071753.27384.paul@codesourcery.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Content-Disposition: inline References: <200809071753.27384.paul@codesourcery.com> Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org 2008/9/7 Paul Brook : > The patch below adds a new concat_i32_i64 TCG op. This allows a pair of > 32-bit values to be efficiently combined to form a 64-bit value. I've > converted all the cases I could find to use this, and tested the arm code on > both 32 and 64-bit hosts. > > This touches bits of code that I can't easily test well, so I'd appreciate > another pair of eyes looking over it before I commit. > > Signed-off-by: Paul Brook > > Index: target-sh4/translate.c > =================================================================== > --- target-sh4/translate.c (revision 5178) > +++ target-sh4/translate.c (working copy) > @@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t > static inline void gen_load_fpr64(TCGv t, int reg) > { > TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32); > - TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64); > + TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32); > > tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg])); > - tcg_gen_extu_i32_i64(t, tmp1); > - tcg_gen_shli_i64(t, t, 32); > - tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1])); > - tcg_gen_extu_i32_i64(tmp2, tmp1); > + tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1])); > + tcg_gen_concat_i32_i64(t, tmp2, tmp1); > tcg_temp_free(tmp1); > - tcg_gen_or_i64(t, t, tmp2); > tcg_temp_free(tmp2); > } > > Index: target-ppc/translate.c > =================================================================== > --- target-ppc/translate.c (revision 5178) > +++ target-ppc/translate.c (working copy) > @@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64 > #if defined(TARGET_PPC64) > tcg_gen_mov_i64(t, cpu_gpr[reg]); > #else > - tcg_gen_extu_i32_i64(t, cpu_gprh[reg]); > - tcg_gen_shli_i64(t, t, 32); > - TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64); > - tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]); > - tcg_gen_or_i64(t, t, tmp); > - tcg_temp_free(tmp); > + tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]); > #endif > } > > Index: target-mips/translate.c > =================================================================== > --- target-mips/translate.c (revision 5178) > +++ target-mips/translate.c (working copy) > @@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas > tcg_gen_ld_i64(t, current_fpu, 8 * reg); > } else { > TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32); > - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); > + TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32); > > tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 * > FP_ENDIAN_IDX); > - tcg_gen_extu_i32_i64(t, r_tmp1); > - tcg_gen_shli_i64(t, t, 32); > - tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 * > FP_ENDIAN_IDX); > - tcg_gen_extu_i32_i64(r_tmp2, r_tmp1); > - tcg_gen_or_i64(t, t, r_tmp2); > + tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 * > FP_ENDIAN_IDX); > + tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1); > tcg_temp_free(r_tmp1); > tcg_temp_free(r_tmp2); > } > @@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct > case FOP(38, 16): > check_cp1_64bitmode(ctx); > { > - TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64); > - TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64); > + TCGv fp64 = tcg_temp_new(TCG_TYPE_I64); > TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32); > TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32); > > gen_load_fpr32(fp32_0, fs); > gen_load_fpr32(fp32_1, ft); > - tcg_gen_extu_i32_i64(fp64_0, fp32_0); > - tcg_gen_extu_i32_i64(fp64_1, fp32_1); > - tcg_temp_free(fp32_0); > + tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1); > tcg_temp_free(fp32_1); > - tcg_gen_shli_i64(fp64_1, fp64_1, 32); > - tcg_gen_or_i64(fp64_0, fp64_0, fp64_1); > - tcg_temp_free(fp64_1); > - gen_store_fpr64(ctx, fp64_0, fd); > - tcg_temp_free(fp64_0); > + tcg_temp_free(fp32_0); > + gen_store_fpr64(ctx, fp64, fd); > + tcg_temp_free(fp64); > } > opn = "cvt.ps.s"; > break; > Index: tcg/tcg-op.h > =================================================================== > --- tcg/tcg-op.h (revision 5178) > +++ tcg/tcg-op.h (working copy) > @@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T > } > #endif > > +static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high) > +{ > +#if TCG_TARGET_REG_BITS == 32 > + tcg_gen_mov_i32(dest, low); > + tcg_gen_mov_i32(TCGV_HIGH(dest), high); > +#else > + TCGv tmp = tcg_temp_new (TCG_TYPE_I64); > + /* This extension is only needed for type correctness. > + We may be able to do better given target specific information. */ > + tcg_gen_extu_i32_i64(tmp, high); > + tcg_gen_shli_i64(tmp, tmp, 32); > + tcg_gen_extu_i32_i64(dest, low); > + tcg_gen_or_i64(dest, dest, tmp); > + tcg_temp_free(tmp); > +#endif > +} > + > /***************************************/ > /* QEMU specific operations. Their type depend on the QEMU CPU > type. */ > Index: tcg/README > =================================================================== > --- tcg/README (revision 5178) > +++ tcg/README (working copy) > @@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d > * trunc_i64_i32 t0, t1 > Truncate t1 (64 bit) to t0 (32 bit) > > +* concat_i32_i64 t0, t1, t2 > +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half > +from t2 (32 bit). > + > ********* Load/Store > > * ld_i32/i64 t0, t1, offset > Index: target-arm/translate.c > =================================================================== > --- target-arm/translate.c (revision 5178) > +++ target-arm/translate.c (working copy) > @@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in > > static void gen_iwmmxt_movl_wRn_T0_T1(int rn) > { > - tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]); > - tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]); > - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32); > - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); > + tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[0]); Oh, I think this was supposed to use T0 and T1 instead of duplicating T0, so changing this to + tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]); would fix an old bug. Cheers