From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KcNW1-0007uI-D6 for qemu-devel@nongnu.org; Sun, 07 Sep 2008 12:53:33 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KcNW0-0007u0-Mm for qemu-devel@nongnu.org; Sun, 07 Sep 2008 12:53:33 -0400 Received: from [199.232.76.173] (port=59713 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KcNW0-0007tv-Je for qemu-devel@nongnu.org; Sun, 07 Sep 2008 12:53:32 -0400 Received: from mail.codesourcery.com ([65.74.133.4]:51937) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1KcNW0-0005Nh-61 for qemu-devel@nongnu.org; Sun, 07 Sep 2008 12:53:32 -0400 From: Paul Brook Date: Sun, 7 Sep 2008 17:53:26 +0100 MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Content-Disposition: inline Message-Id: <200809071753.27384.paul@codesourcery.com> Subject: [Qemu-devel] TCG native 32->64 concatenation Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org The patch below adds a new concat_i32_i64 TCG op. This allows a pair of 32-bit values to be efficiently combined to form a 64-bit value. I've converted all the cases I could find to use this, and tested the arm code on both 32 and 64-bit hosts. This touches bits of code that I can't easily test well, so I'd appreciate another pair of eyes looking over it before I commit. Signed-off-by: Paul Brook Index: target-sh4/translate.c =================================================================== --- target-sh4/translate.c (revision 5178) +++ target-sh4/translate.c (working copy) @@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t static inline void gen_load_fpr64(TCGv t, int reg) { TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32); - TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64); + TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32); tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg])); - tcg_gen_extu_i32_i64(t, tmp1); - tcg_gen_shli_i64(t, t, 32); - tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1])); - tcg_gen_extu_i32_i64(tmp2, tmp1); + tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1])); + tcg_gen_concat_i32_i64(t, tmp2, tmp1); tcg_temp_free(tmp1); - tcg_gen_or_i64(t, t, tmp2); tcg_temp_free(tmp2); } Index: target-ppc/translate.c =================================================================== --- target-ppc/translate.c (revision 5178) +++ target-ppc/translate.c (working copy) @@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64 #if defined(TARGET_PPC64) tcg_gen_mov_i64(t, cpu_gpr[reg]); #else - tcg_gen_extu_i32_i64(t, cpu_gprh[reg]); - tcg_gen_shli_i64(t, t, 32); - TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64); - tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]); - tcg_gen_or_i64(t, t, tmp); - tcg_temp_free(tmp); + tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]); #endif } Index: target-mips/translate.c =================================================================== --- target-mips/translate.c (revision 5178) +++ target-mips/translate.c (working copy) @@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas tcg_gen_ld_i64(t, current_fpu, 8 * reg); } else { TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32); - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64); + TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32); tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 * FP_ENDIAN_IDX); - tcg_gen_extu_i32_i64(t, r_tmp1); - tcg_gen_shli_i64(t, t, 32); - tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 * FP_ENDIAN_IDX); - tcg_gen_extu_i32_i64(r_tmp2, r_tmp1); - tcg_gen_or_i64(t, t, r_tmp2); + tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 * FP_ENDIAN_IDX); + tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1); tcg_temp_free(r_tmp1); tcg_temp_free(r_tmp2); } @@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct case FOP(38, 16): check_cp1_64bitmode(ctx); { - TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64); - TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64); + TCGv fp64 = tcg_temp_new(TCG_TYPE_I64); TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32); TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32); gen_load_fpr32(fp32_0, fs); gen_load_fpr32(fp32_1, ft); - tcg_gen_extu_i32_i64(fp64_0, fp32_0); - tcg_gen_extu_i32_i64(fp64_1, fp32_1); - tcg_temp_free(fp32_0); + tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1); tcg_temp_free(fp32_1); - tcg_gen_shli_i64(fp64_1, fp64_1, 32); - tcg_gen_or_i64(fp64_0, fp64_0, fp64_1); - tcg_temp_free(fp64_1); - gen_store_fpr64(ctx, fp64_0, fd); - tcg_temp_free(fp64_0); + tcg_temp_free(fp32_0); + gen_store_fpr64(ctx, fp64, fd); + tcg_temp_free(fp64); } opn = "cvt.ps.s"; break; Index: tcg/tcg-op.h =================================================================== --- tcg/tcg-op.h (revision 5178) +++ tcg/tcg-op.h (working copy) @@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T } #endif +static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(dest, low); + tcg_gen_mov_i32(TCGV_HIGH(dest), high); +#else + TCGv tmp = tcg_temp_new (TCG_TYPE_I64); + /* This extension is only needed for type correctness. + We may be able to do better given target specific information. */ + tcg_gen_extu_i32_i64(tmp, high); + tcg_gen_shli_i64(tmp, tmp, 32); + tcg_gen_extu_i32_i64(dest, low); + tcg_gen_or_i64(dest, dest, tmp); + tcg_temp_free(tmp); +#endif +} + /***************************************/ /* QEMU specific operations. Their type depend on the QEMU CPU type. */ Index: tcg/README =================================================================== --- tcg/README (revision 5178) +++ tcg/README (working copy) @@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d * trunc_i64_i32 t0, t1 Truncate t1 (64 bit) to t0 (32 bit) +* concat_i32_i64 t0, t1, t2 +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half +from t2 (32 bit). + ********* Load/Store * ld_i32/i64 t0, t1, offset Index: target-arm/translate.c =================================================================== --- target-arm/translate.c (revision 5178) +++ target-arm/translate.c (working copy) @@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in static void gen_iwmmxt_movl_wRn_T0_T1(int rn) { - tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]); - tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]); - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); + tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[0]); iwmmxt_store_reg(cpu_V0, rn); } @@ -4663,14 +4660,11 @@ static int disas_neon_data_insn(CPUState } else { tmp = neon_load_reg(rm + pass, 0); gen_neon_shift_narrow(size, tmp, tmp2, q, u); - tcg_gen_extu_i32_i64(cpu_V0, tmp); + tmp3 = neon_load_reg(rm + pass, 1); + gen_neon_shift_narrow(size, tmp3, tmp2, q, u); + tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3); dead_tmp(tmp); - tmp = neon_load_reg(rm + pass, 1); - gen_neon_shift_narrow(size, tmp, tmp2, q, u); - tcg_gen_extu_i32_i64(cpu_V1, tmp); - dead_tmp(tmp); - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32); - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1); + dead_tmp(tmp3); } tmp = new_tmp(); if (op == 8 && !u) { @@ -5600,7 +5594,7 @@ static void gen_addq_lo(DisasContext *s, TCGv tmp; TCGv tmp2; - /* Load 64-bit value rd:rn. */ + /* Load value and extend to 64 bits. */ tmp = tcg_temp_new(TCG_TYPE_I64); tmp2 = load_reg(s, rlow); tcg_gen_extu_i32_i64(tmp, tmp2); @@ -5612,19 +5606,16 @@ static void gen_addq_lo(DisasContext *s, static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh) { TCGv tmp; - TCGv tmp2; + TCGv tmpl; + TCGv tmph; /* Load 64-bit value rd:rn. */ + tmpl = load_reg(s, rlow); + tmph = load_reg(s, rhigh); tmp = tcg_temp_new(TCG_TYPE_I64); - tmp2 = load_reg(s, rhigh); - tcg_gen_extu_i32_i64(tmp, tmp2); - dead_tmp(tmp2); - tcg_gen_shli_i64(tmp, tmp, 32); - tcg_gen_add_i64(val, val, tmp); - - tmp2 = load_reg(s, rlow); - tcg_gen_extu_i32_i64(tmp, tmp2); - dead_tmp(tmp2); + tcg_gen_concat_i32_i64(tmp, tmpl, tmph); + dead_tmp(tmpl); + dead_tmp(tmph); tcg_gen_add_i64(val, val, tmp); }