From: Aurelien Jarno <aurelien@aurel32.net>
To: Paul Brook <paul@codesourcery.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] TCG native 32->64 concatenation
Date: Sun, 14 Sep 2008 19:03:25 +0200 [thread overview]
Message-ID: <20080914170325.GF22422@volta.aurel32.net> (raw)
In-Reply-To: <200809071753.27384.paul@codesourcery.com>
On Sun, Sep 07, 2008 at 05:53:26PM +0100, Paul Brook wrote:
> The patch below adds a new concat_i32_i64 TCG op. This allows a pair of
> 32-bit values to be efficiently combined to form a 64-bit value. I've
> converted all the cases I could find to use this, and tested the arm code on
> both 32 and 64-bit hosts.
>
> This touches bits of code that I can't easily test well, so I'd appreciate
> another pair of eyes looking over it before I commit.
The patch looks ok.
> Signed-off-by: Paul Brook <paul@codesourcery.com>
Acked-by: Aurelien Jarno <aurelien@aurel32.net>
> Index: target-sh4/translate.c
> ===================================================================
> --- target-sh4/translate.c (revision 5178)
> +++ target-sh4/translate.c (working copy)
> @@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t
> static inline void gen_load_fpr64(TCGv t, int reg)
> {
> TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32);
> - TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32);
>
> tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg]));
> - tcg_gen_extu_i32_i64(t, tmp1);
> - tcg_gen_shli_i64(t, t, 32);
> - tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1]));
> - tcg_gen_extu_i32_i64(tmp2, tmp1);
> + tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1]));
> + tcg_gen_concat_i32_i64(t, tmp2, tmp1);
> tcg_temp_free(tmp1);
> - tcg_gen_or_i64(t, t, tmp2);
> tcg_temp_free(tmp2);
> }
>
> Index: target-ppc/translate.c
> ===================================================================
> --- target-ppc/translate.c (revision 5178)
> +++ target-ppc/translate.c (working copy)
> @@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64
> #if defined(TARGET_PPC64)
> tcg_gen_mov_i64(t, cpu_gpr[reg]);
> #else
> - tcg_gen_extu_i32_i64(t, cpu_gprh[reg]);
> - tcg_gen_shli_i64(t, t, 32);
> - TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64);
> - tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]);
> - tcg_gen_or_i64(t, t, tmp);
> - tcg_temp_free(tmp);
> + tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]);
> #endif
> }
>
> Index: target-mips/translate.c
> ===================================================================
> --- target-mips/translate.c (revision 5178)
> +++ target-mips/translate.c (working copy)
> @@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas
> tcg_gen_ld_i64(t, current_fpu, 8 * reg);
> } else {
> TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
> - TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32);
>
> tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 *
> FP_ENDIAN_IDX);
> - tcg_gen_extu_i32_i64(t, r_tmp1);
> - tcg_gen_shli_i64(t, t, 32);
> - tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 *
> FP_ENDIAN_IDX);
> - tcg_gen_extu_i32_i64(r_tmp2, r_tmp1);
> - tcg_gen_or_i64(t, t, r_tmp2);
> + tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 *
> FP_ENDIAN_IDX);
> + tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1);
> tcg_temp_free(r_tmp1);
> tcg_temp_free(r_tmp2);
> }
> @@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct
> case FOP(38, 16):
> check_cp1_64bitmode(ctx);
> {
> - TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64);
> - TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64);
> + TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
> TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32);
> TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32);
>
> gen_load_fpr32(fp32_0, fs);
> gen_load_fpr32(fp32_1, ft);
> - tcg_gen_extu_i32_i64(fp64_0, fp32_0);
> - tcg_gen_extu_i32_i64(fp64_1, fp32_1);
> - tcg_temp_free(fp32_0);
> + tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
> tcg_temp_free(fp32_1);
> - tcg_gen_shli_i64(fp64_1, fp64_1, 32);
> - tcg_gen_or_i64(fp64_0, fp64_0, fp64_1);
> - tcg_temp_free(fp64_1);
> - gen_store_fpr64(ctx, fp64_0, fd);
> - tcg_temp_free(fp64_0);
> + tcg_temp_free(fp32_0);
> + gen_store_fpr64(ctx, fp64, fd);
> + tcg_temp_free(fp64);
> }
> opn = "cvt.ps.s";
> break;
> Index: tcg/tcg-op.h
> ===================================================================
> --- tcg/tcg-op.h (revision 5178)
> +++ tcg/tcg-op.h (working copy)
> @@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T
> }
> #endif
>
> +static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high)
> +{
> +#if TCG_TARGET_REG_BITS == 32
> + tcg_gen_mov_i32(dest, low);
> + tcg_gen_mov_i32(TCGV_HIGH(dest), high);
> +#else
> + TCGv tmp = tcg_temp_new (TCG_TYPE_I64);
> + /* This extension is only needed for type correctness.
> + We may be able to do better given target specific information. */
> + tcg_gen_extu_i32_i64(tmp, high);
> + tcg_gen_shli_i64(tmp, tmp, 32);
> + tcg_gen_extu_i32_i64(dest, low);
> + tcg_gen_or_i64(dest, dest, tmp);
> + tcg_temp_free(tmp);
> +#endif
> +}
> +
> /***************************************/
> /* QEMU specific operations. Their type depend on the QEMU CPU
> type. */
> Index: tcg/README
> ===================================================================
> --- tcg/README (revision 5178)
> +++ tcg/README (working copy)
> @@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d
> * trunc_i64_i32 t0, t1
> Truncate t1 (64 bit) to t0 (32 bit)
>
> +* concat_i32_i64 t0, t1, t2
> +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
> +from t2 (32 bit).
> +
> ********* Load/Store
>
> * ld_i32/i64 t0, t1, offset
> Index: target-arm/translate.c
> ===================================================================
> --- target-arm/translate.c (revision 5178)
> +++ target-arm/translate.c (working copy)
> @@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in
>
> static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
> {
> - tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]);
> - tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]);
> - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
> - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
> + tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[0]);
> iwmmxt_store_reg(cpu_V0, rn);
> }
>
> @@ -4663,14 +4660,11 @@ static int disas_neon_data_insn(CPUState
> } else {
> tmp = neon_load_reg(rm + pass, 0);
> gen_neon_shift_narrow(size, tmp, tmp2, q, u);
> - tcg_gen_extu_i32_i64(cpu_V0, tmp);
> + tmp3 = neon_load_reg(rm + pass, 1);
> + gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
> + tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
> dead_tmp(tmp);
> - tmp = neon_load_reg(rm + pass, 1);
> - gen_neon_shift_narrow(size, tmp, tmp2, q, u);
> - tcg_gen_extu_i32_i64(cpu_V1, tmp);
> - dead_tmp(tmp);
> - tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
> - tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
> + dead_tmp(tmp3);
> }
> tmp = new_tmp();
> if (op == 8 && !u) {
> @@ -5600,7 +5594,7 @@ static void gen_addq_lo(DisasContext *s,
> TCGv tmp;
> TCGv tmp2;
>
> - /* Load 64-bit value rd:rn. */
> + /* Load value and extend to 64 bits. */
> tmp = tcg_temp_new(TCG_TYPE_I64);
> tmp2 = load_reg(s, rlow);
> tcg_gen_extu_i32_i64(tmp, tmp2);
> @@ -5612,19 +5606,16 @@ static void gen_addq_lo(DisasContext *s,
> static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
> {
> TCGv tmp;
> - TCGv tmp2;
> + TCGv tmpl;
> + TCGv tmph;
>
> /* Load 64-bit value rd:rn. */
> + tmpl = load_reg(s, rlow);
> + tmph = load_reg(s, rhigh);
> tmp = tcg_temp_new(TCG_TYPE_I64);
> - tmp2 = load_reg(s, rhigh);
> - tcg_gen_extu_i32_i64(tmp, tmp2);
> - dead_tmp(tmp2);
> - tcg_gen_shli_i64(tmp, tmp, 32);
> - tcg_gen_add_i64(val, val, tmp);
> -
> - tmp2 = load_reg(s, rlow);
> - tcg_gen_extu_i32_i64(tmp, tmp2);
> - dead_tmp(tmp2);
> + tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
> + dead_tmp(tmpl);
> + dead_tmp(tmph);
> tcg_gen_add_i64(val, val, tmp);
> }
>
>
>
>
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' aurel32@debian.org | aurelien@aurel32.net
`- people.debian.org/~aurel32 | www.aurel32.net
next prev parent reply other threads:[~2008-09-14 17:03 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-09-07 16:53 [Qemu-devel] TCG native 32->64 concatenation Paul Brook
2008-09-07 18:15 ` Blue Swirl
2008-09-07 18:43 ` Paul Brook
2008-09-14 17:03 ` Aurelien Jarno [this message]
2008-09-15 23:16 ` andrzej zaborowski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080914170325.GF22422@volta.aurel32.net \
--to=aurelien@aurel32.net \
--cc=paul@codesourcery.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.