From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 1/4] tcg: Add muluh and mulsh opcodes
Date: Wed, 28 Aug 2013 22:59:31 +0200
Message-ID: <20130828205931.GE23739@ohm.aurel32.net>
In-Reply-To: <1376782006-31746-2-git-send-email-rth@twiddle.net>

On Sat, Aug 17, 2013 at 04:26:43PM -0700, Richard Henderson wrote:
> Use them in places where mulu2 and muls2 are used.
> Optimize mulx2 with dead low part to mulxh.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/aarch64/tcg-target.h | 4 ++++
> tcg/arm/tcg-target.h | 2 ++
> tcg/hppa/tcg-target.h | 2 ++
> tcg/i386/tcg-target.h | 4 ++++
> tcg/ia64/tcg-target.h | 4 ++++
> tcg/mips/tcg-target.h | 2 ++
> tcg/optimize.c | 20 ++++++++++++++++++++
> tcg/ppc/tcg-target.h | 2 ++
> tcg/ppc64/tcg-target.h | 4 ++++
> tcg/s390/tcg-target.h | 4 ++++
> tcg/sparc/tcg-target.h | 4 ++++
> tcg/tcg-op.h | 40 ++++++++++++++++++++++++++++++++++++----
> tcg/tcg-opc.h | 4 ++++
> tcg/tcg.c | 36 ++++++++++++++++++++++++++++++------
> tcg/tcg.h | 2 ++
> tcg/tci/tcg-target.h | 5 ++++-
> 16 files changed, 128 insertions(+), 11 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 51e5092..26ee28b 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -61,6 +61,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i32 0
> #define TCG_TARGET_HAS_mulu2_i32 0
> #define TCG_TARGET_HAS_muls2_i32 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> #define TCG_TARGET_HAS_div_i64 0
> #define TCG_TARGET_HAS_rem_i64 0
> @@ -87,6 +89,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i64 0
> #define TCG_TARGET_HAS_mulu2_i64 0
> #define TCG_TARGET_HAS_muls2_i64 0
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
>
> enum {
> TCG_AREG0 = TCG_REG_X19,
> diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
> index 5cd9d6a..ed48092 100644
> --- a/tcg/arm/tcg-target.h
> +++ b/tcg/arm/tcg-target.h
> @@ -80,6 +80,8 @@ extern bool use_idiv_instructions;
> #define TCG_TARGET_HAS_deposit_i32 1
> #define TCG_TARGET_HAS_movcond_i32 1
> #define TCG_TARGET_HAS_muls2_i32 1
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
> #define TCG_TARGET_HAS_div_i32 use_idiv_instructions
> #define TCG_TARGET_HAS_rem_i32 0
>
> diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
> index 25467bd..0f6f2ff 100644
> --- a/tcg/hppa/tcg-target.h
> +++ b/tcg/hppa/tcg-target.h
> @@ -100,6 +100,8 @@ typedef enum {
> #define TCG_TARGET_HAS_deposit_i32 1
> #define TCG_TARGET_HAS_movcond_i32 1
> #define TCG_TARGET_HAS_muls2_i32 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> /* optional instructions automatically implemented */
> #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index e3f6bb9..b7d1a55 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -96,6 +96,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i32 1
> #define TCG_TARGET_HAS_mulu2_i32 1
> #define TCG_TARGET_HAS_muls2_i32 1
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> #if TCG_TARGET_REG_BITS == 64
> #define TCG_TARGET_HAS_div2_i64 1
> @@ -122,6 +124,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i64 1
> #define TCG_TARGET_HAS_mulu2_i64 1
> #define TCG_TARGET_HAS_muls2_i64 1
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
> #endif
>
> #define TCG_TARGET_deposit_i32_valid(ofs, len) \
> diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
> index f32d519..ee6b2c8 100644
> --- a/tcg/ia64/tcg-target.h
> +++ b/tcg/ia64/tcg-target.h
> @@ -146,6 +146,10 @@ typedef enum {
> #define TCG_TARGET_HAS_mulu2_i64 0
> #define TCG_TARGET_HAS_muls2_i32 0
> #define TCG_TARGET_HAS_muls2_i64 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
>
> #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
> #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index a438950..6cb7c2f 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -89,6 +89,8 @@ typedef enum {
> #define TCG_TARGET_HAS_eqv_i32 0
> #define TCG_TARGET_HAS_nand_i32 0
> #define TCG_TARGET_HAS_muls2_i32 1
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
> #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index b35868a..e8dedf3 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -198,6 +198,8 @@ static TCGOpcode op_to_mov(TCGOpcode op)
>
> static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
> {
> + uint64_t l64, h64;
> +
> switch (op) {
> CASE_OP_32_64(add):
> return x + y;
> @@ -290,6 +292,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
> case INDEX_op_ext32u_i64:
> return (uint32_t)x;
>
> + case INDEX_op_muluh_i32:
> + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
> + case INDEX_op_mulsh_i32:
> + return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
> +
> + case INDEX_op_muluh_i64:
> + mulu64(&l64, &h64, x, y);
> + return h64;
> + case INDEX_op_mulsh_i64:
> + muls64(&l64, &h64, x, y);
> + return h64;
> +
> default:
> fprintf(stderr,
> "Unrecognized operation %d in do_constant_folding.\n", op);
> @@ -531,6 +545,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
> CASE_OP_32_64(eqv):
> CASE_OP_32_64(nand):
> CASE_OP_32_64(nor):
> + CASE_OP_32_64(muluh):
> + CASE_OP_32_64(mulsh):
> swap_commutative(args[0], &args[1], &args[2]);
> break;
> CASE_OP_32_64(brcond):
> @@ -771,6 +787,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
> switch (op) {
> CASE_OP_32_64(and):
> CASE_OP_32_64(mul):
> + CASE_OP_32_64(muluh):
> + CASE_OP_32_64(mulsh):
> if ((temps[args[2]].state == TCG_TEMP_CONST
> && temps[args[2]].val == 0)) {
> s->gen_opc_buf[op_index] = op_to_movi(op);
> @@ -882,6 +900,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
> CASE_OP_32_64(eqv):
> CASE_OP_32_64(nand):
> CASE_OP_32_64(nor):
> + CASE_OP_32_64(muluh):
> + CASE_OP_32_64(mulsh):
> if (temps[args[1]].state == TCG_TEMP_CONST
> && temps[args[2]].state == TCG_TEMP_CONST) {
> s->gen_opc_buf[op_index] = op_to_movi(op);
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index b42d97c..613c5ff 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -96,6 +96,8 @@ typedef enum {
> #define TCG_TARGET_HAS_deposit_i32 1
> #define TCG_TARGET_HAS_movcond_i32 1
> #define TCG_TARGET_HAS_muls2_i32 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> #define TCG_AREG0 TCG_REG_R27
>
> diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
> index 48fc6e2..0789daf 100644
> --- a/tcg/ppc64/tcg-target.h
> +++ b/tcg/ppc64/tcg-target.h
> @@ -95,6 +95,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i32 0
> #define TCG_TARGET_HAS_mulu2_i32 0
> #define TCG_TARGET_HAS_muls2_i32 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> #define TCG_TARGET_HAS_div_i64 1
> #define TCG_TARGET_HAS_rem_i64 0
> @@ -118,6 +120,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i64 1
> #define TCG_TARGET_HAS_mulu2_i64 1
> #define TCG_TARGET_HAS_muls2_i64 1
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
>
> #define TCG_AREG0 TCG_REG_R27
>
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index 42ca36c..b02f170 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -69,6 +69,8 @@ typedef enum TCGReg {
> #define TCG_TARGET_HAS_sub2_i32 1
> #define TCG_TARGET_HAS_mulu2_i32 0
> #define TCG_TARGET_HAS_muls2_i32 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> #define TCG_TARGET_HAS_div2_i64 1
> #define TCG_TARGET_HAS_rot_i64 1
> @@ -94,6 +96,8 @@ typedef enum TCGReg {
> #define TCG_TARGET_HAS_sub2_i64 1
> #define TCG_TARGET_HAS_mulu2_i64 1
> #define TCG_TARGET_HAS_muls2_i64 0
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
>
> extern bool tcg_target_deposit_valid(int ofs, int len);
> #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
> diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
> index dab52d7..1a696bc 100644
> --- a/tcg/sparc/tcg-target.h
> +++ b/tcg/sparc/tcg-target.h
> @@ -107,6 +107,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i32 1
> #define TCG_TARGET_HAS_mulu2_i32 1
> #define TCG_TARGET_HAS_muls2_i32 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> #if TCG_TARGET_REG_BITS == 64
> #define TCG_TARGET_HAS_div_i64 1
> @@ -134,6 +136,8 @@ typedef enum {
> #define TCG_TARGET_HAS_sub2_i64 0
> #define TCG_TARGET_HAS_mulu2_i64 0
> #define TCG_TARGET_HAS_muls2_i64 0
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
> #endif
>
> #define TCG_AREG0 TCG_REG_I0
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index 364964d..3de7545 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -1039,10 +1039,18 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
> t0 = tcg_temp_new_i64();
> t1 = tcg_temp_new_i32();
>
> - tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
> - TCGV_LOW(arg1), TCGV_LOW(arg2));
> - /* Allow the optimizer room to replace mulu2 with two moves. */
> - tcg_gen_op0(INDEX_op_nop);
> + if (TCG_TARGET_HAS_mulu2_i32) {
> + tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
> + TCGV_LOW(arg1), TCGV_LOW(arg2));
> + /* Allow the optimizer room to replace mulu2 with two moves. */
> + tcg_gen_op0(INDEX_op_nop);
> + } else {
> + tcg_debug_assert(TCG_TARGET_HAS_muluh_i32);
> + tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0),
> + TCGV_LOW(arg1), TCGV_LOW(arg2));
> + tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0),
> + TCGV_LOW(arg1), TCGV_LOW(arg2));
> + }
>
> tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
> tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
> @@ -2401,6 +2409,12 @@ static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
> tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
> /* Allow the optimizer room to replace mulu2 with two moves. */
> tcg_gen_op0(INDEX_op_nop);
> + } else if (TCG_TARGET_HAS_muluh_i32) {
> + TCGv_i32 t = tcg_temp_new_i32();
> + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
> + tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
> + tcg_gen_mov_i32(rl, t);
> + tcg_temp_free_i32(t);
> } else {
> TCGv_i64 t0 = tcg_temp_new_i64();
> TCGv_i64 t1 = tcg_temp_new_i64();
> @@ -2420,6 +2434,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
> tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
> /* Allow the optimizer room to replace muls2 with two moves. */
> tcg_gen_op0(INDEX_op_nop);
> + } else if (TCG_TARGET_HAS_mulsh_i32) {
> + TCGv_i32 t = tcg_temp_new_i32();
> + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
> + tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
> + tcg_gen_mov_i32(rl, t);
> + tcg_temp_free_i32(t);
> } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
> TCGv_i32 t0 = tcg_temp_new_i32();
> TCGv_i32 t1 = tcg_temp_new_i32();
> @@ -2499,6 +2519,12 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
> tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
> /* Allow the optimizer room to replace mulu2 with two moves. */
> tcg_gen_op0(INDEX_op_nop);
> + } else if (TCG_TARGET_HAS_muluh_i64) {
> + TCGv_i64 t = tcg_temp_new_i64();
> + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
> + tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
> + tcg_gen_mov_i64(rl, t);
> + tcg_temp_free_i64(t);
> } else if (TCG_TARGET_HAS_mulu2_i64) {
> TCGv_i64 t0 = tcg_temp_new_i64();
> TCGv_i64 t1 = tcg_temp_new_i64();
> @@ -2540,6 +2566,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
> tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
> /* Allow the optimizer room to replace muls2 with two moves. */
> tcg_gen_op0(INDEX_op_nop);
> + } else if (TCG_TARGET_HAS_mulsh_i64) {
> + TCGv_i64 t = tcg_temp_new_i64();
> + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
> + tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
> + tcg_gen_mov_i64(rl, t);
> + tcg_temp_free_i64(t);
> } else {
> TCGv_i64 t0 = tcg_temp_new_i64();
> int sizemask = 0;
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index a8af5b9..a75c29d 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -91,6 +91,8 @@ DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
> DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
> DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
> DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
> +DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
> +DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
> DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
> DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
>
> @@ -167,6 +169,8 @@ DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
> DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
> DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
> DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
> +DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
> +DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))
>
> /* QEMU specific */
> #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index dac8224..75034ca 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1243,12 +1243,13 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
> static void tcg_liveness_analysis(TCGContext *s)
> {
> int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
> - TCGOpcode op, op_new;
> + TCGOpcode op, op_new, op_new2;
> TCGArg *args;
> const TCGOpDef *def;
> uint8_t *dead_temps, *mem_temps;
> uint16_t dead_args;
> uint8_t sync_args;
> + bool have_op_new2;
>
> s->gen_opc_ptr++; /* skip end */
>
> @@ -1385,29 +1386,52 @@ static void tcg_liveness_analysis(TCGContext *s)
> goto do_not_remove;
>
> case INDEX_op_mulu2_i32:
> + op_new = INDEX_op_mul_i32;
> + op_new2 = INDEX_op_muluh_i32;
> + have_op_new2 = TCG_TARGET_HAS_muluh_i32;
> + goto do_mul2;
> case INDEX_op_muls2_i32:
> op_new = INDEX_op_mul_i32;
> + op_new2 = INDEX_op_mulsh_i32;
> + have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
> goto do_mul2;
> case INDEX_op_mulu2_i64:
> + op_new = INDEX_op_mul_i64;
> + op_new2 = INDEX_op_muluh_i64;
> + have_op_new2 = TCG_TARGET_HAS_muluh_i64;
> + goto do_mul2;
> case INDEX_op_muls2_i64:
> op_new = INDEX_op_mul_i64;
> + op_new2 = INDEX_op_mulsh_i64;
> + have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
> + goto do_mul2;
> do_mul2:
> args -= 4;
> nb_iargs = 2;
> nb_oargs = 2;
> - /* Likewise, test for the high part of the operation dead. */
> if (dead_temps[args[1]] && !mem_temps[args[1]]) {
> if (dead_temps[args[0]] && !mem_temps[args[0]]) {
> + /* Both parts of the operation are dead. */
> goto do_remove;
> }
> + /* The high part of the operation is dead; generate the low. */
> s->gen_opc_buf[op_index] = op = op_new;
> args[1] = args[2];
> args[2] = args[3];
> - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> - tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
> - /* Fall through and mark the single-word operation live. */
> - nb_oargs = 1;
> + } else if (have_op_new2 && dead_temps[args[0]]
> + && !mem_temps[args[0]]) {
> + /* The low part of the operation is dead; generate the high. */
> + s->gen_opc_buf[op_index] = op = op_new2;
> + args[0] = args[1];
> + args[1] = args[2];
> + args[2] = args[3];
> + } else {
> + goto do_not_remove;
> }
> + assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> + tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
> + /* Mark the single-word operation live. */
> + nb_oargs = 1;
> goto do_not_remove;
>
> default:
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index f3f9889..3f869dd 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -85,6 +85,8 @@ typedef uint64_t TCGRegSet;
> #define TCG_TARGET_HAS_sub2_i64 0
> #define TCG_TARGET_HAS_mulu2_i64 0
> #define TCG_TARGET_HAS_muls2_i64 0
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
> /* Turn some undef macros into true macros. */
> #define TCG_TARGET_HAS_add2_i32 1
> #define TCG_TARGET_HAS_sub2_i32 1
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index d7fc14e..ff12b4b 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -76,6 +76,8 @@
> #define TCG_TARGET_HAS_rot_i32 1
> #define TCG_TARGET_HAS_movcond_i32 0
> #define TCG_TARGET_HAS_muls2_i32 0
> +#define TCG_TARGET_HAS_muluh_i32 0
> +#define TCG_TARGET_HAS_mulsh_i32 0
>
> #if TCG_TARGET_REG_BITS == 64
> #define TCG_TARGET_HAS_bswap16_i64 1
> @@ -100,13 +102,14 @@
> #define TCG_TARGET_HAS_rot_i64 1
> #define TCG_TARGET_HAS_movcond_i64 0
> #define TCG_TARGET_HAS_muls2_i64 0
> -
> #define TCG_TARGET_HAS_add2_i32 0
> #define TCG_TARGET_HAS_sub2_i32 0
> #define TCG_TARGET_HAS_mulu2_i32 0
> #define TCG_TARGET_HAS_add2_i64 0
> #define TCG_TARGET_HAS_sub2_i64 0
> #define TCG_TARGET_HAS_mulu2_i64 0
> +#define TCG_TARGET_HAS_muluh_i64 0
> +#define TCG_TARGET_HAS_mulsh_i64 0
> #endif /* TCG_TARGET_REG_BITS == 64 */
>
> /* Number of registers available.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
Thread overview (10+ messages; this message posted 2013-08-28 20:59 UTC):
2013-08-17 23:26 [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson
2013-08-17 23:26 ` [Qemu-devel] [PATCH 1/4] " Richard Henderson
2013-08-28 20:59 ` Aurelien Jarno [this message]
2013-08-17 23:26 ` [Qemu-devel] [PATCH 2/4] tcg-mips: Implement mulsh, muluh Richard Henderson
2013-08-28 20:59 ` Aurelien Jarno
2013-08-17 23:26 ` [Qemu-devel] [PATCH 3/4] tcg-ppc64: Implement muluh, mulsh Richard Henderson
2013-08-28 21:00 ` Aurelien Jarno
2013-08-17 23:26 ` [Qemu-devel] [PATCH 4/4] tcg: Constant fold div, rem Richard Henderson
2013-08-28 21:02 ` Aurelien Jarno
2013-08-27 21:48 ` [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130828205931.GE23739@ohm.aurel32.net \
--to=aurelien@aurel32.net \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.