* [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes @ 2013-08-17 23:26 Richard Henderson 2013-08-17 23:26 ` [Qemu-devel] [PATCH 1/4] " Richard Henderson ` (4 more replies) 0 siblings, 5 replies; 10+ messages in thread From: Richard Henderson @ 2013-08-17 23:26 UTC (permalink / raw) To: qemu-devel; +Cc: aurelien We have -- or will have -- several targets which have a native multiply-highpart instruction: ppc*, ia64, aarch64, alpha. If we leave only the mul[us]2 opcode with which to expose this, we have to handle the register allocation bits in the backends. Better, IMO, to expose the two parts at the TCG opcode level, simplifying the backends. I've left tcg_gen_mul[us]2_i{32,64} as the "public" interface to these opcodes at the translator level. If the guest does not need both results, they can just be ignored. If the host has a combined mult insn (i386, arm) then one output is garbage; if the host has separate mult insns, then the optimizer can delete the unused opcode. Really only tested with x86_64 and ppc64. The linux-user-test image for alpha sees: IN: 0x0000004000814148: umulh t5,t0,t0 OP: ld_i32 tmp0,env,$0xffffffffffffffa8 movi_i32 tmp1,$0x0 brcond_i32 tmp0,tmp1,ne,$0x0 ---- 0x4000814148 mul_i64 tmp3,ir6,ir1 muluh_i64 ir1,ir6,ir1 mov_i64 tmp2,tmp3 movi_i64 pc,$0x400081414c exit_tb $0x0 set_label $0x0 exit_tb $0x3fff8c244483 OP after optimization and liveness analysis: ld_i32 tmp0,env,$0xffffffffffffffa8 movi_i32 tmp1,$0x0 brcond_i32 tmp0,tmp1,ne,$0x0 ---- 0x4000814148 nopn $0x3,$0xd,$0x3 muluh_i64 ir1,ir1,ir6 nopn $0x2,$0x2 movi_i64 pc,$0x400081414c exit_tb $0x0 set_label $0x0 exit_tb $0x3fff8c244483 end OUT: [size=76] 0x6011b0f0: lwz r14,-88(r27) 0x6011b0f4: cmpwi cr7,r14,0 0x6011b0f8: bne- cr7,0x6011b128 0x6011b0fc: ld r14,8(r27) 0x6011b100: ld r15,48(r27) 0x6011b104: mulhdu r14,r14,r15 0x6011b108: std r14,8(r27) ... 
r~ Richard Henderson (4): tcg: Add muluh and mulsh opcodes tcg-mips: Implement mulsh, muluh tcg-ppc64: Implement muluh, mulsh tcg: Constant fold div, rem tcg/aarch64/tcg-target.h | 4 ++++ tcg/arm/tcg-target.h | 2 ++ tcg/hppa/tcg-target.h | 2 ++ tcg/i386/tcg-target.h | 4 ++++ tcg/ia64/tcg-target.h | 4 ++++ tcg/mips/tcg-target.c | 10 ++++++++++ tcg/mips/tcg-target.h | 2 ++ tcg/optimize.c | 43 +++++++++++++++++++++++++++++++++++++++++++ tcg/ppc/tcg-target.h | 2 ++ tcg/ppc64/tcg-target.c | 32 +++++++------------------------- tcg/ppc64/tcg-target.h | 8 ++++++-- tcg/s390/tcg-target.h | 4 ++++ tcg/sparc/tcg-target.h | 4 ++++ tcg/tcg-op.h | 40 ++++++++++++++++++++++++++++++++++++---- tcg/tcg-opc.h | 4 ++++ tcg/tcg.c | 36 ++++++++++++++++++++++++++++++------ tcg/tcg.h | 2 ++ tcg/tci/tcg-target.h | 5 ++++- 18 files changed, 170 insertions(+), 38 deletions(-) -- 1.8.1.4 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 1/4] tcg: Add muluh and mulsh opcodes 2013-08-17 23:26 [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson @ 2013-08-17 23:26 ` Richard Henderson 2013-08-28 20:59 ` Aurelien Jarno 2013-08-17 23:26 ` [Qemu-devel] [PATCH 2/4] tcg-mips: Implement mulsh, muluh Richard Henderson ` (3 subsequent siblings) 4 siblings, 1 reply; 10+ messages in thread From: Richard Henderson @ 2013-08-17 23:26 UTC (permalink / raw) To: qemu-devel; +Cc: aurelien Use them in places where mulu2 and muls2 are used. Optimize mulx2 with dead low part to mulxh. Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/aarch64/tcg-target.h | 4 ++++ tcg/arm/tcg-target.h | 2 ++ tcg/hppa/tcg-target.h | 2 ++ tcg/i386/tcg-target.h | 4 ++++ tcg/ia64/tcg-target.h | 4 ++++ tcg/mips/tcg-target.h | 2 ++ tcg/optimize.c | 20 ++++++++++++++++++++ tcg/ppc/tcg-target.h | 2 ++ tcg/ppc64/tcg-target.h | 4 ++++ tcg/s390/tcg-target.h | 4 ++++ tcg/sparc/tcg-target.h | 4 ++++ tcg/tcg-op.h | 40 ++++++++++++++++++++++++++++++++++++---- tcg/tcg-opc.h | 4 ++++ tcg/tcg.c | 36 ++++++++++++++++++++++++++++++------ tcg/tcg.h | 2 ++ tcg/tci/tcg-target.h | 5 ++++- 16 files changed, 128 insertions(+), 11 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 51e5092..26ee28b 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -61,6 +61,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 @@ -87,6 +89,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 enum { TCG_AREG0 = TCG_REG_X19, diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 5cd9d6a..ed48092 100644 --- 
a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -80,6 +80,8 @@ extern bool use_idiv_instructions; #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index 25467bd..0f6f2ff 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -100,6 +100,8 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 /* optional instructions automatically implemented */ #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index e3f6bb9..b7d1a55 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -96,6 +96,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -122,6 +124,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif #define TCG_TARGET_deposit_i32_valid(ofs, len) \ diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index f32d519..ee6b2c8 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -146,6 +146,10 @@ typedef enum { #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) 
((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index a438950..6cb7c2f 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -89,6 +89,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ diff --git a/tcg/optimize.c b/tcg/optimize.c index b35868a..e8dedf3 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -198,6 +198,8 @@ static TCGOpcode op_to_mov(TCGOpcode op) static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) { + uint64_t l64, h64; + switch (op) { CASE_OP_32_64(add): return x + y; @@ -290,6 +292,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_ext32u_i64: return (uint32_t)x; + case INDEX_op_muluh_i32: + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; + case INDEX_op_mulsh_i32: + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; + + case INDEX_op_muluh_i64: + mulu64(&l64, &h64, x, y); + return h64; + case INDEX_op_mulsh_i64: + muls64(&l64, &h64, x, y); + return h64; + default: fprintf(stderr, "Unrecognized operation %d in do_constant_folding.\n", op); @@ -531,6 +545,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): swap_commutative(args[0], &args[1], &args[2]); break; CASE_OP_32_64(brcond): @@ -771,6 +787,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, switch (op) { CASE_OP_32_64(and): CASE_OP_32_64(mul): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { s->gen_opc_buf[op_index] = 
op_to_movi(op); @@ -882,6 +900,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(eqv): CASE_OP_32_64(nand): CASE_OP_32_64(nor): + CASE_OP_32_64(muluh): + CASE_OP_32_64(mulsh): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { s->gen_opc_buf[op_index] = op_to_movi(op); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b42d97c..613c5ff 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -96,6 +96,8 @@ typedef enum { #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 48fc6e2..0789daf 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -95,6 +95,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 @@ -118,6 +120,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 1 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_AREG0 TCG_REG_R27 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 42ca36c..b02f170 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -69,6 +69,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 @@ -94,6 +96,8 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 1 #define TCG_TARGET_HAS_muls2_i64 0 +#define 
TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 extern bool tcg_target_deposit_valid(int ofs, int len); #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index dab52d7..1a696bc 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -107,6 +107,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div_i64 1 @@ -134,6 +136,8 @@ typedef enum { #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif #define TCG_AREG0 TCG_REG_I0 diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 364964d..3de7545 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -1039,10 +1039,18 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i32(); - tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), - TCGV_LOW(arg1), TCGV_LOW(arg2)); - /* Allow the optimizer room to replace mulu2 with two moves. */ - tcg_gen_op0(INDEX_op_nop); + if (TCG_TARGET_HAS_mulu2_i32) { + tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + /* Allow the optimizer room to replace mulu2 with two moves. 
*/ + tcg_gen_op0(INDEX_op_nop); + } else { + tcg_debug_assert(TCG_TARGET_HAS_muluh_i32); + tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + } tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2)); tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); @@ -2401,6 +2409,12 @@ static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); /* Allow the optimizer room to replace mulu2 with two moves. */ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_muluh_i32) { + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); + tcg_gen_mov_i32(rl, t); + tcg_temp_free_i32(t); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2420,6 +2434,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); /* Allow the optimizer room to replace muls2 with two moves. */ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_mulsh_i32) { + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); + tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); + tcg_gen_mov_i32(rl, t); + tcg_temp_free_i32(t); } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -2499,6 +2519,12 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); /* Allow the optimizer room to replace mulu2 with two moves. 
*/ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_muluh_i64) { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t); + tcg_temp_free_i64(t); } else if (TCG_TARGET_HAS_mulu2_i64) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -2540,6 +2566,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); /* Allow the optimizer room to replace muls2 with two moves. */ tcg_gen_op0(INDEX_op_nop); + } else if (TCG_TARGET_HAS_mulsh_i64) { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); + tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); + tcg_gen_mov_i64(rl, t); + tcg_temp_free_i64(t); } else { TCGv_i64 t0 = tcg_temp_new_i64(); int sizemask = 0; diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index a8af5b9..a75c29d 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -91,6 +91,8 @@ DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) +DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) +DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) @@ -167,6 +169,8 @@ DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) +DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64)) +DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64)) /* QEMU specific */ #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS diff --git a/tcg/tcg.c b/tcg/tcg.c index 
dac8224..75034ca 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1243,12 +1243,13 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, static void tcg_liveness_analysis(TCGContext *s) { int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; - TCGOpcode op, op_new; + TCGOpcode op, op_new, op_new2; TCGArg *args; const TCGOpDef *def; uint8_t *dead_temps, *mem_temps; uint16_t dead_args; uint8_t sync_args; + bool have_op_new2; s->gen_opc_ptr++; /* skip end */ @@ -1385,29 +1386,52 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_not_remove; case INDEX_op_mulu2_i32: + op_new = INDEX_op_mul_i32; + op_new2 = INDEX_op_muluh_i32; + have_op_new2 = TCG_TARGET_HAS_muluh_i32; + goto do_mul2; case INDEX_op_muls2_i32: op_new = INDEX_op_mul_i32; + op_new2 = INDEX_op_mulsh_i32; + have_op_new2 = TCG_TARGET_HAS_mulsh_i32; goto do_mul2; case INDEX_op_mulu2_i64: + op_new = INDEX_op_mul_i64; + op_new2 = INDEX_op_muluh_i64; + have_op_new2 = TCG_TARGET_HAS_muluh_i64; + goto do_mul2; case INDEX_op_muls2_i64: op_new = INDEX_op_mul_i64; + op_new2 = INDEX_op_mulsh_i64; + have_op_new2 = TCG_TARGET_HAS_mulsh_i64; + goto do_mul2; do_mul2: args -= 4; nb_iargs = 2; nb_oargs = 2; - /* Likewise, test for the high part of the operation dead. */ if (dead_temps[args[1]] && !mem_temps[args[1]]) { if (dead_temps[args[0]] && !mem_temps[args[0]]) { + /* Both parts of the operation are dead. */ goto do_remove; } + /* The high part of the operation is dead; generate the low. */ s->gen_opc_buf[op_index] = op = op_new; args[1] = args[2]; args[2] = args[3]; - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); - tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); - /* Fall through and mark the single-word operation live. */ - nb_oargs = 1; + } else if (have_op_new2 && dead_temps[args[0]] + && !mem_temps[args[0]]) { + /* The low part of the operation is dead; generate the high. 
*/ + s->gen_opc_buf[op_index] = op = op_new2; + args[0] = args[1]; + args[1] = args[2]; + args[2] = args[3]; + } else { + goto do_not_remove; } + assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); + tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); + /* Mark the single-word operation live. */ + nb_oargs = 1; goto do_not_remove; default: diff --git a/tcg/tcg.h b/tcg/tcg.h index f3f9889..3f869dd 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -85,6 +85,8 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 /* Turn some undef macros into true macros. */ #define TCG_TARGET_HAS_add2_i32 1 #define TCG_TARGET_HAS_sub2_i32 1 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index d7fc14e..ff12b4b 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -76,6 +76,8 @@ #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_muls2_i32 0 +#define TCG_TARGET_HAS_muluh_i32 0 +#define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_bswap16_i64 1 @@ -100,13 +102,14 @@ #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_movcond_i64 0 #define TCG_TARGET_HAS_muls2_i64 0 - #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_mulu2_i32 0 #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i64 0 #define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 0 +#define TCG_TARGET_HAS_mulsh_i64 0 #endif /* TCG_TARGET_REG_BITS == 64 */ /* Number of registers available. -- 1.8.1.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 1/4] tcg: Add muluh and mulsh opcodes 2013-08-17 23:26 ` [Qemu-devel] [PATCH 1/4] " Richard Henderson @ 2013-08-28 20:59 ` Aurelien Jarno 0 siblings, 0 replies; 10+ messages in thread From: Aurelien Jarno @ 2013-08-28 20:59 UTC (permalink / raw) To: Richard Henderson; +Cc: qemu-devel On Sat, Aug 17, 2013 at 04:26:43PM -0700, Richard Henderson wrote: > Use them in places where mulu2 and muls2 are used. > Optimize mulx2 with dead low part to mulxh. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/aarch64/tcg-target.h | 4 ++++ > tcg/arm/tcg-target.h | 2 ++ > tcg/hppa/tcg-target.h | 2 ++ > tcg/i386/tcg-target.h | 4 ++++ > tcg/ia64/tcg-target.h | 4 ++++ > tcg/mips/tcg-target.h | 2 ++ > tcg/optimize.c | 20 ++++++++++++++++++++ > tcg/ppc/tcg-target.h | 2 ++ > tcg/ppc64/tcg-target.h | 4 ++++ > tcg/s390/tcg-target.h | 4 ++++ > tcg/sparc/tcg-target.h | 4 ++++ > tcg/tcg-op.h | 40 ++++++++++++++++++++++++++++++++++++---- > tcg/tcg-opc.h | 4 ++++ > tcg/tcg.c | 36 ++++++++++++++++++++++++++++++------ > tcg/tcg.h | 2 ++ > tcg/tci/tcg-target.h | 5 ++++- > 16 files changed, 128 insertions(+), 11 deletions(-) > > diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h > index 51e5092..26ee28b 100644 > --- a/tcg/aarch64/tcg-target.h > +++ b/tcg/aarch64/tcg-target.h > @@ -61,6 +61,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i32 0 > #define TCG_TARGET_HAS_mulu2_i32 0 > #define TCG_TARGET_HAS_muls2_i32 0 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > #define TCG_TARGET_HAS_div_i64 0 > #define TCG_TARGET_HAS_rem_i64 0 > @@ -87,6 +89,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i64 0 > #define TCG_TARGET_HAS_mulu2_i64 0 > #define TCG_TARGET_HAS_muls2_i64 0 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i64 0 > > enum { > TCG_AREG0 = TCG_REG_X19, > diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h > index 5cd9d6a..ed48092 100644 > --- a/tcg/arm/tcg-target.h > +++ 
b/tcg/arm/tcg-target.h > @@ -80,6 +80,8 @@ extern bool use_idiv_instructions; > #define TCG_TARGET_HAS_deposit_i32 1 > #define TCG_TARGET_HAS_movcond_i32 1 > #define TCG_TARGET_HAS_muls2_i32 1 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > #define TCG_TARGET_HAS_div_i32 use_idiv_instructions > #define TCG_TARGET_HAS_rem_i32 0 > > diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h > index 25467bd..0f6f2ff 100644 > --- a/tcg/hppa/tcg-target.h > +++ b/tcg/hppa/tcg-target.h > @@ -100,6 +100,8 @@ typedef enum { > #define TCG_TARGET_HAS_deposit_i32 1 > #define TCG_TARGET_HAS_movcond_i32 1 > #define TCG_TARGET_HAS_muls2_i32 0 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > /* optional instructions automatically implemented */ > #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */ > diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h > index e3f6bb9..b7d1a55 100644 > --- a/tcg/i386/tcg-target.h > +++ b/tcg/i386/tcg-target.h > @@ -96,6 +96,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i32 1 > #define TCG_TARGET_HAS_mulu2_i32 1 > #define TCG_TARGET_HAS_muls2_i32 1 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > #if TCG_TARGET_REG_BITS == 64 > #define TCG_TARGET_HAS_div2_i64 1 > @@ -122,6 +124,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i64 1 > #define TCG_TARGET_HAS_mulu2_i64 1 > #define TCG_TARGET_HAS_muls2_i64 1 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i64 0 > #endif > > #define TCG_TARGET_deposit_i32_valid(ofs, len) \ > diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h > index f32d519..ee6b2c8 100644 > --- a/tcg/ia64/tcg-target.h > +++ b/tcg/ia64/tcg-target.h > @@ -146,6 +146,10 @@ typedef enum { > #define TCG_TARGET_HAS_mulu2_i64 0 > #define TCG_TARGET_HAS_muls2_i32 0 > #define TCG_TARGET_HAS_muls2_i64 0 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 
> +#define TCG_TARGET_HAS_mulsh_i64 0 > > #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) > #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) > diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h > index a438950..6cb7c2f 100644 > --- a/tcg/mips/tcg-target.h > +++ b/tcg/mips/tcg-target.h > @@ -89,6 +89,8 @@ typedef enum { > #define TCG_TARGET_HAS_eqv_i32 0 > #define TCG_TARGET_HAS_nand_i32 0 > #define TCG_TARGET_HAS_muls2_i32 1 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ > #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ > diff --git a/tcg/optimize.c b/tcg/optimize.c > index b35868a..e8dedf3 100644 > --- a/tcg/optimize.c > +++ b/tcg/optimize.c > @@ -198,6 +198,8 @@ static TCGOpcode op_to_mov(TCGOpcode op) > > static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) > { > + uint64_t l64, h64; > + > switch (op) { > CASE_OP_32_64(add): > return x + y; > @@ -290,6 +292,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) > case INDEX_op_ext32u_i64: > return (uint32_t)x; > > + case INDEX_op_muluh_i32: > + return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; > + case INDEX_op_mulsh_i32: > + return ((int64_t)(int32_t)x * (int32_t)y) >> 32; > + > + case INDEX_op_muluh_i64: > + mulu64(&l64, &h64, x, y); > + return h64; > + case INDEX_op_mulsh_i64: > + muls64(&l64, &h64, x, y); > + return h64; > + > default: > fprintf(stderr, > "Unrecognized operation %d in do_constant_folding.\n", op); > @@ -531,6 +545,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, > CASE_OP_32_64(eqv): > CASE_OP_32_64(nand): > CASE_OP_32_64(nor): > + CASE_OP_32_64(muluh): > + CASE_OP_32_64(mulsh): > swap_commutative(args[0], &args[1], &args[2]); > break; > CASE_OP_32_64(brcond): > @@ -771,6 +787,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, > switch (op) { 
> CASE_OP_32_64(and): > CASE_OP_32_64(mul): > + CASE_OP_32_64(muluh): > + CASE_OP_32_64(mulsh): > if ((temps[args[2]].state == TCG_TEMP_CONST > && temps[args[2]].val == 0)) { > s->gen_opc_buf[op_index] = op_to_movi(op); > @@ -882,6 +900,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, > CASE_OP_32_64(eqv): > CASE_OP_32_64(nand): > CASE_OP_32_64(nor): > + CASE_OP_32_64(muluh): > + CASE_OP_32_64(mulsh): > if (temps[args[1]].state == TCG_TEMP_CONST > && temps[args[2]].state == TCG_TEMP_CONST) { > s->gen_opc_buf[op_index] = op_to_movi(op); > diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h > index b42d97c..613c5ff 100644 > --- a/tcg/ppc/tcg-target.h > +++ b/tcg/ppc/tcg-target.h > @@ -96,6 +96,8 @@ typedef enum { > #define TCG_TARGET_HAS_deposit_i32 1 > #define TCG_TARGET_HAS_movcond_i32 1 > #define TCG_TARGET_HAS_muls2_i32 0 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > #define TCG_AREG0 TCG_REG_R27 > > diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h > index 48fc6e2..0789daf 100644 > --- a/tcg/ppc64/tcg-target.h > +++ b/tcg/ppc64/tcg-target.h > @@ -95,6 +95,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i32 0 > #define TCG_TARGET_HAS_mulu2_i32 0 > #define TCG_TARGET_HAS_muls2_i32 0 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > #define TCG_TARGET_HAS_div_i64 1 > #define TCG_TARGET_HAS_rem_i64 0 > @@ -118,6 +120,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i64 1 > #define TCG_TARGET_HAS_mulu2_i64 1 > #define TCG_TARGET_HAS_muls2_i64 1 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i64 0 > > #define TCG_AREG0 TCG_REG_R27 > > diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h > index 42ca36c..b02f170 100644 > --- a/tcg/s390/tcg-target.h > +++ b/tcg/s390/tcg-target.h > @@ -69,6 +69,8 @@ typedef enum TCGReg { > #define TCG_TARGET_HAS_sub2_i32 1 > #define TCG_TARGET_HAS_mulu2_i32 0 > #define TCG_TARGET_HAS_muls2_i32 0 > 
+#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > #define TCG_TARGET_HAS_div2_i64 1 > #define TCG_TARGET_HAS_rot_i64 1 > @@ -94,6 +96,8 @@ typedef enum TCGReg { > #define TCG_TARGET_HAS_sub2_i64 1 > #define TCG_TARGET_HAS_mulu2_i64 1 > #define TCG_TARGET_HAS_muls2_i64 0 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i64 0 > > extern bool tcg_target_deposit_valid(int ofs, int len); > #define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid > diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h > index dab52d7..1a696bc 100644 > --- a/tcg/sparc/tcg-target.h > +++ b/tcg/sparc/tcg-target.h > @@ -107,6 +107,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i32 1 > #define TCG_TARGET_HAS_mulu2_i32 1 > #define TCG_TARGET_HAS_muls2_i32 0 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > #if TCG_TARGET_REG_BITS == 64 > #define TCG_TARGET_HAS_div_i64 1 > @@ -134,6 +136,8 @@ typedef enum { > #define TCG_TARGET_HAS_sub2_i64 0 > #define TCG_TARGET_HAS_mulu2_i64 0 > #define TCG_TARGET_HAS_muls2_i64 0 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i64 0 > #endif > > #define TCG_AREG0 TCG_REG_I0 > diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h > index 364964d..3de7545 100644 > --- a/tcg/tcg-op.h > +++ b/tcg/tcg-op.h > @@ -1039,10 +1039,18 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) > t0 = tcg_temp_new_i64(); > t1 = tcg_temp_new_i32(); > > - tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), > - TCGV_LOW(arg1), TCGV_LOW(arg2)); > - /* Allow the optimizer room to replace mulu2 with two moves. */ > - tcg_gen_op0(INDEX_op_nop); > + if (TCG_TARGET_HAS_mulu2_i32) { > + tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), > + TCGV_LOW(arg1), TCGV_LOW(arg2)); > + /* Allow the optimizer room to replace mulu2 with two moves. 
*/ > + tcg_gen_op0(INDEX_op_nop); > + } else { > + tcg_debug_assert(TCG_TARGET_HAS_muluh_i32); > + tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0), > + TCGV_LOW(arg1), TCGV_LOW(arg2)); > + tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0), > + TCGV_LOW(arg1), TCGV_LOW(arg2)); > + } > > tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2)); > tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); > @@ -2401,6 +2409,12 @@ static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, > tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); > /* Allow the optimizer room to replace mulu2 with two moves. */ > tcg_gen_op0(INDEX_op_nop); > + } else if (TCG_TARGET_HAS_muluh_i32) { > + TCGv_i32 t = tcg_temp_new_i32(); > + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); > + tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2); > + tcg_gen_mov_i32(rl, t); > + tcg_temp_free_i32(t); > } else { > TCGv_i64 t0 = tcg_temp_new_i64(); > TCGv_i64 t1 = tcg_temp_new_i64(); > @@ -2420,6 +2434,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, > tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); > /* Allow the optimizer room to replace muls2 with two moves. */ > tcg_gen_op0(INDEX_op_nop); > + } else if (TCG_TARGET_HAS_mulsh_i32) { > + TCGv_i32 t = tcg_temp_new_i32(); > + tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); > + tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2); > + tcg_gen_mov_i32(rl, t); > + tcg_temp_free_i32(t); > } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) { > TCGv_i32 t0 = tcg_temp_new_i32(); > TCGv_i32 t1 = tcg_temp_new_i32(); > @@ -2499,6 +2519,12 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, > tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); > /* Allow the optimizer room to replace mulu2 with two moves. 
*/ > tcg_gen_op0(INDEX_op_nop); > + } else if (TCG_TARGET_HAS_muluh_i64) { > + TCGv_i64 t = tcg_temp_new_i64(); > + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); > + tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2); > + tcg_gen_mov_i64(rl, t); > + tcg_temp_free_i64(t); > } else if (TCG_TARGET_HAS_mulu2_i64) { > TCGv_i64 t0 = tcg_temp_new_i64(); > TCGv_i64 t1 = tcg_temp_new_i64(); > @@ -2540,6 +2566,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, > tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); > /* Allow the optimizer room to replace muls2 with two moves. */ > tcg_gen_op0(INDEX_op_nop); > + } else if (TCG_TARGET_HAS_mulsh_i64) { > + TCGv_i64 t = tcg_temp_new_i64(); > + tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); > + tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2); > + tcg_gen_mov_i64(rl, t); > + tcg_temp_free_i64(t); > } else { > TCGv_i64 t0 = tcg_temp_new_i64(); > int sizemask = 0; > diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h > index a8af5b9..a75c29d 100644 > --- a/tcg/tcg-opc.h > +++ b/tcg/tcg-opc.h > @@ -91,6 +91,8 @@ DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32)) > DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32)) > DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32)) > DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32)) > +DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32)) > +DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32)) > DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) > DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) > > @@ -167,6 +169,8 @@ DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64)) > DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64)) > DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64)) > DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64)) > +DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64)) > +DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64)) > > /* QEMU specific */ > 
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS > diff --git a/tcg/tcg.c b/tcg/tcg.c > index dac8224..75034ca 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -1243,12 +1243,13 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, > static void tcg_liveness_analysis(TCGContext *s) > { > int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; > - TCGOpcode op, op_new; > + TCGOpcode op, op_new, op_new2; > TCGArg *args; > const TCGOpDef *def; > uint8_t *dead_temps, *mem_temps; > uint16_t dead_args; > uint8_t sync_args; > + bool have_op_new2; > > s->gen_opc_ptr++; /* skip end */ > > @@ -1385,29 +1386,52 @@ static void tcg_liveness_analysis(TCGContext *s) > goto do_not_remove; > > case INDEX_op_mulu2_i32: > + op_new = INDEX_op_mul_i32; > + op_new2 = INDEX_op_muluh_i32; > + have_op_new2 = TCG_TARGET_HAS_muluh_i32; > + goto do_mul2; > case INDEX_op_muls2_i32: > op_new = INDEX_op_mul_i32; > + op_new2 = INDEX_op_mulsh_i32; > + have_op_new2 = TCG_TARGET_HAS_mulsh_i32; > goto do_mul2; > case INDEX_op_mulu2_i64: > + op_new = INDEX_op_mul_i64; > + op_new2 = INDEX_op_muluh_i64; > + have_op_new2 = TCG_TARGET_HAS_muluh_i64; > + goto do_mul2; > case INDEX_op_muls2_i64: > op_new = INDEX_op_mul_i64; > + op_new2 = INDEX_op_mulsh_i64; > + have_op_new2 = TCG_TARGET_HAS_mulsh_i64; > + goto do_mul2; > do_mul2: > args -= 4; > nb_iargs = 2; > nb_oargs = 2; > - /* Likewise, test for the high part of the operation dead. */ > if (dead_temps[args[1]] && !mem_temps[args[1]]) { > if (dead_temps[args[0]] && !mem_temps[args[0]]) { > + /* Both parts of the operation are dead. */ > goto do_remove; > } > + /* The high part of the operation is dead; generate the low. */ > s->gen_opc_buf[op_index] = op = op_new; > args[1] = args[2]; > args[2] = args[3]; > - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); > - tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); > - /* Fall through and mark the single-word operation live. 
*/ > - nb_oargs = 1; > + } else if (have_op_new2 && dead_temps[args[0]] > + && !mem_temps[args[0]]) { > + /* The low part of the operation is dead; generate the high. */ > + s->gen_opc_buf[op_index] = op = op_new2; > + args[0] = args[1]; > + args[1] = args[2]; > + args[2] = args[3]; > + } else { > + goto do_not_remove; > } > + assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); > + tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); > + /* Mark the single-word operation live. */ > + nb_oargs = 1; > goto do_not_remove; > > default: > diff --git a/tcg/tcg.h b/tcg/tcg.h > index f3f9889..3f869dd 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -85,6 +85,8 @@ typedef uint64_t TCGRegSet; > #define TCG_TARGET_HAS_sub2_i64 0 > #define TCG_TARGET_HAS_mulu2_i64 0 > #define TCG_TARGET_HAS_muls2_i64 0 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i64 0 > /* Turn some undef macros into true macros. */ > #define TCG_TARGET_HAS_add2_i32 1 > #define TCG_TARGET_HAS_sub2_i32 1 > diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h > index d7fc14e..ff12b4b 100644 > --- a/tcg/tci/tcg-target.h > +++ b/tcg/tci/tcg-target.h > @@ -76,6 +76,8 @@ > #define TCG_TARGET_HAS_rot_i32 1 > #define TCG_TARGET_HAS_movcond_i32 0 > #define TCG_TARGET_HAS_muls2_i32 0 > +#define TCG_TARGET_HAS_muluh_i32 0 > +#define TCG_TARGET_HAS_mulsh_i32 0 > > #if TCG_TARGET_REG_BITS == 64 > #define TCG_TARGET_HAS_bswap16_i64 1 > @@ -100,13 +102,14 @@ > #define TCG_TARGET_HAS_rot_i64 1 > #define TCG_TARGET_HAS_movcond_i64 0 > #define TCG_TARGET_HAS_muls2_i64 0 > - > #define TCG_TARGET_HAS_add2_i32 0 > #define TCG_TARGET_HAS_sub2_i32 0 > #define TCG_TARGET_HAS_mulu2_i32 0 > #define TCG_TARGET_HAS_add2_i64 0 > #define TCG_TARGET_HAS_sub2_i64 0 > #define TCG_TARGET_HAS_mulu2_i64 0 > +#define TCG_TARGET_HAS_muluh_i64 0 > +#define TCG_TARGET_HAS_mulsh_i64 0 > #endif /* TCG_TARGET_REG_BITS == 64 */ > > /* Number of registers available. 
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net> -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurelien@aurel32.net http://www.aurel32.net ^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 2/4] tcg-mips: Implement mulsh, muluh 2013-08-17 23:26 [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson 2013-08-17 23:26 ` [Qemu-devel] [PATCH 1/4] " Richard Henderson @ 2013-08-17 23:26 ` Richard Henderson 2013-08-28 20:59 ` Aurelien Jarno 2013-08-17 23:26 ` [Qemu-devel] [PATCH 3/4] tcg-ppc64: Implement muluh, mulsh Richard Henderson ` (2 subsequent siblings) 4 siblings, 1 reply; 10+ messages in thread From: Richard Henderson @ 2013-08-17 23:26 UTC (permalink / raw) To: qemu-devel; +Cc: aurelien With the optimization in tcg_liveness_analysis, we can avoid the MFLO when it is unused. Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/mips/tcg-target.c | 10 ++++++++++ tcg/mips/tcg-target.h | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 793532e..31cd514 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1423,6 +1423,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); break; + case INDEX_op_mulsh_i32: + tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); + tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + break; + case INDEX_op_muluh_i32: + tcg_out_opc_reg(s, OPC_MULTU, 0, args[1], args[2]); + tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); + break; case INDEX_op_div_i32: tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); @@ -1602,6 +1610,8 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, + { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, diff --git 
a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 6cb7c2f..7ef79e0 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -89,8 +89,8 @@ typedef enum { #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_muls2_i32 1 -#define TCG_TARGET_HAS_muluh_i32 0 -#define TCG_TARGET_HAS_mulsh_i32 0 +#define TCG_TARGET_HAS_muluh_i32 1 +#define TCG_TARGET_HAS_mulsh_i32 1 /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ -- 1.8.1.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 2/4] tcg-mips: Implement mulsh, muluh 2013-08-17 23:26 ` [Qemu-devel] [PATCH 2/4] tcg-mips: Implement mulsh, muluh Richard Henderson @ 2013-08-28 20:59 ` Aurelien Jarno 0 siblings, 0 replies; 10+ messages in thread From: Aurelien Jarno @ 2013-08-28 20:59 UTC (permalink / raw) To: Richard Henderson; +Cc: qemu-devel On Sat, Aug 17, 2013 at 04:26:44PM -0700, Richard Henderson wrote: > With the optimization in tcg_liveness_analysis, > we can avoid the MFLO when it is unused. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/mips/tcg-target.c | 10 ++++++++++ > tcg/mips/tcg-target.h | 4 ++-- > 2 files changed, 12 insertions(+), 2 deletions(-) > > diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c > index 793532e..31cd514 100644 > --- a/tcg/mips/tcg-target.c > +++ b/tcg/mips/tcg-target.c > @@ -1423,6 +1423,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, > tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); > tcg_out_opc_reg(s, OPC_MFHI, args[1], 0, 0); > break; > + case INDEX_op_mulsh_i32: > + tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); > + tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); > + break; > + case INDEX_op_muluh_i32: > + tcg_out_opc_reg(s, OPC_MULTU, 0, args[1], args[2]); > + tcg_out_opc_reg(s, OPC_MFHI, args[0], 0, 0); > + break; > case INDEX_op_div_i32: > tcg_out_opc_reg(s, OPC_DIV, 0, args[1], args[2]); > tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); > @@ -1602,6 +1610,8 @@ static const TCGTargetOpDef mips_op_defs[] = { > { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, > { INDEX_op_muls2_i32, { "r", "r", "rZ", "rZ" } }, > { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } }, > + { INDEX_op_mulsh_i32, { "r", "rZ", "rZ" } }, > + { INDEX_op_muluh_i32, { "r", "rZ", "rZ" } }, > { INDEX_op_div_i32, { "r", "rZ", "rZ" } }, > { INDEX_op_divu_i32, { "r", "rZ", "rZ" } }, > { INDEX_op_rem_i32, { "r", "rZ", "rZ" } }, > diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h > index 6cb7c2f..7ef79e0 100644 > --- 
a/tcg/mips/tcg-target.h > +++ b/tcg/mips/tcg-target.h > @@ -89,8 +89,8 @@ typedef enum { > #define TCG_TARGET_HAS_eqv_i32 0 > #define TCG_TARGET_HAS_nand_i32 0 > #define TCG_TARGET_HAS_muls2_i32 1 > -#define TCG_TARGET_HAS_muluh_i32 0 > -#define TCG_TARGET_HAS_mulsh_i32 0 > +#define TCG_TARGET_HAS_muluh_i32 1 > +#define TCG_TARGET_HAS_mulsh_i32 1 > > /* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */ > #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ Reviewed-by: Aurelien Jarno <aurelien@aurel32.net> -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurelien@aurel32.net http://www.aurel32.net ^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 3/4] tcg-ppc64: Implement muluh, mulsh 2013-08-17 23:26 [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson 2013-08-17 23:26 ` [Qemu-devel] [PATCH 1/4] " Richard Henderson 2013-08-17 23:26 ` [Qemu-devel] [PATCH 2/4] tcg-mips: Implement mulsh, muluh Richard Henderson @ 2013-08-17 23:26 ` Richard Henderson 2013-08-28 21:00 ` Aurelien Jarno 2013-08-17 23:26 ` [Qemu-devel] [PATCH 4/4] tcg: Constant fold div, rem Richard Henderson 2013-08-27 21:48 ` [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson 4 siblings, 1 reply; 10+ messages in thread From: Richard Henderson @ 2013-08-17 23:26 UTC (permalink / raw) To: qemu-devel; +Cc: aurelien Using these instead of mulu2 and muls2 lets us avoid having to do argument overlap analysis in the backend. Normal register allocation will DTRT. Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/ppc64/tcg-target.c | 32 +++++++------------------------- tcg/ppc64/tcg-target.h | 8 ++++---- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 0678de2..939f7cb 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -1975,29 +1975,11 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, } break; - case INDEX_op_mulu2_i64: - case INDEX_op_muls2_i64: - { - int oph = (opc == INDEX_op_mulu2_i64 ? 
MULHDU : MULHD); - TCGReg outl = args[0], outh = args[1]; - a0 = args[2], a1 = args[3]; - - if (outl == a0 || outl == a1) { - if (outh == a0 || outh == a1) { - outl = TCG_REG_R0; - } else { - tcg_out32(s, oph | TAB(outh, a0, a1)); - oph = 0; - } - } - tcg_out32(s, MULLD | TAB(outl, a0, a1)); - if (oph != 0) { - tcg_out32(s, oph | TAB(outh, a0, a1)); - } - if (outl != args[0]) { - tcg_out_mov(s, TCG_TYPE_I64, args[0], outl); - } - } + case INDEX_op_muluh_i64: + tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_mulsh_i64: + tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); break; default: @@ -2124,8 +2106,8 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } }, - { INDEX_op_muls2_i64, { "r", "r", "r", "r" } }, - { INDEX_op_mulu2_i64, { "r", "r", "r", "r" } }, + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + { INDEX_op_muluh_i64, { "r", "r", "r" } }, { -1 }, }; diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 0789daf..fa4b9da 100644 --- a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -118,10 +118,10 @@ typedef enum { #define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 -#define TCG_TARGET_HAS_mulu2_i64 1 -#define TCG_TARGET_HAS_muls2_i64 1 -#define TCG_TARGET_HAS_muluh_i64 0 -#define TCG_TARGET_HAS_mulsh_i64 0 +#define TCG_TARGET_HAS_mulu2_i64 0 +#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_muluh_i64 1 +#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_AREG0 TCG_REG_R27 -- 1.8.1.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 3/4] tcg-ppc64: Implement muluh, mulsh 2013-08-17 23:26 ` [Qemu-devel] [PATCH 3/4] tcg-ppc64: Implement muluh, mulsh Richard Henderson @ 2013-08-28 21:00 ` Aurelien Jarno 0 siblings, 0 replies; 10+ messages in thread From: Aurelien Jarno @ 2013-08-28 21:00 UTC (permalink / raw) To: Richard Henderson; +Cc: qemu-devel On Sat, Aug 17, 2013 at 04:26:45PM -0700, Richard Henderson wrote: > Using these instead of mulu2 and muls2 lets us avoid having to argument > overlap analysis in the backend. Normal register allocation will DTRT. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/ppc64/tcg-target.c | 32 +++++++------------------------- > tcg/ppc64/tcg-target.h | 8 ++++---- > 2 files changed, 11 insertions(+), 29 deletions(-) > > diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c > index 0678de2..939f7cb 100644 > --- a/tcg/ppc64/tcg-target.c > +++ b/tcg/ppc64/tcg-target.c > @@ -1975,29 +1975,11 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, > } > break; > > - case INDEX_op_mulu2_i64: > - case INDEX_op_muls2_i64: > - { > - int oph = (opc == INDEX_op_mulu2_i64 ? 
MULHDU : MULHD); > - TCGReg outl = args[0], outh = args[1]; > - a0 = args[2], a1 = args[3]; > - > - if (outl == a0 || outl == a1) { > - if (outh == a0 || outh == a1) { > - outl = TCG_REG_R0; > - } else { > - tcg_out32(s, oph | TAB(outh, a0, a1)); > - oph = 0; > - } > - } > - tcg_out32(s, MULLD | TAB(outl, a0, a1)); > - if (oph != 0) { > - tcg_out32(s, oph | TAB(outh, a0, a1)); > - } > - if (outl != args[0]) { > - tcg_out_mov(s, TCG_TYPE_I64, args[0], outl); > - } > - } > + case INDEX_op_muluh_i64: > + tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2])); > + break; > + case INDEX_op_mulsh_i64: > + tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); > break; > > default: > @@ -2124,8 +2106,8 @@ static const TCGTargetOpDef ppc_op_defs[] = { > > { INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, > { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } }, > - { INDEX_op_muls2_i64, { "r", "r", "r", "r" } }, > - { INDEX_op_mulu2_i64, { "r", "r", "r", "r" } }, > + { INDEX_op_mulsh_i64, { "r", "r", "r" } }, > + { INDEX_op_muluh_i64, { "r", "r", "r" } }, > > { -1 }, > }; > diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h > index 0789daf..fa4b9da 100644 > --- a/tcg/ppc64/tcg-target.h > +++ b/tcg/ppc64/tcg-target.h > @@ -118,10 +118,10 @@ typedef enum { > #define TCG_TARGET_HAS_movcond_i64 1 > #define TCG_TARGET_HAS_add2_i64 1 > #define TCG_TARGET_HAS_sub2_i64 1 > -#define TCG_TARGET_HAS_mulu2_i64 1 > -#define TCG_TARGET_HAS_muls2_i64 1 > -#define TCG_TARGET_HAS_muluh_i64 0 > -#define TCG_TARGET_HAS_mulsh_i64 0 > +#define TCG_TARGET_HAS_mulu2_i64 0 > +#define TCG_TARGET_HAS_muls2_i64 0 > +#define TCG_TARGET_HAS_muluh_i64 1 > +#define TCG_TARGET_HAS_mulsh_i64 1 > > #define TCG_AREG0 TCG_REG_R27 > Reviewed-by: Aurelien Jarno <aurelien@aurel32.net> -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurelien@aurel32.net http://www.aurel32.net ^ permalink raw reply [flat|nested] 10+ messages in thread
* [Qemu-devel] [PATCH 4/4] tcg: Constant fold div, rem 2013-08-17 23:26 [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson ` (2 preceding siblings ...) 2013-08-17 23:26 ` [Qemu-devel] [PATCH 3/4] tcg-ppc64: Implement muluh, mulsh Richard Henderson @ 2013-08-17 23:26 ` Richard Henderson 2013-08-28 21:02 ` Aurelien Jarno 2013-08-27 21:48 ` [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson 4 siblings, 1 reply; 10+ messages in thread From: Richard Henderson @ 2013-08-17 23:26 UTC (permalink / raw) To: qemu-devel; +Cc: aurelien Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/optimize.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tcg/optimize.c b/tcg/optimize.c index e8dedf3..b29bf25 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -304,6 +304,25 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) muls64(&l64, &h64, x, y); return h64; + case INDEX_op_div_i32: + /* Avoid crashing on divide by zero, otherwise undefined. */ + return (int32_t)x / ((int32_t)y ? : 1); + case INDEX_op_divu_i32: + return (uint32_t)x / ((uint32_t)y ? : 1); + case INDEX_op_div_i64: + return (int64_t)x / ((int64_t)y ? : 1); + case INDEX_op_divu_i64: + return (uint64_t)x / ((uint64_t)y ? : 1); + + case INDEX_op_rem_i32: + return (int32_t)x % ((int32_t)y ? : 1); + case INDEX_op_remu_i32: + return (uint32_t)x % ((uint32_t)y ? : 1); + case INDEX_op_rem_i64: + return (int64_t)x % ((int64_t)y ? : 1); + case INDEX_op_remu_i64: + return (uint64_t)x % ((uint64_t)y ? 
: 1); + default: fprintf(stderr, "Unrecognized operation %d in do_constant_folding.\n", op); @@ -902,6 +921,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(nor): CASE_OP_32_64(muluh): CASE_OP_32_64(mulsh): + CASE_OP_32_64(div): + CASE_OP_32_64(divu): + CASE_OP_32_64(rem): + CASE_OP_32_64(remu): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { s->gen_opc_buf[op_index] = op_to_movi(op); -- 1.8.1.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 4/4] tcg: Constant fold div, rem 2013-08-17 23:26 ` [Qemu-devel] [PATCH 4/4] tcg: Constant fold div, rem Richard Henderson @ 2013-08-28 21:02 ` Aurelien Jarno 0 siblings, 0 replies; 10+ messages in thread From: Aurelien Jarno @ 2013-08-28 21:02 UTC (permalink / raw) To: Richard Henderson; +Cc: qemu-devel On Sat, Aug 17, 2013 at 04:26:46PM -0700, Richard Henderson wrote: > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/optimize.c | 23 +++++++++++++++++++++++ > 1 file changed, 23 insertions(+) > > diff --git a/tcg/optimize.c b/tcg/optimize.c > index e8dedf3..b29bf25 100644 > --- a/tcg/optimize.c > +++ b/tcg/optimize.c > @@ -304,6 +304,25 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) > muls64(&l64, &h64, x, y); > return h64; > > + case INDEX_op_div_i32: > + /* Avoid crashing on divide by zero, otherwise undefined. */ > + return (int32_t)x / ((int32_t)y ? : 1); > + case INDEX_op_divu_i32: > + return (uint32_t)x / ((uint32_t)y ? : 1); > + case INDEX_op_div_i64: > + return (int64_t)x / ((int64_t)y ? : 1); > + case INDEX_op_divu_i64: > + return (uint64_t)x / ((uint64_t)y ? : 1); > + > + case INDEX_op_rem_i32: > + return (int32_t)x % ((int32_t)y ? : 1); > + case INDEX_op_remu_i32: > + return (uint32_t)x % ((uint32_t)y ? : 1); > + case INDEX_op_rem_i64: > + return (int64_t)x % ((int64_t)y ? : 1); > + case INDEX_op_remu_i64: > + return (uint64_t)x % ((uint64_t)y ? 
: 1); > + > default: > fprintf(stderr, > "Unrecognized operation %d in do_constant_folding.\n", op); > @@ -902,6 +921,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, > CASE_OP_32_64(nor): > CASE_OP_32_64(muluh): > CASE_OP_32_64(mulsh): > + CASE_OP_32_64(div): > + CASE_OP_32_64(divu): > + CASE_OP_32_64(rem): > + CASE_OP_32_64(remu): > if (temps[args[1]].state == TCG_TEMP_CONST > && temps[args[2]].state == TCG_TEMP_CONST) { > s->gen_opc_buf[op_index] = op_to_movi(op); This looks fine to me, though I haven't been able to trigger this code path. So: Reviewed-by: Aurelien Jarno <aurelien@aurel32.net> -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurelien@aurel32.net http://www.aurel32.net ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes 2013-08-17 23:26 [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson ` (3 preceding siblings ...) 2013-08-17 23:26 ` [Qemu-devel] [PATCH 4/4] tcg: Constant fold div, rem Richard Henderson @ 2013-08-27 21:48 ` Richard Henderson 4 siblings, 0 replies; 10+ messages in thread From: Richard Henderson @ 2013-08-27 21:48 UTC (permalink / raw) To: qemu-devel; +Cc: aurelien Ping. r~ On 08/17/2013 04:26 PM, Richard Henderson wrote: > We have -- or will have -- several targets which have a native > multiply-highpart instruction: ppc*, ia64, aarch64, alpha. > > If we leave only the mul[us]2 opcode with which to expose this, > we have to handle the register allocation bits in the backends. > Better, IMO, to expose the two parts at the TCG opcode level, > simplifying the backends. > > I've left tcg_gen_mul[us]_i{32,64} as the "public" interface to > these opcodes at the translator level. If the guest does not > need both results, they can just be ignored. If the host has a > combined mult insn (i386, arm) then one output is garbage; if > the host has separate mult insns, then the optimizer can delete > the unused opcode. > > Really only tested with x86_64 and ppc64. 
> The linux-user-test image for alpha sees: > > IN: > 0x0000004000814148: umulh t5,t0,t0 > > OP: > ld_i32 tmp0,env,$0xffffffffffffffa8 > movi_i32 tmp1,$0x0 > brcond_i32 tmp0,tmp1,ne,$0x0 > ---- 0x4000814148 > mul_i64 tmp3,ir6,ir1 > muluh_i64 ir1,ir6,ir1 > mov_i64 tmp2,tmp3 > movi_i64 pc,$0x400081414c > exit_tb $0x0 > set_label $0x0 > exit_tb $0x3fff8c244483 > > OP after optimization and liveness analysis: > ld_i32 tmp0,env,$0xffffffffffffffa8 > movi_i32 tmp1,$0x0 > brcond_i32 tmp0,tmp1,ne,$0x0 > ---- 0x4000814148 > nopn $0x3,$0xd,$0x3 > muluh_i64 ir1,ir1,ir6 > nopn $0x2,$0x2 > movi_i64 pc,$0x400081414c > exit_tb $0x0 > set_label $0x0 > exit_tb $0x3fff8c244483 > end > > OUT: [size=76] > 0x6011b0f0: lwz r14,-88(r27) > 0x6011b0f4: cmpwi cr7,r14,0 > 0x6011b0f8: bne- cr7,0x6011b128 > 0x6011b0fc: ld r14,8(r27) > 0x6011b100: ld r15,48(r27) > 0x6011b104: mulhdu r14,r14,r15 > 0x6011b108: std r14,8(r27) > ... > > > > r~ > > > Richard Henderson (4): > tcg: Add muluh and mulsh opcodes > tcg-mips: Implement mulsh, muluh > tcg-ppc64: Implement muluh, mulsh > tcg: Constant fold div, rem > > tcg/aarch64/tcg-target.h | 4 ++++ > tcg/arm/tcg-target.h | 2 ++ > tcg/hppa/tcg-target.h | 2 ++ > tcg/i386/tcg-target.h | 4 ++++ > tcg/ia64/tcg-target.h | 4 ++++ > tcg/mips/tcg-target.c | 10 ++++++++++ > tcg/mips/tcg-target.h | 2 ++ > tcg/optimize.c | 43 +++++++++++++++++++++++++++++++++++++++++++ > tcg/ppc/tcg-target.h | 2 ++ > tcg/ppc64/tcg-target.c | 32 +++++++------------------------- > tcg/ppc64/tcg-target.h | 8 ++++++-- > tcg/s390/tcg-target.h | 4 ++++ > tcg/sparc/tcg-target.h | 4 ++++ > tcg/tcg-op.h | 40 ++++++++++++++++++++++++++++++++++++---- > tcg/tcg-opc.h | 4 ++++ > tcg/tcg.c | 36 ++++++++++++++++++++++++++++++------ > tcg/tcg.h | 2 ++ > tcg/tci/tcg-target.h | 5 ++++- > 18 files changed, 170 insertions(+), 38 deletions(-) > ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2013-08-28 21:02 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2013-08-17 23:26 [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson 2013-08-17 23:26 ` [Qemu-devel] [PATCH 1/4] " Richard Henderson 2013-08-28 20:59 ` Aurelien Jarno 2013-08-17 23:26 ` [Qemu-devel] [PATCH 2/4] tcg-mips: Implement mulsh, muluh Richard Henderson 2013-08-28 20:59 ` Aurelien Jarno 2013-08-17 23:26 ` [Qemu-devel] [PATCH 3/4] tcg-ppc64: Implement muluh, mulsh Richard Henderson 2013-08-28 21:00 ` Aurelien Jarno 2013-08-17 23:26 ` [Qemu-devel] [PATCH 4/4] tcg: Constant fold div, rem Richard Henderson 2013-08-28 21:02 ` Aurelien Jarno 2013-08-27 21:48 ` [Qemu-devel] [PATCH 0/4] tcg: Add muluh and mulsh opcodes Richard Henderson
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).