* [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move.
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
@ 2009-12-17 17:27 ` Richard Henderson
2009-12-17 20:50 ` malc
2009-12-18 11:38 ` [Qemu-devel] " Laurent Desnogues
2009-12-17 17:28 ` [Qemu-devel] [PATCH 2/6] tcg: Add tcg_invert_cond Richard Henderson
` (5 subsequent siblings)
6 siblings, 2 replies; 26+ messages in thread
From: Richard Henderson @ 2009-12-17 17:27 UTC (permalink / raw)
To: qemu-devel; +Cc: Laurent Desnogues
Defines setcond and movcond for implementing conditional moves at
the tcg opcode level. 64-bit-on-32-bit is expanded via a setcond2
primitive plus other operations.
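As a rough usage sketch (the temporaries and the conditions are made up
for illustration; this is not taken from any translator in the tree), a
target frontend emits the new opcodes as:

    /* dest = (a == b) ? 1 : 0 */
    tcg_gen_setcond_tl(TCG_COND_EQ, dest, a, b);

    /* dest = (a < b, signed) ? val_t : val_f */
    tcg_gen_movcond_tl(TCG_COND_LT, dest, a, b, val_t, val_f);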
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/README | 26 +++++++++++++++-
tcg/tcg-op.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tcg/tcg-opc.h | 5 +++
tcg/tcg.c | 23 ++++++++++----
4 files changed, 138 insertions(+), 7 deletions(-)
diff --git a/tcg/README b/tcg/README
index e672258..8617994 100644
--- a/tcg/README
+++ b/tcg/README
@@ -152,6 +152,11 @@ Conditional jump if t0 cond t1 is true. cond can be:
TCG_COND_LEU /* unsigned */
TCG_COND_GTU /* unsigned */
+* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
********* Arithmetic
* add_i32/i64 t0, t1, t2
@@ -282,6 +287,25 @@ order bytes must be set to zero.
Indicate that the value of t0 won't be used later. It is useful to
force dead code elimination.
+********* Conditional moves
+
+* setcond_i32/i64 cond, dest, t1, t2
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+* movcond_i32/i64 cond, dest, c1, c2, vtrue, vfalse
+
+dest= (c1 cond c2 ? vtrue : of)
+
+Set DEST to VTRUE if (c1 cond c2) is true, otherwise set to VFALSE.
+
+* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments. The result is a 32-bit value.
+
********* Type conversions
* ext_i32_i64 t0, t1
@@ -375,7 +399,7 @@ The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
On a 32 bit target, all 64 bit operations are converted to 32 bits. A
few specific operations must be implemented to allow it (see add2_i32,
-sub2_i32, brcond2_i32).
+sub2_i32, brcond2_i32, setcond2_i32).
Floating point operations are not supported in this version. A
previous incarnation of the code generator had full support of them,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index faf2e8b..f43ed16 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
*gen_opparam_ptr++ = GET_TCGV_I64(arg6);
}
+static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4,
+ TCGv_i32 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4,
+ TCGv_i64 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
TCGArg arg6)
@@ -1795,6 +1821,67 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
}
}
+static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#if TCG_TARGET_REG_BITS == 64
+ tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+#else
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_movcond_i32(int cond, TCGv_i32 ret,
+ TCGv_i32 cmp1, TCGv_i32 cmp2,
+ TCGv_i32 op_t, TCGv_i32 op_f)
+{
+ if (TCGV_EQUAL_I32(op_t, op_f)) {
+ tcg_gen_mov_i32(ret, op_t);
+ return;
+ }
+ tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, cmp1, cmp2, op_t, op_f, cond);
+}
+
+static inline void tcg_gen_movcond_i64(int cond, TCGv_i64 ret,
+ TCGv_i64 cmp1, TCGv_i64 cmp2,
+ TCGv_i64 op_t, TCGv_i64 op_f)
+{
+ if (TCGV_EQUAL_I64(op_t, op_f)) {
+ tcg_gen_mov_i64(ret, op_t);
+ return;
+ }
+#if TCG_TARGET_REG_BITS == 64
+ tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, cmp1, cmp2, op_t, op_f, cond);
+#else
+ {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_const_i32(0);
+
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+ TCGV_LOW(cmp1), TCGV_HIGH(cmp1),
+ TCGV_LOW(cmp2), TCGV_HIGH(cmp2), cond);
+
+ /* ??? We could perhaps conditionally define a movcond2_i32. */
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero,
+ TCGV_LOW(op_t), TCGV_LOW(op_f));
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero,
+ TCGV_HIGH(op_t), TCGV_HIGH(op_f));
+
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(zero);
+ }
+#endif
+}
+
/***************************************/
/* QEMU specific operations. Their type depend on the QEMU CPU
type. */
@@ -2067,6 +2154,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_sari_tl tcg_gen_sari_i64
#define tcg_gen_brcond_tl tcg_gen_brcond_i64
#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
#define tcg_gen_mul_tl tcg_gen_mul_i64
#define tcg_gen_muli_tl tcg_gen_muli_i64
#define tcg_gen_div_tl tcg_gen_div_i64
@@ -2137,6 +2226,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_sari_tl tcg_gen_sari_i32
#define tcg_gen_brcond_tl tcg_gen_brcond_i32
#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
#define tcg_gen_mul_tl tcg_gen_mul_i32
#define tcg_gen_muli_tl tcg_gen_muli_i32
#define tcg_gen_div_tl tcg_gen_div_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..086968c 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -42,6 +42,8 @@ DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF2(mov_i32, 1, 1, 0, 0)
DEF2(movi_i32, 1, 0, 1, 0)
+DEF2(setcond_i32, 1, 2, 1, 0)
+DEF2(movcond_i32, 1, 4, 1, 0)
/* load/store */
DEF2(ld8u_i32, 1, 1, 1, 0)
DEF2(ld8s_i32, 1, 1, 1, 0)
@@ -82,6 +84,7 @@ DEF2(add2_i32, 2, 4, 0, 0)
DEF2(sub2_i32, 2, 4, 0, 0)
DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF2(mulu2_i32, 2, 2, 0, 0)
+DEF2(setcond2_i32, 1, 4, 1, 0)
#endif
#ifdef TCG_TARGET_HAS_ext8s_i32
DEF2(ext8s_i32, 1, 1, 0, 0)
@@ -111,6 +114,8 @@ DEF2(neg_i32, 1, 1, 0, 0)
#if TCG_TARGET_REG_BITS == 64
DEF2(mov_i64, 1, 1, 0, 0)
DEF2(movi_i64, 1, 0, 1, 0)
+DEF2(setcond_i64, 1, 2, 1, 0)
+DEF2(movcond_i64, 1, 4, 1, 0)
/* load/store */
DEF2(ld8u_i64, 1, 1, 1, 0)
DEF2(ld8s_i64, 1, 1, 1, 0)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3c0e296..f7ea727 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -670,6 +670,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
}
#endif
+
static void tcg_reg_alloc_start(TCGContext *s)
{
int i;
@@ -888,21 +889,31 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
fprintf(outfile, "%s",
tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
}
- if (c == INDEX_op_brcond_i32
+ switch (c) {
+ case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_brcond_i64:
+#endif
+ case INDEX_op_setcond_i32:
+ case INDEX_op_movcond_i32:
#if TCG_TARGET_REG_BITS == 32
- || c == INDEX_op_brcond2_i32
+ case INDEX_op_setcond2_i32:
#elif TCG_TARGET_REG_BITS == 64
- || c == INDEX_op_brcond_i64
+ case INDEX_op_setcond_i64:
+ case INDEX_op_movcond_i64:
#endif
- ) {
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
fprintf(outfile, ",%s", cond_name[args[k++]]);
else
fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
i = 1;
- }
- else
+ break;
+ default:
i = 0;
+ break;
+ }
for(; i < nb_cargs; i++) {
if (k != 0)
fprintf(outfile, ",");
--
1.6.5.2
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 2/6] tcg: Add tcg_invert_cond.
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
2009-12-17 17:27 ` [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move Richard Henderson
@ 2009-12-17 17:28 ` Richard Henderson
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
2009-12-17 17:32 ` [Qemu-devel] [PATCH 3/6] tcg-x86_64: Implement setcond and movcond Richard Henderson
` (4 subsequent siblings)
6 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-12-17 17:28 UTC (permalink / raw)
To: qemu-devel; +Cc: Laurent Desnogues
It is very handy to have a reliable mapping of a condition to its inverse.
This will be used in several patches to follow.
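The xor-with-1 trick relies on the TCGCond enumeration laying out each
condition next to its logical inverse (EQ/NE, LT/GE, LE/GT, LTU/GEU,
LEU/GTU), so, for instance:

    tcg_invert_cond(TCG_COND_LT)  == TCG_COND_GE
    tcg_invert_cond(TCG_COND_GTU) == TCG_COND_LEU

If that ordering ever changes, this helper must change with it.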
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/tcg.h | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 9824493..376d6af 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -205,6 +205,11 @@ typedef enum {
TCG_COND_GTU,
} TCGCond;
+static inline TCGCond tcg_invert_cond(TCGCond c)
+{
+ return (TCGCond)(c ^ 1);
+}
+
#define TEMP_VAL_DEAD 0
#define TEMP_VAL_REG 1
#define TEMP_VAL_MEM 2
--
1.6.5.2
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 3/6] tcg-x86_64: Implement setcond and movcond.
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
2009-12-17 17:27 ` [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move Richard Henderson
2009-12-17 17:28 ` [Qemu-devel] [PATCH 2/6] tcg: Add tcg_invert_cond Richard Henderson
@ 2009-12-17 17:32 ` Richard Henderson
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
2009-12-17 17:55 ` [Qemu-devel] [PATCH 4/6] tcg-i386: Implement small forward branches Richard Henderson
` (3 subsequent siblings)
6 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-12-17 17:32 UTC (permalink / raw)
To: qemu-devel; +Cc: Laurent Desnogues
Implement conditional moves in the x86_64 backend.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/x86_64/tcg-target.c | 65 ++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index 2339091..e411755 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
}
}
-static void tcg_out_brcond(TCGContext *s, int cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index, int rexw)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2, int rexw)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -508,9 +507,45 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
}
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int rexw)
+{
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
}
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+ TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
+{
+ int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+ if (use_xor)
+ tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
+ tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, arg0);
+ if (!use_xor)
+ tgen_arithi32(s, ARITH_AND, arg0, 0xff);
+}
+
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg arg0,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ TCGArg arg3, TCGArg arg4, int rexw)
+{
+ if (arg0 == arg3) {
+ cond = tcg_invert_cond(cond);
+ arg3 = arg4;
+ arg4 = arg0;
+ }
+
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
+ if (arg0 != arg4)
+ tcg_out_mov(s, arg0, arg4);
+ tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, arg0, arg3);
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -1197,6 +1232,24 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
tcg_out_modrm(s, 0x8b, args[0], args[1]);
break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+ const_args[2], 0);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+ const_args[2], P_REXW);
+ break;
+
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ const_args[2], args[3], args[4], 0);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ const_args[2], args[3], args[4], P_REXW);
+ break;
+
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
@@ -1376,6 +1429,12 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
{ INDEX_op_ext16u_i64, { "r", "r"} },
{ INDEX_op_ext32u_i64, { "r", "r"} },
+ { INDEX_op_setcond_i32, { "r", "r", "re" } },
+ { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+ { INDEX_op_movcond_i32, { "r", "r", "re", "r", "r" } },
+ { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },
+
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },
{ INDEX_op_qemu_ld16u, { "r", "L" } },
--
1.6.5.2
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 4/6] tcg-i386: Implement small forward branches.
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
` (2 preceding siblings ...)
2009-12-17 17:32 ` [Qemu-devel] [PATCH 3/6] tcg-x86_64: Implement setcond and movcond Richard Henderson
@ 2009-12-17 17:55 ` Richard Henderson
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
2009-12-17 18:38 ` [Qemu-devel] [PATCH 5/6] tcg-i386: Simplify brcond2 Richard Henderson
` (2 subsequent siblings)
6 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-12-17 17:55 UTC (permalink / raw)
To: qemu-devel; +Cc: Laurent Desnogues
There are places, like brcond2, where we know that the destination
of a forward branch will be within 127 bytes. Add the R_386_PC8
relocation type to support this, and add a flag to tcg_out_jxx to
generate it. Set the flag in the small forward branches in brcond2.
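For reference, the branch encodings involved (summary from memory,
worth double-checking against the processor manuals):

    jmp rel8    EB cb             jcc rel8    70+cc cb
    jmp rel32   E9 cd             jcc rel32   0F 80+cc cd

R_386_PC8 patches the single 'cb' displacement byte of the short forms,
just as the existing R_386_PC32 patches the 'cd' field of the near
forms.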
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
elf.h | 2 ++
tcg/i386/tcg-target.c | 36 +++++++++++++++++++++++++-----------
2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@ typedef struct {
#define R_386_GOTOFF 9
#define R_386_GOTPC 10
#define R_386_NUM 11
+/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */
+#define R_386_PC8 23
#define R_MIPS_NONE 0
#define R_MIPS_16 1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..cc3d28f 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,9 @@ static void patch_reloc(uint8_t *code_ptr, int type,
case R_386_PC32:
*(uint32_t *)code_ptr = value - (long)code_ptr;
break;
+ case R_386_PC8:
+ *(uint8_t *)code_ptr = value - (long)code_ptr;
+ break;
default:
tcg_abort();
}
@@ -305,7 +308,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
tgen_arithi(s, ARITH_ADD, reg, val, 0);
}
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch. */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
int32_t val, val1;
TCGLabel *l = &s->labels[label_index];
@@ -320,6 +324,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
tcg_out8(s, 0x70 + opc);
tcg_out8(s, val1);
} else {
+ assert (!small);
if (opc == -1) {
tcg_out8(s, 0xe9);
tcg_out32(s, val - 5);
@@ -329,6 +334,15 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
tcg_out32(s, val - 6);
}
}
+ } else if (small) {
+ if (opc == -1) {
+ tcg_out8(s, 0xeb);
+ } else {
+ tcg_out8(s, 0x0f);
+ tcg_out8(s, 0x70 + opc);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+ s->code_ptr += 1;
} else {
if (opc == -1) {
tcg_out8(s, 0xe9);
@@ -355,7 +369,7 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
}
- tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, 0);
}
/* XXX: we implement it at the target level to avoid having to
@@ -376,42 +390,42 @@ static void tcg_out_brcond2(TCGContext *s,
break;
case TCG_COND_LT:
tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
break;
case TCG_COND_LE:
tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
break;
case TCG_COND_GT:
tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
break;
case TCG_COND_GE:
tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
break;
case TCG_COND_LTU:
tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
break;
case TCG_COND_LEU:
tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
break;
case TCG_COND_GTU:
tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
break;
case TCG_COND_GEU:
tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
break;
default:
@@ -913,7 +927,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
}
break;
case INDEX_op_br:
- tcg_out_jxx(s, JCC_JMP, args[0]);
+ tcg_out_jxx(s, JCC_JMP, args[0], 0);
break;
case INDEX_op_movi_i32:
tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
--
1.6.5.2
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 5/6] tcg-i386: Simplify brcond2.
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
` (3 preceding siblings ...)
2009-12-17 17:55 ` [Qemu-devel] [PATCH 4/6] tcg-i386: Implement small forward branches Richard Henderson
@ 2009-12-17 18:38 ` Richard Henderson
2009-12-18 11:40 ` [Qemu-devel] " Laurent Desnogues
2009-12-17 19:08 ` [Qemu-devel] [PATCH 6/6] tcg-i386: Implement setcond, movcond, setcond2 Richard Henderson
2009-12-18 11:37 ` [Qemu-devel] Re: [PATCH 0/6] tcg conditional set/move, round 2 Laurent Desnogues
6 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-12-17 18:38 UTC (permalink / raw)
To: qemu-devel; +Cc: Laurent Desnogues
Split out tcg_out_cond from tcg_out_brcond. Add "small" arguments
to all branch functions for completeness. Unify all the calls to
generate branches within brcond2 and pass on the small flag.
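For anyone reading the rewritten brcond2 below: the double-word branch
being emitted can be modelled in C roughly as follows (hypothetical
helper, shown for the signed less-than case only; the middle test is
the branch that is always short and forward, cf. the previous patch):

    /* assumes <stdint.h>; models "branch if t0 < t1" on 32-bit halves */
    static int dword_lt(int32_t t0_hi, uint32_t t0_lo,
                        int32_t t1_hi, uint32_t t1_lo)
    {
        if (t0_hi < t1_hi)
            return 1;            /* the high words already decide it */
        if (t0_hi != t1_hi)
            return 0;            /* the short forward branch to label_next */
        return t0_lo < t1_lo;    /* unsigned compare of the low words */
    }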
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/i386/tcg-target.c | 87 ++++++++++++++++++++++--------------------------
1 files changed, 40 insertions(+), 47 deletions(-)
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index cc3d28f..f7b2416 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -355,9 +355,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
}
}
-static void tcg_out_brcond(TCGContext *s, int cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index)
+static void tcg_out_cond(TCGContext *s, int cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -369,68 +368,61 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
}
- tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, 0);
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int small)
+{
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
/* XXX: we implement it at the target level to avoid having to
handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
- const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args, int small)
{
- int label_next;
- label_next = gen_new_label();
- switch(args[4]) {
+ int label_next = gen_new_label();
+ int label_dest = args[5];
+ int cond = args[4], c1, c2, c3;
+
+ switch (cond) {
case TCG_COND_EQ:
- tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
- tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+ c1 = -1, c2 = TCG_COND_NE, c3 = TCG_COND_EQ;
break;
case TCG_COND_NE:
- tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
- tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+ c1 = TCG_COND_NE, c2 = -1, c3 = TCG_COND_NE;
break;
case TCG_COND_LT:
- tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
- break;
- case TCG_COND_LE:
- tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
- break;
- case TCG_COND_GT:
- tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
- break;
- case TCG_COND_GE:
- tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
- break;
case TCG_COND_LTU:
- tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+ c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LTU;
break;
+ case TCG_COND_LE:
case TCG_COND_LEU:
- tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+ c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LEU;
break;
+ case TCG_COND_GT:
case TCG_COND_GTU:
- tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+ c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GTU;
break;
+ case TCG_COND_GE:
case TCG_COND_GEU:
- tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next, 1);
- tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+ c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GEU;
break;
default:
- tcg_abort();
+ tcg_abort ();
+ }
+
+ tcg_out_cond(s, cond, args[1], args[3], const_args[3]);
+ if (c1 != -1) {
+ tcg_out_jxx(s, tcg_cond_to_jcc[c1], label_dest, small);
}
+ if (c2 != -1) {
+ tcg_out_jxx(s, tcg_cond_to_jcc[c2], label_next, 1);
+ }
+ tcg_out_brcond(s, c3, args[0], args[2], const_args[2], label_dest, small);
+
tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
@@ -1058,10 +1050,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
break;
case INDEX_op_brcond_i32:
- tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ args[3], 0);
break;
case INDEX_op_brcond2_i32:
- tcg_out_brcond2(s, args, const_args);
+ tcg_out_brcond2(s, args, const_args, 0);
break;
case INDEX_op_bswap16_i32:
--
1.6.5.2
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 6/6] tcg-i386: Implement setcond, movcond, setcond2.
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
` (4 preceding siblings ...)
2009-12-17 18:38 ` [Qemu-devel] [PATCH 5/6] tcg-i386: Simplify brcond2 Richard Henderson
@ 2009-12-17 19:08 ` Richard Henderson
2009-12-18 11:37 ` [Qemu-devel] Re: [PATCH 0/6] tcg conditional set/move, round 2 Laurent Desnogues
6 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2009-12-17 19:08 UTC (permalink / raw)
To: qemu-devel; +Cc: Laurent Desnogues
Implement conditional moves in the i386 backend.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/i386/tcg-target.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 173 insertions(+), 0 deletions(-)
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index f7b2416..98d667c 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -426,6 +426,165 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+ if (use_xor)
+ tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+ tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, arg0);
+ if (!use_xor)
+ tgen_arithi(s, ARITH_AND, arg0, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGArg new_args[6];
+ int label_true, label_over;
+
+ memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+ if (args[0] == args[1] || args[0] == args[2]
+ || (!const_args[3] && args[0] == args[3])
+ || (!const_args[4] && args[0] == args[4])) {
+ /* When the destination overlaps with one of the argument
+ registers, don't do anything tricky. */
+ label_true = gen_new_label();
+ label_over = gen_new_label();
+
+ new_args[5] = label_true;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+ tcg_out_jxx(s, JCC_JMP, label_over, 1);
+ tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ } else {
+ /* When the destination does not overlap one of the arguments,
+ clear the destination first, jump if cond false, and emit an
+ increment in the true case. This results in smaller code. */
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+ label_over = gen_new_label();
+ new_args[4] = tcg_invert_cond(new_args[4]);
+ new_args[5] = label_over;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ }
+}
+
+static inline int have_cmov(void)
+{
+#ifdef __i686__
+ /* Compiler options say that cmov is available. */
+ return 1;
+#else
+ /* ??? Use cpuid or something and figure out what's running. */
+ return 0;
+#endif
+}
+
+static void tcg_out_movcond(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ int vtc, vfc, cond, use_cmov = 0, do_swap = 0;
+ TCGArg d, vt, vf;
+
+ d = args[0];
+ vt = args[3];
+ vf = args[4];
+ vtc = const_args[3];
+ vfc = const_args[4];
+
+ /* ??? The jcc code path below assumes that one mov insn must be skipped.
+ Rather than complicate the code below, make sure to simplify the
+ conditional move here. */
+ if (vtc == vfc && vt == vf) {
+ if (vtc)
+ tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+ else
+ tcg_out_mov(s, d, vt);
+ return;
+ }
+
+ cond = args[5];
+
+ /* If both arguments are constants, we *could* do all the funny bits that
+ gcc does with sbc, masks, etc. There's likely no point. Just use the
+ jcc version in this case. We also have to be careful about clobbering
+ inputs when trying to move constants into position. */
+
+ if (have_cmov()) {
+ use_cmov = 1;
+ if (vtc) {
+ if (vfc || d == vf)
+ use_cmov = 0;
+ else
+ do_swap = 1;
+ } else if (d == vt) {
+ if (vfc)
+ use_cmov = 0;
+ else
+ do_swap = 1;
+ }
+ }
+
+ if (!use_cmov) {
+ /* We're going to follow the lead of cmov and set D=VF first,
+ which means inverting the condition upon which we jump. */
+ cond = tcg_invert_cond(cond);
+
+ /* Don't allow the move we jump over to be a nop. */
+ do_swap = (!vtc && d == vt);
+ }
+
+ if (do_swap) {
+ TCGArg t;
+ cond = tcg_invert_cond(cond);
+ t = vf, vf = vt, vt = t;
+ t = vfc, vfc = vtc, vtc = t;
+ }
+
+ /* If possible, set D=0 before the compare, so that we can use XOR. */
+ if (vfc && vf == 0 && d != args[1] && (const_args[2] || d != args[2])) {
+ tcg_out_movi(s, TCG_TYPE_I32, d, 0);
+ vf = d, vfc = 0;
+ }
+
+ tcg_out_cond(s, cond, args[1], args[2], const_args[2]);
+
+ if (vfc) {
+ /* Force the use of "mov $0, d" to avoid clobbering flags. */
+ tcg_out8(s, 0xb8 + d);
+ tcg_out32(s, vf);
+ } else {
+ tcg_out_mov(s, d, vf);
+ }
+
+ if (use_cmov) {
+ assert (!vtc);
+ tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT, d, vt);
+ } else {
+ int label_next = gen_new_label();
+
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_next, 1);
+ if (vtc)
+ tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+ else
+ tcg_out_mov(s, d, vt);
+
+ tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+ }
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -1087,6 +1246,16 @@ static inline void tcg_out_op(TCGContext *s, int opc,
tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args, const_args);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
@@ -1175,6 +1344,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ext8u_i32, { "r", "q"} },
{ INDEX_op_ext16u_i32, { "r", "r"} },
+ { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "ri", "ri" } },
+ { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
#if TARGET_LONG_BITS == 32
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },
--
1.6.5.2
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2
[not found] <761ea48b0912170620l534dcb02m8ea6b59524d76dbe@mail.gmail.com>
@ 2009-12-17 19:32 ` Richard Henderson
2009-12-17 17:27 ` [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move Richard Henderson
` (6 more replies)
0 siblings, 7 replies; 26+ messages in thread
From: Richard Henderson @ 2009-12-17 19:32 UTC (permalink / raw)
To: qemu-devel; +Cc: Laurent Desnogues
> funny how you seem to follow the same paths I did months ago :-)
Heh. They are the obvious points for improvement in the emulation.
Hopefully we can get them finished this time, so that some third
person doesn't go through the same thing next year. =)
> - cosmetics: don't use tabs and be sure not to have
> whitespaces at end of lines
Grr, I thought I'd been extra careful about that. Fixed.
> - the changes to tcg-op.h in patch 6 should go with patch 1
Done.
> - outside of the small parameter handling, I'd prefer
> you provide a separate patch for tcg_out_brcond2;
> I don't think the changes using c1, c2 and c3 really
> belong to setcond/movcond.
The i386 part has been split into 3 patches, though they
are sequentially dependent.
I've left off the sparc backend patch, as well as the patches
for the translators. Let's take care of these first.
r~
---
Richard Henderson (6):
tcg: Generic support for conditional set and conditional move.
tcg: Add tcg_invert_cond.
tcg-x86_64: Implement setcond and movcond.
tcg-i386: Implement small forward branches.
tcg-i386: Simplify brcond2.
tcg-i386: Implement setcond, movcond, setcond2.
elf.h | 2 +
tcg/README | 26 ++++-
tcg/i386/tcg-target.c | 278 ++++++++++++++++++++++++++++++++++++++--------
tcg/tcg-op.h | 91 +++++++++++++++
tcg/tcg-opc.h | 5 +
tcg/tcg.c | 23 +++-
tcg/tcg.h | 5 +
tcg/x86_64/tcg-target.c | 65 +++++++++++-
8 files changed, 436 insertions(+), 59 deletions(-)
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move.
2009-12-17 17:27 ` [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move Richard Henderson
@ 2009-12-17 20:50 ` malc
2009-12-18 11:38 ` [Qemu-devel] " Laurent Desnogues
1 sibling, 0 replies; 26+ messages in thread
From: malc @ 2009-12-17 20:50 UTC (permalink / raw)
To: Richard Henderson; +Cc: Laurent Desnogues, qemu-devel
On Thu, 17 Dec 2009, Richard Henderson wrote:
> Defines setcond and movcond for implementing conditional moves at
> the tcg opcode level. 64-bit-on-32-bit is expanded via a setcond2
> primitive plus other operations.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/README | 26 +++++++++++++++-
> tcg/tcg-op.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> tcg/tcg-opc.h | 5 +++
> tcg/tcg.c | 23 ++++++++++----
> 4 files changed, 138 insertions(+), 7 deletions(-)
>
> diff --git a/tcg/README b/tcg/README
> index e672258..8617994 100644
> --- a/tcg/README
> +++ b/tcg/README
> @@ -152,6 +152,11 @@ Conditional jump if t0 cond t1 is true. cond can be:
> TCG_COND_LEU /* unsigned */
> TCG_COND_GTU /* unsigned */
>
> +* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
> +
> +Similar to brcond, except that the 64-bit values T0 and T1
> +are formed from two 32-bit arguments.
> +
> ********* Arithmetic
>
> * add_i32/i64 t0, t1, t2
> @@ -282,6 +287,25 @@ order bytes must be set to zero.
> Indicate that the value of t0 won't be used later. It is useful to
> force dead code elimination.
>
> +********* Conditional moves
> +
> +* setcond_i32/i64 cond, dest, t1, t2
> +
> +dest = (t1 cond t2)
> +
> +Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
> +
> +* movcond_i32/i64 cond, dest, c1, c2, vtrue, vfalse
> +
> +dest= (c1 cond c2 ? vtrue : of)
Nitpick: no space before '='
> +
> +Set DEST to VTRUE if (c1 cond c2) is true, otherwise set to VFALSE.
But it reads `? vtrue : of'. Should probably be `? vtrue : vfalse`
[..snip..]
--
mailto:av1474@comtv.ru
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] Re: [PATCH 0/6] tcg conditional set/move, round 2
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
` (5 preceding siblings ...)
2009-12-17 19:08 ` [Qemu-devel] [PATCH 6/6] tcg-i386: Implement setcond, movcond, setcond2 Richard Henderson
@ 2009-12-18 11:37 ` Laurent Desnogues
2009-12-18 21:38 ` [Qemu-devel] tcg conditional set/move, round 3 Richard Henderson
6 siblings, 1 reply; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-18 11:37 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Thu, Dec 17, 2009 at 8:32 PM, Richard Henderson <rth@twiddle.net> wrote:
>> funny how you seem to follow the same paths I did months ago :-)
>
> Heh. They are the obvious points for improvement in the emulation.
> Hopefully we can get them finished this time, so that some third
> person doesn't go through the same thing next year. =)
I agree :-)
>> - cosmetics: don't use tabs and be sure not to have
>> whitespaces at end of lines
>
> Grr, I thought I'd been extra careful about that. Fixed.
>
>> - the changes to tcg-op.h in patch 6 should go with patch 1
>
> Done.
>
>> - outside of the small parameter handling, I'd prefer
>> you provide a separate patch for tcg_out_brcond2;
>> I don't think the changes using c1, c2 and c3 really
>> belong to setcond/movcond.
>
> The i386 part has been split into 3 patches, though they
> are sequentially dependent.
>
> I've left off the sparc backend patch, as well as the patches
> for the translators. Let's take care of these first.
Thanks, that will make things easier.
>
> r~
> ---
>
> Richard Henderson (6):
This is a first pass of review. It's far from complete (in particular no
testing was done).
Here is a summary of my comments:
> tcg: Generic support for conditional set and conditional move.
Needs cosmetics changes.
> tcg: Add tcg_invert_cond.
OK.
> tcg-x86_64: Implement setcond and movcond.
Some cosmetics and comments, but overall good.
> tcg-i386: Implement small forward branches.
I think this contains a bug.
> tcg-i386: Simplify brcond2.
I don't like the rewrite of brcond2.
> tcg-i386: Implement setcond, movcond, setcond2.
Not yet reviewed.
Laurent
>
> elf.h | 2 +
> tcg/README | 26 ++++-
> tcg/i386/tcg-target.c | 278 ++++++++++++++++++++++++++++++++++++++--------
> tcg/tcg-op.h | 91 +++++++++++++++
> tcg/tcg-opc.h | 5 +
> tcg/tcg.c | 23 +++-
> tcg/tcg.h | 5 +
> tcg/x86_64/tcg-target.c | 65 +++++++++++-
> 8 files changed, 436 insertions(+), 59 deletions(-)
>
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] Re: [PATCH 1/6] tcg: Generic support for conditional set and conditional move.
2009-12-17 17:27 ` [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move Richard Henderson
2009-12-17 20:50 ` malc
@ 2009-12-18 11:38 ` Laurent Desnogues
1 sibling, 0 replies; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-18 11:38 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Thu, Dec 17, 2009 at 6:27 PM, Richard Henderson <rth@twiddle.net> wrote:
> Defines setcond and movcond for implementing conditional moves at
> the tcg opcode level. 64-bit-on-32-bit is expanded via a setcond2
> primitive plus other operations.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/README | 26 +++++++++++++++-
> tcg/tcg-op.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> tcg/tcg-opc.h | 5 +++
> tcg/tcg.c | 23 ++++++++++----
> 4 files changed, 138 insertions(+), 7 deletions(-)
>
> diff --git a/tcg/README b/tcg/README
> index e672258..8617994 100644
> --- a/tcg/README
> +++ b/tcg/README
> @@ -152,6 +152,11 @@ Conditional jump if t0 cond t1 is true. cond can be:
> TCG_COND_LEU /* unsigned */
> TCG_COND_GTU /* unsigned */
>
> +* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
> +
> +Similar to brcond, except that the 64-bit values T0 and T1
> +are formed from two 32-bit arguments.
> +
> ********* Arithmetic
>
> * add_i32/i64 t0, t1, t2
> @@ -282,6 +287,25 @@ order bytes must be set to zero.
> Indicate that the value of t0 won't be used later. It is useful to
> force dead code elimination.
>
> +********* Conditional moves
> +
> +* setcond_i32/i64 cond, dest, t1, t2
> +
> +dest = (t1 cond t2)
> +
> +Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
> +
> +* movcond_i32/i64 cond, dest, c1, c2, vtrue, vfalse
> +
> +dest= (c1 cond c2 ? vtrue : of)
As malc already wrote this should be:
dest = (c1 cond c2 ? vtrue : vfalse)
> +
> +Set DEST to VTRUE if (c1 cond c2) is true, otherwise set to VFALSE.
> +
> +* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
> +
> +Similar to setcond, except that the 64-bit values T1 and T2 are
> +formed from two 32-bit arguments. The result is a 32-bit value.
> +
> ********* Type conversions
>
> * ext_i32_i64 t0, t1
> @@ -375,7 +399,7 @@ The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
>
> On a 32 bit target, all 64 bit operations are converted to 32 bits. A
> few specific operations must be implemented to allow it (see add2_i32,
> -sub2_i32, brcond2_i32).
> +sub2_i32, brcond2_i32, setcond2_i32).
>
> Floating point operations are not supported in this version. A
> previous incarnation of the code generator had full support of them,
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index faf2e8b..f43ed16 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
> *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
> }
>
> +static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
> + TCGv_i32 arg3, TCGv_i32 arg4,
> + TCGv_i32 arg5, TCGArg arg6)
> +{
> + *gen_opc_ptr++ = opc;
> + *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
> + *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
> + *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
> + *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
> + *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
> + *gen_opparam_ptr++ = arg6;
> +}
> +
> +static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
> + TCGv_i64 arg3, TCGv_i64 arg4,
> + TCGv_i64 arg5, TCGArg arg6)
> +{
> + *gen_opc_ptr++ = opc;
> + *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
> + *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
> + *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
> + *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
> + *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
> + *gen_opparam_ptr++ = arg6;
> +}
> +
> static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
> TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
> TCGArg arg6)
> @@ -1795,6 +1821,67 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
> }
> }
>
> +static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
> + TCGv_i32 arg1, TCGv_i32 arg2)
> +{
> + tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
> +}
> +
> +static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
> + TCGv_i64 arg1, TCGv_i64 arg2)
> +{
> +#if TCG_TARGET_REG_BITS == 64
> + tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
> +#else
> + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
> + TCGV_LOW(arg1), TCGV_HIGH(arg1),
> + TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
> + tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
> +#endif
> +}
> +
> +static inline void tcg_gen_movcond_i32(int cond, TCGv_i32 ret,
> + TCGv_i32 cmp1, TCGv_i32 cmp2,
> + TCGv_i32 op_t, TCGv_i32 op_f)
> +{
> + if (TCGV_EQUAL_I32(op_t, op_f)) {
> + tcg_gen_mov_i32(ret, op_t);
> + return;
> + }
> + tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, cmp1, cmp2, op_t, op_f, cond);
> +}
> +
> +static inline void tcg_gen_movcond_i64(int cond, TCGv_i64 ret,
> + TCGv_i64 cmp1, TCGv_i64 cmp2,
> + TCGv_i64 op_t, TCGv_i64 op_f)
> +{
> + if (TCGV_EQUAL_I64(op_t, op_f)) {
> + tcg_gen_mov_i64(ret, op_t);
> + return;
> + }
> +#if TCG_TARGET_REG_BITS == 64
> + tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, cmp1, cmp2, op_t, op_f, cond);
> +#else
> + {
> + TCGv_i32 t0 = tcg_temp_new_i32();
> + TCGv_i32 zero = tcg_const_i32(0);
> +
> + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
> + TCGV_LOW(cmp1), TCGV_HIGH(cmp1),
> + TCGV_LOW(cmp2), TCGV_HIGH(cmp2), cond);
> +
> + /* ??? We could perhaps conditionally define a movcond2_i32. */
> + tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero,
> + TCGV_LOW(op_t), TCGV_LOW(op_f));
> + tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero,
> + TCGV_HIGH(op_t), TCGV_HIGH(op_f));
> +
> + tcg_temp_free_i32(t0);
> + tcg_temp_free_i32(zero);
> + }
> +#endif
I agree movcond2 would be handy (though it can be argued
that the speed of a 64-bit guest on a 32-bit host, where
it would matter the most, is low anyway).
I think it would also be nice to have a movtrue helper
that'd simply be movcond cond, dest, c1, c2, vtrue, dest.
All that can wait.
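Something like this, sketched against the interface in this patch (the
name and exact signature are only a suggestion):

    static inline void tcg_gen_movtrue_i32(int cond, TCGv_i32 dest,
                                           TCGv_i32 c1, TCGv_i32 c2,
                                           TCGv_i32 vtrue)
    {
        /* dest = (c1 cond c2) ? vtrue : dest */
        tcg_gen_movcond_i32(cond, dest, c1, c2, vtrue, dest);
    }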
> +}
> +
> /***************************************/
> /* QEMU specific operations. Their type depend on the QEMU CPU
> type. */
> @@ -2067,6 +2154,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
> #define tcg_gen_sari_tl tcg_gen_sari_i64
> #define tcg_gen_brcond_tl tcg_gen_brcond_i64
> #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
> +#define tcg_gen_setcond_tl tcg_gen_setcond_i64
> +#define tcg_gen_movcond_tl tcg_gen_movcond_i64
> #define tcg_gen_mul_tl tcg_gen_mul_i64
> #define tcg_gen_muli_tl tcg_gen_muli_i64
> #define tcg_gen_div_tl tcg_gen_div_i64
> @@ -2137,6 +2226,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
> #define tcg_gen_sari_tl tcg_gen_sari_i32
> #define tcg_gen_brcond_tl tcg_gen_brcond_i32
> #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
> +#define tcg_gen_setcond_tl tcg_gen_setcond_i32
> +#define tcg_gen_movcond_tl tcg_gen_movcond_i32
> #define tcg_gen_mul_tl tcg_gen_mul_i32
> #define tcg_gen_muli_tl tcg_gen_muli_i32
> #define tcg_gen_div_tl tcg_gen_div_i32
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index b7f3fd7..086968c 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -42,6 +42,8 @@ DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
>
> DEF2(mov_i32, 1, 1, 0, 0)
> DEF2(movi_i32, 1, 0, 1, 0)
> +DEF2(setcond_i32, 1, 2, 1, 0)
> +DEF2(movcond_i32, 1, 4, 1, 0)
> /* load/store */
> DEF2(ld8u_i32, 1, 1, 1, 0)
> DEF2(ld8s_i32, 1, 1, 1, 0)
> @@ -82,6 +84,7 @@ DEF2(add2_i32, 2, 4, 0, 0)
> DEF2(sub2_i32, 2, 4, 0, 0)
> DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
> DEF2(mulu2_i32, 2, 2, 0, 0)
> +DEF2(setcond2_i32, 1, 4, 1, 0)
> #endif
> #ifdef TCG_TARGET_HAS_ext8s_i32
> DEF2(ext8s_i32, 1, 1, 0, 0)
> @@ -111,6 +114,8 @@ DEF2(neg_i32, 1, 1, 0, 0)
> #if TCG_TARGET_REG_BITS == 64
> DEF2(mov_i64, 1, 1, 0, 0)
> DEF2(movi_i64, 1, 0, 1, 0)
> +DEF2(setcond_i64, 1, 2, 1, 0)
> +DEF2(movcond_i64, 1, 4, 1, 0)
> /* load/store */
> DEF2(ld8u_i64, 1, 1, 1, 0)
> DEF2(ld8s_i64, 1, 1, 1, 0)
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 3c0e296..f7ea727 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -670,6 +670,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
> }
> #endif
>
> +
Was this really needed? :-)
Laurent
> static void tcg_reg_alloc_start(TCGContext *s)
> {
> int i;
> @@ -888,21 +889,31 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
> fprintf(outfile, "%s",
> tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
> }
> - if (c == INDEX_op_brcond_i32
> + switch (c) {
> + case INDEX_op_brcond_i32:
> +#if TCG_TARGET_REG_BITS == 32
> + case INDEX_op_brcond2_i32:
> +#elif TCG_TARGET_REG_BITS == 64
> + case INDEX_op_brcond_i64:
> +#endif
> + case INDEX_op_setcond_i32:
> + case INDEX_op_movcond_i32:
> #if TCG_TARGET_REG_BITS == 32
> - || c == INDEX_op_brcond2_i32
> + case INDEX_op_setcond2_i32:
> #elif TCG_TARGET_REG_BITS == 64
> - || c == INDEX_op_brcond_i64
> + case INDEX_op_setcond_i64:
> + case INDEX_op_movcond_i64:
> #endif
> - ) {
> if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
> fprintf(outfile, ",%s", cond_name[args[k++]]);
> else
> fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
> i = 1;
> - }
> - else
> + break;
> + default:
> i = 0;
> + break;
> + }
> for(; i < nb_cargs; i++) {
> if (k != 0)
> fprintf(outfile, ",");
> --
> 1.6.5.2
>
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] Re: [PATCH 2/6] tcg: Add tcg_invert_cond.
2009-12-17 17:28 ` [Qemu-devel] [PATCH 2/6] tcg: Add tcg_invert_cond Richard Henderson
@ 2009-12-18 11:39 ` Laurent Desnogues
0 siblings, 0 replies; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-18 11:39 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Thu, Dec 17, 2009 at 6:28 PM, Richard Henderson <rth@twiddle.net> wrote:
> It is very handy to have a reliable mapping of a condition to its inverse.
> This will be used in several patches to follow.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
Acked-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Laurent
> ---
> tcg/tcg.h | 5 +++++
> 1 files changed, 5 insertions(+), 0 deletions(-)
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 9824493..376d6af 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -205,6 +205,11 @@ typedef enum {
> TCG_COND_GTU,
> } TCGCond;
>
> +static inline TCGCond tcg_invert_cond(TCGCond c)
> +{
> + return (TCGCond)(c ^ 1);
> +}
> +
> #define TEMP_VAL_DEAD 0
> #define TEMP_VAL_REG 1
> #define TEMP_VAL_MEM 2
> --
> 1.6.5.2
>
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] Re: [PATCH 3/6] tcg-x86_64: Implement setcond and movcond.
2009-12-17 17:32 ` [Qemu-devel] [PATCH 3/6] tcg-x86_64: Implement setcond and movcond Richard Henderson
@ 2009-12-18 11:39 ` Laurent Desnogues
2009-12-18 17:11 ` Richard Henderson
0 siblings, 1 reply; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-18 11:39 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Thu, Dec 17, 2009 at 6:32 PM, Richard Henderson <rth@twiddle.net> wrote:
> Implement conditional moves in the x86_64 backend.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/x86_64/tcg-target.c | 65 ++++++++++++++++++++++++++++++++++++++++++++--
> 1 files changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
> index 2339091..e411755 100644
> --- a/tcg/x86_64/tcg-target.c
> +++ b/tcg/x86_64/tcg-target.c
> @@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
> }
> }
>
> -static void tcg_out_brcond(TCGContext *s, int cond,
> - TCGArg arg1, TCGArg arg2, int const_arg2,
> - int label_index, int rexw)
> +static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
> + TCGArg arg2, int const_arg2, int rexw)
> {
> if (const_arg2) {
> if (arg2 == 0) {
> @@ -508,9 +507,45 @@ static void tcg_out_brcond(TCGContext *s, int cond,
> } else {
> tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
> }
> +}
> +
> +static void tcg_out_brcond(TCGContext *s, int cond,
> + TCGArg arg1, TCGArg arg2, int const_arg2,
> + int label_index, int rexw)
> +{
> + tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
> tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
> }
>
> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
> + TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
Perhaps renaming arg0 to dest would make things slightly
more readable.
> +{
> + int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
> +
> + if (use_xor)
> + tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
> + tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
> + tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, arg0);
A comment saying this is a setcc would be nice.
Also note that tcg_out_modrm will generate an unneeded prefix
for some registers. cf. the patch I sent to the list months ago.
> + if (!use_xor)
> + tgen_arithi32(s, ARITH_AND, arg0, 0xff);
Wouldn't movzbl be better?
Regarding the xor optimization, I tested it on my i7 and it was
(very) slightly slower running a 64-bit SPEC2k gcc.
> +}
> +
> +static void tcg_out_movcond(TCGContext *s, int cond, TCGArg arg0,
> + TCGArg arg1, TCGArg arg2, int const_arg2,
> + TCGArg arg3, TCGArg arg4, int rexw)
Perhaps renaming arg0 to dest would make things slightly
more readable.
You should also add a note stating that arg3 != arg4.
> +{
> + if (arg0 == arg3) {
> + cond = tcg_invert_cond(cond);
> + arg3 = arg4;
> + arg4 = arg0;
> + }
> +
> + tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
> + if (arg0 != arg4)
> + tcg_out_mov(s, arg0, arg4);
> + tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, arg0, arg3);
A comment saying this is cmovcc would be nice.
Note, I didn't check the correctness, though it looks OK. I'll
run real tests at the next iteration :-)
> +}
> +
> #if defined(CONFIG_SOFTMMU)
>
> #include "../../softmmu_defs.h"
> @@ -1197,6 +1232,24 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
> tcg_out_modrm(s, 0x8b, args[0], args[1]);
> break;
>
> + case INDEX_op_setcond_i32:
> + tcg_out_setcond(s, args[3], args[0], args[1], args[2],
> + const_args[2], 0);
> + break;
> + case INDEX_op_setcond_i64:
> + tcg_out_setcond(s, args[3], args[0], args[1], args[2],
> + const_args[2], P_REXW);
> + break;
> +
> + case INDEX_op_movcond_i32:
> + tcg_out_movcond(s, args[5], args[0], args[1], args[2],
> + const_args[2], args[3], args[4], 0);
> + break;
> + case INDEX_op_movcond_i64:
> + tcg_out_movcond(s, args[5], args[0], args[1], args[2],
> + const_args[2], args[3], args[4], P_REXW);
> + break;
> +
> case INDEX_op_qemu_ld8u:
> tcg_out_qemu_ld(s, args, 0);
> break;
> @@ -1376,6 +1429,12 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
> { INDEX_op_ext16u_i64, { "r", "r"} },
> { INDEX_op_ext32u_i64, { "r", "r"} },
>
> + { INDEX_op_setcond_i32, { "r", "r", "re" } },
> + { INDEX_op_setcond_i64, { "r", "r", "re" } },
> +
> + { INDEX_op_movcond_i32, { "r", "r", "re", "r", "r" } },
> + { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },
For the i32 variants, "ri" instead of "re" is enough.
Laurent
> +
> { INDEX_op_qemu_ld8u, { "r", "L" } },
> { INDEX_op_qemu_ld8s, { "r", "L" } },
> { INDEX_op_qemu_ld16u, { "r", "L" } },
> --
> 1.6.5.2
>
>
^ permalink raw reply [flat|nested] 26+ messages in thread
* [Qemu-devel] Re: [PATCH 4/6] tcg-i386: Implement small forward branches.
2009-12-17 17:55 ` [Qemu-devel] [PATCH 4/6] tcg-i386: Implement small forward branches Richard Henderson
@ 2009-12-18 11:39 ` Laurent Desnogues
2009-12-18 17:16 ` Richard Henderson
0 siblings, 1 reply; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-18 11:39 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Thu, Dec 17, 2009 at 6:55 PM, Richard Henderson <rth@twiddle.net> wrote:
> There are places, like brcond2, where we know that the destination
> of a forward branch will be within 127 bytes. Add the R_386_PC8
> relocation type to support this, and add a flag to tcg_out_jxx to
> generate it. Set the flag in the small forward branches in brcond2.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> elf.h | 2 ++
> tcg/i386/tcg-target.c | 36 +++++++++++++++++++++++++-----------
> 2 files changed, 27 insertions(+), 11 deletions(-)
>
> diff --git a/elf.h b/elf.h
> index 11674d7..c84c8ab 100644
> --- a/elf.h
> +++ b/elf.h
> @@ -243,6 +243,8 @@ typedef struct {
> #define R_386_GOTOFF 9
> #define R_386_GOTPC 10
> #define R_386_NUM 11
> +/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */
> +#define R_386_PC8 23
>
> #define R_MIPS_NONE 0
> #define R_MIPS_16 1
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 972b102..cc3d28f 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -61,6 +61,9 @@ static void patch_reloc(uint8_t *code_ptr, int type,
> case R_386_PC32:
> *(uint32_t *)code_ptr = value - (long)code_ptr;
> break;
> + case R_386_PC8:
> + *(uint8_t *)code_ptr = value - (long)code_ptr;
> + break;
> default:
> tcg_abort();
> }
> @@ -305,7 +308,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
> tgen_arithi(s, ARITH_ADD, reg, val, 0);
> }
>
> -static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
> +/* Use SMALL != 0 to force a short forward branch. */
> +static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
> {
> int32_t val, val1;
> TCGLabel *l = &s->labels[label_index];
> @@ -320,6 +324,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
> tcg_out8(s, 0x70 + opc);
> tcg_out8(s, val1);
> } else {
> + assert (!small);
To be consistent with the rest I'd use:

    if (small)
        tcg_abort();
> if (opc == -1) {
> tcg_out8(s, 0xe9);
> tcg_out32(s, val - 5);
> @@ -329,6 +334,15 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
> tcg_out32(s, val - 6);
> }
> }
> + } else if (small) {
> + if (opc == -1) {
> + tcg_out8(s, 0xeb);
> + } else {
> + tcg_out8(s, 0x0f);
I don't think this prefix should be output: 0x70+cc is already the
complete opcode for the short Jcc form; the 0x0f byte only belongs in
front of the 0x80+cc near (rel32) forms.
Laurent
> + tcg_out8(s, 0x70 + opc);
> + }
> + tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
> + s->code_ptr += 1;
> } else {
> if (opc == -1) {
> tcg_out8(s, 0xe9);
> @@ -355,7 +369,7 @@ static void tcg_out_brcond(TCGContext *s, int cond,
> } else {
> tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
> }
> - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
> + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, 0);
> }
>
> /* XXX: we implement it at the target level to avoid having to
> @@ -376,42 +390,42 @@ static void tcg_out_brcond2(TCGContext *s,
> break;
> case TCG_COND_LT:
> tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> break;
> case TCG_COND_LE:
> tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> break;
> case TCG_COND_GT:
> tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> break;
> case TCG_COND_GE:
> tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> break;
> case TCG_COND_LTU:
> tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> break;
> case TCG_COND_LEU:
> tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> break;
> case TCG_COND_GTU:
> tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> break;
> case TCG_COND_GEU:
> tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next);
> + tcg_out_jxx(s, JCC_JNE, label_next, 1);
> tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> break;
> default:
> @@ -913,7 +927,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
> }
> break;
> case INDEX_op_br:
> - tcg_out_jxx(s, JCC_JMP, args[0]);
> + tcg_out_jxx(s, JCC_JMP, args[0], 0);
> break;
> case INDEX_op_movi_i32:
> tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
> --
> 1.6.5.2
>
>
* [Qemu-devel] Re: [PATCH 5/6] tcg-i386: Simplify brcond2.
2009-12-17 18:38 ` [Qemu-devel] [PATCH 5/6] tcg-i386: Simplify brcond2 Richard Henderson
@ 2009-12-18 11:40 ` Laurent Desnogues
2009-12-18 17:45 ` Richard Henderson
0 siblings, 1 reply; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-18 11:40 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Thu, Dec 17, 2009 at 7:38 PM, Richard Henderson <rth@twiddle.net> wrote:
> Split out tcg_out_cond from tcg_out_brcond. Add "small" arguments
> to all branch functions for completeness. Unify all the calls to
> generate branches within brcond2 and pass on the small flag.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/i386/tcg-target.c | 87 ++++++++++++++++++++++--------------------------
> 1 files changed, 40 insertions(+), 47 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index cc3d28f..f7b2416 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -355,9 +355,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
> }
> }
>
> -static void tcg_out_brcond(TCGContext *s, int cond,
> - TCGArg arg1, TCGArg arg2, int const_arg2,
> - int label_index)
> +static void tcg_out_cond(TCGContext *s, int cond,
> + TCGArg arg1, TCGArg arg2, int const_arg2)
> {
> if (const_arg2) {
> if (arg2 == 0) {
> @@ -369,68 +368,61 @@ static void tcg_out_brcond(TCGContext *s, int cond,
> } else {
> tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
> }
> - tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, 0);
> +}
> +
> +static void tcg_out_brcond(TCGContext *s, int cond,
> + TCGArg arg1, TCGArg arg2, int const_arg2,
> + int label_index, int small)
> +{
> + tcg_out_cond(s, cond, arg1, arg2, const_arg2);
> + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
> }
>
> /* XXX: we implement it at the target level to avoid having to
> handle cross basic blocks temporaries */
> -static void tcg_out_brcond2(TCGContext *s,
> - const TCGArg *args, const int *const_args)
> +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
> + const int *const_args, int small)
> {
> - int label_next;
> - label_next = gen_new_label();
> - switch(args[4]) {
> + int label_next = gen_new_label();
> + int label_dest = args[5];
> + int cond = args[4], c1, c2, c3;
> +
> + switch (cond) {
> case TCG_COND_EQ:
> - tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
> - tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
> + c1 = -1, c2 = TCG_COND_NE, c3 = TCG_COND_EQ;
> break;
> case TCG_COND_NE:
> - tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
> - tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
> + c1 = TCG_COND_NE, c2 = -1, c3 = TCG_COND_NE;
> break;
> case TCG_COND_LT:
> - tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> - break;
> - case TCG_COND_LE:
> - tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> - break;
> - case TCG_COND_GT:
> - tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> - break;
> - case TCG_COND_GE:
> - tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> - break;
> case TCG_COND_LTU:
> - tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> + c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LTU;
> break;
> + case TCG_COND_LE:
> case TCG_COND_LEU:
> - tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> + c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LEU;
> break;
> + case TCG_COND_GT:
> case TCG_COND_GTU:
> - tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> + c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GTU;
> break;
> + case TCG_COND_GE:
> case TCG_COND_GEU:
> - tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> - tcg_out_jxx(s, JCC_JNE, label_next, 1);
> - tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> + c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GEU;
> break;
> default:
> - tcg_abort();
> + tcg_abort ();
This is unwanted :-)
> + }
> +
> + tcg_out_cond(s, cond, args[1], args[3], const_args[3]);
> + if (c1 != -1) {
> + tcg_out_jxx(s, tcg_cond_to_jcc[c1], label_dest, small);
> }
> + if (c2 != -1) {
> + tcg_out_jxx(s, tcg_cond_to_jcc[c2], label_next, 1);
> + }
> + tcg_out_brcond(s, c3, args[0], args[2], const_args[2], label_dest, small);
> +
> tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
> }
I'm not sure I really like that rewrite, I find it hard to read.
Convince me it's better :-)
Laurent
> @@ -1058,10 +1050,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
> tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
> break;
> case INDEX_op_brcond_i32:
> - tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
> + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> + args[3], 0);
> break;
> case INDEX_op_brcond2_i32:
> - tcg_out_brcond2(s, args, const_args);
> + tcg_out_brcond2(s, args, const_args, 0);
> break;
>
> case INDEX_op_bswap16_i32:
> --
> 1.6.5.2
>
>
* Re: [Qemu-devel] Re: [PATCH 3/6] tcg-x86_64: Implement setcond and movcond.
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
@ 2009-12-18 17:11 ` Richard Henderson
2009-12-18 17:41 ` Laurent Desnogues
0 siblings, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-12-18 17:11 UTC (permalink / raw)
To: Laurent Desnogues; +Cc: qemu-devel
On 12/18/2009 03:39 AM, Laurent Desnogues wrote:
>> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
>> + TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
>
> Perhaps renaming arg0 to dest would make things slightly
> more readable.
Ok.
> Also note that tcg_out_modrm will generate an unneeded prefix
> for some registers. cf. the patch I sent to the list months ago.
Huh. Didn't notice since the disassembler printed what I expected to
see. Is fixing this at the same time a requirement for acceptance?
I'd prefer to tackle that separately, since no doubt it affects every
use of P_REXB.
>> + tgen_arithi32(s, ARITH_AND, arg0, 0xff);
>
> Wouldn't movzbl be better?
Handled inside tgen_arithi32:
} else if (c == ARITH_AND && val == 0xffu) {
/* movzbl */
tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, r0, r0);
I didn't feel the need to replicate that.
> Regarding the xor optimization, I tested it on my i7 and it was
> (very) slightly slower running a 64-bit SPEC2k gcc.
Huh. It used to be recommended. The partial-word store used to stall
the pipeline until the old value was ready, and the XOR was
special-cased as a clear, which both broke the input dependency and
prevented a partial-register stall on the output.
Actually, this recommendation is still present: Section 3.5.1.6 in the
November 2009 revision of the Intel Optimization Reference Manual.
If it's all the same, I'd prefer to keep what I have there. All other
things being equal, the XOR is 2 bytes and the MOVZBL is 3.
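For reference, here is a condensed sketch (not verbatim from the patch) of
the two code paths being weighed in tcg_out_setcond, with the resulting x86
instructions noted in comments:

    int use_xor = (dest != arg1 && (const_arg2 || dest != arg2));

    if (use_xor) {
        /* Clear DEST before the compare, so the flags consumed by setcc
           are not clobbered; emitted as the 2-byte "xor dest,dest". */
        tcg_out_movi(s, TCG_TYPE_I32, dest, 0);
    }
    tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
    /* setcc dest */
    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, dest);
    if (!use_xor) {
        /* tgen_arithi32 turns AND 0xff into the 3-byte "movzbl dest,dest". */
        tgen_arithi32(s, ARITH_AND, dest, 0xff);
    }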
>> +static void tcg_out_movcond(TCGContext *s, int cond, TCGArg arg0,
>> + TCGArg arg1, TCGArg arg2, int const_arg2,
>> + TCGArg arg3, TCGArg arg4, int rexw)
>
> Perhaps renaming arg0 to dest would make things slightly
> more readable.
Ok.
> You should also add a note stating that arg3 != arg4.
I don't believe that's true though. It's caught immediately when we
emit the movcond opcode, but there's no check later once
copy-propagation has been done within TCG.
I check for that in the i386 and sparc backends, because dest==arg3 &&
dest==arg4 would actually generate incorrect code. Here in the x86_64
backend, where we always use cmov it doesn't generate incorrect code,
merely inefficient.
I could add an early out for that case, if you prefer.
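For what it's worth, the early out would amount to something like this
sketch (it also covers the degenerate dest == vtrue == vfalse case):

    if (vtrue == vfalse) {
        /* Both source values are the same register; the condition is
           irrelevant and a plain move suffices. */
        tcg_out_mov(s, dest, vtrue);
        return;
    }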
>> + { INDEX_op_setcond_i32, { "r", "r", "re" } },
>> + { INDEX_op_setcond_i64, { "r", "r", "re" } },
>> +
>> + { INDEX_op_movcond_i32, { "r", "r", "re", "r", "r" } },
>> + { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },
>
> For the i32 variants, "ri" instead of "re" is enough.
Ah, quite right.
r~
* Re: [Qemu-devel] Re: [PATCH 4/6] tcg-i386: Implement small forward branches.
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
@ 2009-12-18 17:16 ` Richard Henderson
0 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2009-12-18 17:16 UTC (permalink / raw)
To: Laurent Desnogues; +Cc: qemu-devel
On 12/18/2009 03:39 AM, Laurent Desnogues wrote:
> To be consistent with the rest I'd use:
>
> if (small)
> tcg_abort();
Ok.
>> + tcg_out8(s, 0x0f);
>
> I don't think this prefix should be output.
Oops. Paste-o.
r~
* Re: [Qemu-devel] Re: [PATCH 3/6] tcg-x86_64: Implement setcond and movcond.
2009-12-18 17:11 ` Richard Henderson
@ 2009-12-18 17:41 ` Laurent Desnogues
0 siblings, 0 replies; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-18 17:41 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Fri, Dec 18, 2009 at 6:11 PM, Richard Henderson <rth@twiddle.net> wrote:
>> Also note that tcg_out_modrm will generate an unneeded prefix
>> for some registers. cf. the patch I sent to the list months ago.
>
> Huh. Didn't notice since the disassembler printed what I expected to see.
> Is fixing this at the same time a requirement for acceptance?
> I'd prefer to tackle that separately, since no doubt it affects every use of
> P_REXB.
I agree this change can be delayed.
>>> + tgen_arithi32(s, ARITH_AND, arg0, 0xff);
>>
>> Wouldn't movzbl be better?
>
> Handled inside tgen_arithi32:
>
> } else if (c == ARITH_AND && val == 0xffu) {
> /* movzbl */
> tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, r0, r0);
>
> I didn't feel the need to replicate that.
Oops, I compared with my code, which has an explicit movzbl :)
>> Regarding the xor optimization, I tested it on my i7 and it was
>> (very) slightly slower running a 64-bit SPEC2k gcc.
>
> Huh. It used to be recommended. The partial word store used to stall the
> pipeline until the old value was ready, and the XOR was special-cased as a
> clear, which broke both the input dependency and also prevented a
> partial-register stall on the output.
>
> Actually, this recommendation is still present: Section 3.5.1.6 in the
> November 2009 revision of the Intel Optimization Reference Manual.
>
> If it's all the same, I'd prefer to keep what I have there. All other
> things being equal, the XOR is 2 bytes and the MOVZBL is 3.
I agree too. Anyway, my measurement is not representative enough
to mean anything. And in that case I think shorter code is
better, so let's go for XOR.
>>> +static void tcg_out_movcond(TCGContext *s, int cond, TCGArg arg0,
>>> + TCGArg arg1, TCGArg arg2, int const_arg2,
>>> + TCGArg arg3, TCGArg arg4, int rexw)
>>
>> Perhaps renaming arg0 to dest would make things slightly
>> more readable.
>
> Ok.
>
>> You should also add a note stating that arg3 != arg4.
>
> I don't believe that's true though. It's caught immediately when we emit
> the movcond opcode, but there's no check later once copy-propagation has
> been done within TCG.
>
> I check for that in the i386 and sparc backends, because dest==arg3 &&
> dest==arg4 would actually generate incorrect code. Here in the x86_64
> backend, where we always use cmov it doesn't generate incorrect code, merely
> inefficient.
>
> I could add an early out for that case, if you prefer.
No, you can leave it as is unless someone else objects.
Laurent
* Re: [Qemu-devel] Re: [PATCH 5/6] tcg-i386: Simplify brcond2.
2009-12-18 11:40 ` [Qemu-devel] " Laurent Desnogues
@ 2009-12-18 17:45 ` Richard Henderson
0 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2009-12-18 17:45 UTC (permalink / raw)
To: Laurent Desnogues; +Cc: qemu-devel
On 12/18/2009 03:40 AM, Laurent Desnogues wrote:
> I'm not sure I really like that rewrite, I find it hard to read.
> Convince me it's better :-)
Would it be more obvious if I used a table instead of a switch?
The code size reduction is
Num: Value Size Type Bind Vis Ndx Name
before:
40: 000039a0 793 FUNC LOCAL DEFAULT 1 tcg_out_brcond2
after:
40: 00003a50 412 FUNC LOCAL DEFAULT 1 tcg_out_brcond2
table-ized:
40: 00003a50 224 FUNC LOCAL DEFAULT 1 tcg_out_brcond2
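For illustration, the table-ized variant would be built around something
like the following sketch (hypothetical, not the code that was measured):

    /* Map each 64-bit condition to the branch taken on the high parts (c1),
       the skip to label_next (c2), and the branch on the low parts (c3);
       -1 means that branch is not emitted. */
    static const int8_t cond2_map[][3] = {
        [TCG_COND_EQ]  = { -1,           TCG_COND_NE, TCG_COND_EQ  },
        [TCG_COND_NE]  = { TCG_COND_NE,  -1,          TCG_COND_NE  },
        [TCG_COND_LT]  = { TCG_COND_LT,  TCG_COND_NE, TCG_COND_LTU },
        [TCG_COND_LE]  = { TCG_COND_LE,  TCG_COND_NE, TCG_COND_LEU },
        [TCG_COND_GT]  = { TCG_COND_GT,  TCG_COND_NE, TCG_COND_GTU },
        [TCG_COND_GE]  = { TCG_COND_GE,  TCG_COND_NE, TCG_COND_GEU },
        [TCG_COND_LTU] = { TCG_COND_LTU, TCG_COND_NE, TCG_COND_LTU },
        [TCG_COND_LEU] = { TCG_COND_LEU, TCG_COND_NE, TCG_COND_LEU },
        [TCG_COND_GTU] = { TCG_COND_GTU, TCG_COND_NE, TCG_COND_GTU },
        [TCG_COND_GEU] = { TCG_COND_GEU, TCG_COND_NE, TCG_COND_GEU },
    };

    c1 = cond2_map[cond][0];
    c2 = cond2_map[cond][1];
    c3 = cond2_map[cond][2];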
But I'm not married to the patch; let's drop it if there's a question of
it holding up the rest of the series.
r~
* [Qemu-devel] tcg conditional set/move, round 3
2009-12-18 11:37 ` [Qemu-devel] Re: [PATCH 0/6] tcg conditional set/move, round 2 Laurent Desnogues
@ 2009-12-18 21:38 ` Richard Henderson
2009-12-19 11:40 ` [Qemu-devel] " Laurent Desnogues
` (2 more replies)
0 siblings, 3 replies; 26+ messages in thread
From: Richard Henderson @ 2009-12-18 21:38 UTC (permalink / raw)
To: Laurent Desnogues; +Cc: qemu-devel
[-- Attachment #1: Type: text/plain, Size: 795 bytes --]
On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
>> tcg: Generic support for conditional set and conditional move.
>
> Needs cosmetics changes.
Fixed, attachment 1.
>> tcg-x86_64: Implement setcond and movcond.
>
> Some cosmetics and comments, but overall good.
Fixed, attachment 2.
>> tcg-i386: Implement small forward branches.
>
> I think this contains a bug.
Fixed, attachment 3. I've added an abort to patch_reloc to verify that
the relocation is in range. I've propagated the "small" flag to all of
the branch functions so that...
>> tcg-i386: Simplify brcond2.
>
> I don't like the rewrite of brcond2.
... this patch is dropped.
>> tcg-i386: Implement setcond, movcond, setcond2.
>
> Not yet reviewed.
Fixed, attachment 4. Similar changes to the amd64 patch.
r~
[-- Attachment #2: commit-cmov-1 --]
[-- Type: text/plain, Size: 9404 bytes --]
commit b89e4fc848abd936554ce48b7f9cf23d27ed9eb5
Author: Richard Henderson <rth@twiddle.net>
Date: Fri Dec 18 11:02:06 2009 -0800
tcg: Generic support for conditional set and conditional move.
Defines setcond and movcond for implementing conditional moves at
the tcg opcode level. 64-bit-on-32-bit is expanded via a setcond2
primitive plus other operations.
Signed-off-by: Richard Henderson <rth@twiddle.net>
diff --git a/tcg/README b/tcg/README
index e672258..6163abe 100644
--- a/tcg/README
+++ b/tcg/README
@@ -152,6 +152,11 @@ Conditional jump if t0 cond t1 is true. cond can be:
TCG_COND_LEU /* unsigned */
TCG_COND_GTU /* unsigned */
+* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
********* Arithmetic
* add_i32/i64 t0, t1, t2
@@ -282,6 +287,25 @@ order bytes must be set to zero.
Indicate that the value of t0 won't be used later. It is useful to
force dead code elimination.
+********* Conditional moves
+
+* setcond_i32/i64 cond, dest, t1, t2
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+* movcond_i32/i64 cond, dest, c1, c2, vtrue, vfalse
+
+dest = (c1 cond c2 ? vtrue : vfalse)
+
+Set DEST to VTRUE if (C1 cond C2) is true, otherwise set to VFALSE.
+
+* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments. The result is a 32-bit value.
+
********* Type conversions
* ext_i32_i64 t0, t1
@@ -375,7 +399,7 @@ The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
On a 32 bit target, all 64 bit operations are converted to 32 bits. A
few specific operations must be implemented to allow it (see add2_i32,
-sub2_i32, brcond2_i32).
+sub2_i32, brcond2_i32, setcond2_i32).
Floating point operations are not supported in this version. A
previous incarnation of the code generator had full support of them,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index faf2e8b..f43ed16 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
*gen_opparam_ptr++ = GET_TCGV_I64(arg6);
}
+static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4,
+ TCGv_i32 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4,
+ TCGv_i64 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
TCGArg arg6)
@@ -1795,6 +1821,67 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
}
}
+static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#if TCG_TARGET_REG_BITS == 64
+ tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+#else
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_movcond_i32(int cond, TCGv_i32 ret,
+ TCGv_i32 cmp1, TCGv_i32 cmp2,
+ TCGv_i32 op_t, TCGv_i32 op_f)
+{
+ if (TCGV_EQUAL_I32(op_t, op_f)) {
+ tcg_gen_mov_i32(ret, op_t);
+ return;
+ }
+ tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, cmp1, cmp2, op_t, op_f, cond);
+}
+
+static inline void tcg_gen_movcond_i64(int cond, TCGv_i64 ret,
+ TCGv_i64 cmp1, TCGv_i64 cmp2,
+ TCGv_i64 op_t, TCGv_i64 op_f)
+{
+ if (TCGV_EQUAL_I64(op_t, op_f)) {
+ tcg_gen_mov_i64(ret, op_t);
+ return;
+ }
+#if TCG_TARGET_REG_BITS == 64
+ tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, cmp1, cmp2, op_t, op_f, cond);
+#else
+ {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_const_i32(0);
+
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+ TCGV_LOW(cmp1), TCGV_HIGH(cmp1),
+ TCGV_LOW(cmp2), TCGV_HIGH(cmp2), cond);
+
+ /* ??? We could perhaps conditionally define a movcond2_i32. */
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero,
+ TCGV_LOW(op_t), TCGV_LOW(op_f));
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero,
+ TCGV_HIGH(op_t), TCGV_HIGH(op_f));
+
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(zero);
+ }
+#endif
+}
+
/***************************************/
/* QEMU specific operations. Their type depend on the QEMU CPU
type. */
@@ -2067,6 +2154,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_sari_tl tcg_gen_sari_i64
#define tcg_gen_brcond_tl tcg_gen_brcond_i64
#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
#define tcg_gen_mul_tl tcg_gen_mul_i64
#define tcg_gen_muli_tl tcg_gen_muli_i64
#define tcg_gen_div_tl tcg_gen_div_i64
@@ -2137,6 +2226,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_sari_tl tcg_gen_sari_i32
#define tcg_gen_brcond_tl tcg_gen_brcond_i32
#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
#define tcg_gen_mul_tl tcg_gen_mul_i32
#define tcg_gen_muli_tl tcg_gen_muli_i32
#define tcg_gen_div_tl tcg_gen_div_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..086968c 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -42,6 +42,8 @@ DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF2(mov_i32, 1, 1, 0, 0)
DEF2(movi_i32, 1, 0, 1, 0)
+DEF2(setcond_i32, 1, 2, 1, 0)
+DEF2(movcond_i32, 1, 4, 1, 0)
/* load/store */
DEF2(ld8u_i32, 1, 1, 1, 0)
DEF2(ld8s_i32, 1, 1, 1, 0)
@@ -82,6 +84,7 @@ DEF2(add2_i32, 2, 4, 0, 0)
DEF2(sub2_i32, 2, 4, 0, 0)
DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF2(mulu2_i32, 2, 2, 0, 0)
+DEF2(setcond2_i32, 1, 4, 1, 0)
#endif
#ifdef TCG_TARGET_HAS_ext8s_i32
DEF2(ext8s_i32, 1, 1, 0, 0)
@@ -111,6 +114,8 @@ DEF2(neg_i32, 1, 1, 0, 0)
#if TCG_TARGET_REG_BITS == 64
DEF2(mov_i64, 1, 1, 0, 0)
DEF2(movi_i64, 1, 0, 1, 0)
+DEF2(setcond_i64, 1, 2, 1, 0)
+DEF2(movcond_i64, 1, 4, 1, 0)
/* load/store */
DEF2(ld8u_i64, 1, 1, 1, 0)
DEF2(ld8s_i64, 1, 1, 1, 0)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3c0e296..f7ea727 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -670,6 +670,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
}
#endif
+
static void tcg_reg_alloc_start(TCGContext *s)
{
int i;
@@ -888,21 +889,31 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
fprintf(outfile, "%s",
tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
}
- if (c == INDEX_op_brcond_i32
+ switch (c) {
+ case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_brcond_i64:
+#endif
+ case INDEX_op_setcond_i32:
+ case INDEX_op_movcond_i32:
#if TCG_TARGET_REG_BITS == 32
- || c == INDEX_op_brcond2_i32
+ case INDEX_op_setcond2_i32:
#elif TCG_TARGET_REG_BITS == 64
- || c == INDEX_op_brcond_i64
+ case INDEX_op_setcond_i64:
+ case INDEX_op_movcond_i64:
#endif
- ) {
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
fprintf(outfile, ",%s", cond_name[args[k++]]);
else
fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
i = 1;
- }
- else
+ break;
+ default:
i = 0;
+ break;
+ }
for(; i < nb_cargs; i++) {
if (k != 0)
fprintf(outfile, ",");
[-- Attachment #3: commit-cmov-amd64 --]
[-- Type: text/plain, Size: 4050 bytes --]
commit eb326c39503ff265dc83e8792e87e6308d9dfd71
Author: Richard Henderson <rth@twiddle.net>
Date: Fri Dec 18 11:51:35 2009 -0800
tcg-x86_64: Implement setcond and movcond.
Implement conditional moves in the x86_64 backend.
Signed-off-by: Richard Henderson <rth@twiddle.net>
diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index 2339091..1f86946 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
}
}
-static void tcg_out_brcond(TCGContext *s, int cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index, int rexw)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2, int rexw)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -508,9 +507,51 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
}
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int rexw)
+{
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
}
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
+{
+ int use_xor = (dest != arg1 && (const_arg2 || dest != arg2));
+
+ if (use_xor)
+ tcg_out_movi(s, TCG_TYPE_I32, dest, 0);
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
+ /* setcc */
+ tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, dest);
+ if (!use_xor)
+ tgen_arithi32(s, ARITH_AND, dest, 0xff);
+}
+
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg dest,
+ TCGArg cmp1, TCGArg cmp2, int const_cmp2,
+ TCGArg vtrue, TCGArg vfalse, int rexw)
+{
+ if (vtrue == vfalse) {
+ tcg_out_mov(s, dest, vtrue);
+ return;
+ }
+ if (dest == vtrue) {
+ cond = tcg_invert_cond(cond);
+ vtrue = vfalse;
+ vfalse = dest;
+ }
+
+ tcg_out_cond(s, cond, cmp1, cmp2, const_cmp2, rexw);
+ if (dest != vfalse)
+ tcg_out_mov(s, dest, vfalse);
+ /* cmovcc */
+ tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, dest, vtrue);
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -1197,6 +1238,24 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
tcg_out_modrm(s, 0x8b, args[0], args[1]);
break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+ const_args[2], 0);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+ const_args[2], P_REXW);
+ break;
+
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ const_args[2], args[3], args[4], 0);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ const_args[2], args[3], args[4], P_REXW);
+ break;
+
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
@@ -1376,6 +1435,12 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
{ INDEX_op_ext16u_i64, { "r", "r"} },
{ INDEX_op_ext32u_i64, { "r", "r"} },
+ { INDEX_op_setcond_i32, { "r", "r", "ri" } },
+ { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
+ { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },
+
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },
{ INDEX_op_qemu_ld16u, { "r", "L" } },
[-- Attachment #4: commit-cmov-i386-jmps --]
[-- Type: text/plain, Size: 9800 bytes --]
commit 798a4a5bfc491fef81cf968cd0039ad60f34366a
Author: Richard Henderson <rth@twiddle.net>
Date: Fri Dec 18 13:01:30 2009 -0800
tcg-i386: Implement small forward branches.
There are places, like brcond2, where we know that the destination
of a forward branch will be within 127 bytes.
Add the R_386_PC8 relocation type to support this. Add a flag to
tcg_out_jxx and tcg_out_brcond* to enable it. Set the flag in the
brcond2 label_next branches; pass along the input flag otherwise.
Signed-off-by: Richard Henderson <rth@twiddle.net>
diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@ typedef struct {
#define R_386_GOTOFF 9
#define R_386_GOTPC 10
#define R_386_NUM 11
+/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */
+#define R_386_PC8 23
#define R_MIPS_NONE 0
#define R_MIPS_16 1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..48c9bc8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,12 @@ static void patch_reloc(uint8_t *code_ptr, int type,
case R_386_PC32:
*(uint32_t *)code_ptr = value - (long)code_ptr;
break;
+ case R_386_PC8:
+ value -= (long)code_ptr;
+ if (value != (int8_t)value)
+ tcg_abort();
+ *(uint8_t *)code_ptr = value;
+ break;
default:
tcg_abort();
}
@@ -305,7 +311,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
tgen_arithi(s, ARITH_ADD, reg, val, 0);
}
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch. */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
int32_t val, val1;
TCGLabel *l = &s->labels[label_index];
@@ -320,6 +327,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
tcg_out8(s, 0x70 + opc);
tcg_out8(s, val1);
} else {
+ if (small)
+ tcg_abort();
if (opc == -1) {
tcg_out8(s, 0xe9);
tcg_out32(s, val - 5);
@@ -329,6 +338,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
tcg_out32(s, val - 6);
}
}
+ } else if (small) {
+ if (opc == -1) {
+ tcg_out8(s, 0xeb);
+ } else {
+ tcg_out8(s, 0x70 + opc);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+ s->code_ptr += 1;
} else {
if (opc == -1) {
tcg_out8(s, 0xe9);
@@ -343,7 +360,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
static void tcg_out_brcond(TCGContext *s, int cond,
TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index)
+ int label_index, int small)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -355,64 +372,84 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
}
- tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
/* XXX: we implement it at the target level to avoid having to
handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
- const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args, int small)
{
int label_next;
label_next = gen_new_label();
switch(args[4]) {
case TCG_COND_EQ:
- tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
- tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+ tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ label_next, small);
+ tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+ args[5], small);
break;
case TCG_COND_NE:
- tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
- tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+ tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ args[5], small);
+ tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
+ args[5], small);
break;
case TCG_COND_LT:
- tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_LE:
- tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GT:
- tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GE:
- tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_LTU:
- tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_LEU:
- tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GTU:
- tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GEU:
- tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+ tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
default:
tcg_abort();
@@ -913,7 +950,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
}
break;
case INDEX_op_br:
- tcg_out_jxx(s, JCC_JMP, args[0]);
+ tcg_out_jxx(s, JCC_JMP, args[0], 0);
break;
case INDEX_op_movi_i32:
tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -1044,10 +1081,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
break;
case INDEX_op_brcond_i32:
- tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ args[3], 0);
break;
case INDEX_op_brcond2_i32:
- tcg_out_brcond2(s, args, const_args);
+ tcg_out_brcond2(s, args, const_args, 0);
break;
case INDEX_op_bswap16_i32:
[-- Attachment #5: commit-cmov-i386 --]
[-- Type: text/plain, Size: 7673 bytes --]
commit b2c6cc4c5efa898b53653a35dd957dc653513a29
Author: Richard Henderson <rth@twiddle.net>
Date: Fri Dec 18 13:29:53 2009 -0800
tcg-i386: Implement setcond, movcond, setcond2.
Implement conditional moves in the i386 backend.
Signed-off-by: Richard Henderson <rth@twiddle.net>
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 48c9bc8..cec0663 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -358,9 +358,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
}
}
-static void tcg_out_brcond(TCGContext *s, int cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index, int small)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -372,6 +371,13 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
}
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int small)
+{
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2);
tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
@@ -457,6 +463,168 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+ if (use_xor)
+ tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+ /* setcc */
+ tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, arg0);
+ if (!use_xor)
+ tgen_arithi(s, ARITH_AND, arg0, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGArg new_args[6];
+ int label_true, label_over;
+
+ memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+ if (args[0] == args[1] || args[0] == args[2]
+ || (!const_args[3] && args[0] == args[3])
+ || (!const_args[4] && args[0] == args[4])) {
+ /* When the destination overlaps with one of the argument
+ registers, don't do anything tricky. */
+ label_true = gen_new_label();
+ label_over = gen_new_label();
+
+ new_args[5] = label_true;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+ tcg_out_jxx(s, JCC_JMP, label_over, 1);
+ tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ } else {
+ /* When the destination does not overlap one of the arguments,
+ clear the destination first, jump if cond false, and emit an
+ increment in the true case. This results in smaller code. */
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+ label_over = gen_new_label();
+ new_args[4] = tcg_invert_cond(new_args[4]);
+ new_args[5] = label_over;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ }
+}
+
+static inline int have_cmov(void)
+{
+#ifdef __i686__
+ /* Compiler options say that cmov is available. */
+ return 1;
+#else
+ /* ??? Use cpuid or something and figure out what's running. */
+ return 0;
+#endif
+}
+
+static void tcg_out_movcond(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ int vtc, vfc, cond, use_cmov = 0, do_swap = 0;
+ TCGArg d, vt, vf;
+
+ d = args[0];
+ vt = args[3];
+ vf = args[4];
+ vtc = const_args[3];
+ vfc = const_args[4];
+
+ /* ??? The jcc code path below assumes that one mov insn must be skipped.
+ Rather than complicate the code below, make sure to simplify the
+ conditional move here. */
+ if (vtc == vfc && vt == vf) {
+ if (vtc)
+ tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+ else
+ tcg_out_mov(s, d, vt);
+ return;
+ }
+
+ cond = args[5];
+
+ /* If both arguments are constants, we *could* do all the funny bits that
+ gcc does with sbc, masks, etc. There's likely no point. Just use the
+ jcc version in this case. We also have to be careful about clobbering
+ inputs when trying to move constants into position. */
+
+ if (have_cmov()) {
+ use_cmov = 1;
+ if (vtc) {
+ if (vfc || d == vf)
+ use_cmov = 0;
+ else
+ do_swap = 1;
+ } else if (d == vt) {
+ if (vfc)
+ use_cmov = 0;
+ else
+ do_swap = 1;
+ }
+ }
+
+ if (!use_cmov) {
+ /* We're going to follow the lead of cmov and set D=VF first,
+ which means inverting the condition upon which we jump. */
+ cond = tcg_invert_cond(cond);
+
+ /* Don't allow the move we jump over to be a nop. */
+ do_swap = (!vtc && d == vt);
+ }
+
+ if (do_swap) {
+ TCGArg t;
+ cond = tcg_invert_cond(cond);
+ t = vf, vf = vt, vt = t;
+ t = vfc, vfc = vtc, vtc = t;
+ }
+
+ /* If possible, set D=0 before the compare, so that we can use XOR. */
+ if (vfc && vf == 0 && d != args[1] && (const_args[2] || d != args[2])) {
+ tcg_out_movi(s, TCG_TYPE_I32, d, 0);
+ vf = d, vfc = 0;
+ }
+
+ tcg_out_cond(s, cond, args[1], args[2], const_args[2]);
+
+ if (vfc) {
+ /* Force the use of "mov $0, d" to avoid clobbering flags. */
+ tcg_out8(s, 0xb8 + d);
+ tcg_out32(s, vf);
+ } else {
+ tcg_out_mov(s, d, vf);
+ }
+
+ if (use_cmov) {
+ if (vtc)
+ tcg_abort();
+ /* cmovcc */
+ tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT, d, vt);
+ } else {
+ int label_next = gen_new_label();
+
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_next, 1);
+ if (vtc)
+ tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+ else
+ tcg_out_mov(s, d, vt);
+
+ tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+ }
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -1118,6 +1286,16 @@ static inline void tcg_out_op(TCGContext *s, int opc,
tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args, const_args);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
@@ -1206,6 +1384,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ext8u_i32, { "r", "q"} },
{ INDEX_op_ext16u_i32, { "r", "r"} },
+ { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "ri", "ri" } },
+ { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
#if TARGET_LONG_BITS == 32
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },
* [Qemu-devel] Re: tcg conditional set/move, round 3
2009-12-18 21:38 ` [Qemu-devel] tcg conditional set/move, round 3 Richard Henderson
@ 2009-12-19 11:40 ` Laurent Desnogues
2009-12-19 16:09 ` Richard Henderson
2009-12-19 12:09 ` [Qemu-devel] " Andreas Färber
2009-12-19 13:03 ` Aurelien Jarno
2 siblings, 1 reply; 26+ messages in thread
From: Laurent Desnogues @ 2009-12-19 11:40 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Fri, Dec 18, 2009 at 10:38 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
>>>
>>> tcg: Generic support for conditional set and conditional move.
>>
>> Needs cosmetics changes.
>
> Fixed, attachment 1.
>
>>> tcg-x86_64: Implement setcond and movcond.
>>
>> Some cosmetics and comments, but overall good.
>
> Fixed, attachment 2.
>
>>> tcg-i386: Implement small forward branches.
>>
>> I think this contains a bug.
>
> Fixed, attachment 3. I've added an abort to patch_reloc to verify that the
> relocation is in range. I've propagated the "small" flag to all of the
> branch functions so that...
>
>>> tcg-i386: Simplify brcond2.
>>
>> I don't like the rewrite of brcond2.
>
> ... this patch is dropped.
>
>>> tcg-i386: Implement setcond, movcond, setcond2.
>>
>> Not yet reviewed.
>
> Fixed, attachment 4. Similar changes to the amd64 patch.
Everything looks good to me, though for i386 I'd not bet
my life it's 100% correct. OTOH I think this is good enough
to go into mainline, so that people can start adding support
in the front-ends.
BTW for compiling to 32-bit on a 64-bit x86, the configure
script is broken as it checks gcc before having set -m32,
so passing -march=i686 will make it fail (in the end it means
I could not convince QEMU to use cmov :-).
Thanks,
Laurent
Acked-by: Laurent Desnogues <laurent.desnogues@gmail.com>
* Re: [Qemu-devel] tcg conditional set/move, round 3
2009-12-18 21:38 ` [Qemu-devel] tcg conditional set/move, round 3 Richard Henderson
2009-12-19 11:40 ` [Qemu-devel] " Laurent Desnogues
@ 2009-12-19 12:09 ` Andreas Färber
2009-12-19 13:03 ` Aurelien Jarno
2 siblings, 0 replies; 26+ messages in thread
From: Andreas Färber @ 2009-12-19 12:09 UTC (permalink / raw)
To: Richard Henderson; +Cc: Laurent Desnogues, qemu-devel
Hello,
On 18.12.2009 at 22:38, Richard Henderson wrote:
> <commit-cmov-1.txt><commit-cmov-amd64.txt><commit-cmov-i386-jmps.txt><commit-cmov-i386.txt>
Please send patches inline (and one patch per mail, like your original
series), then a) you get more review comments and b) we can apply the
patches with git-am (including your description) for testing.
You can use git-send-email's --in-reply-to= to make them appear as
replies in this thread.
Thanks,
Andreas
* Re: [Qemu-devel] tcg conditional set/move, round 3
2009-12-18 21:38 ` [Qemu-devel] tcg conditional set/move, round 3 Richard Henderson
2009-12-19 11:40 ` [Qemu-devel] " Laurent Desnogues
2009-12-19 12:09 ` [Qemu-devel] " Andreas Färber
@ 2009-12-19 13:03 ` Aurelien Jarno
2009-12-19 13:32 ` Aurelien Jarno
2009-12-19 16:19 ` Richard Henderson
2 siblings, 2 replies; 26+ messages in thread
From: Aurelien Jarno @ 2009-12-19 13:03 UTC (permalink / raw)
To: Richard Henderson; +Cc: Laurent Desnogues, qemu-devel
On Fri, Dec 18, 2009 at 01:38:24PM -0800, Richard Henderson wrote:
> On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
>>> tcg: Generic support for conditional set and conditional move.
>>
>> Needs cosmetics changes.
>
> Fixed, attachment 1.
>
>>> tcg-x86_64: Implement setcond and movcond.
>>
>> Some cosmetics and comments, but overall good.
>
> Fixed, attachment 2.
>
>>> tcg-i386: Implement small forward branches.
>>
>> I think this contains a bug.
>
> Fixed, attachment 3. I've added an abort to patch_reloc to verify that
> the relocation is in range. I've propagated the "small" flag to all of
> the branch functions so that...
>
>>> tcg-i386: Simplify brcond2.
>>
>> I don't like the rewrite of brcond2.
>
> ... this patch is dropped.
>
>>> tcg-i386: Implement setcond, movcond, setcond2.
>>
>> Not yet reviewed.
>
> Fixed, attachment 4. Similar changes to the amd64 patch.
>
Could you please send the patches inline instead? It makes them easier
to comment on.
Please find my comments here:
- I am fine with the setcond instruction
- For the movcond instruction, is there a real use for both the vtrue
  and vfalse values? Most CPUs actually implement a version with only
  one value. Implementing it with two values moves complexity into the
  arch-specific tcg code.
- Do we really want to make movcond mandatory? It can easily be
  implemented using setcond plus and/or instructions (see the sketch
  after this list).
- The cmov instruction is not supported on all i386 CPUs. While it is
  unlikely that someone runs qemu-system on such a CPU, it is not
  improbable that someone runs qemu-user on one. We should probably
  provide alternative code and a check in configure (e.g. trying to
  compile inline asm containing a cmov instruction).
- I am not sure that using xor followed by setcc *conditionally*,
  instead of a movzb after setcc, is a good idea. The two instructions
  are supposed to execute at the same speed, and time spent in code
  generation is not negligible.
- Pay attention to the coding style; there are a few cases of if
  without {}.
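As a sketch of the generic expansion mentioned above (a hypothetical
helper, not code from this series; it only uses existing tcg_gen_* ops
plus the new setcond):

    static inline void gen_movcond_fallback_i32(int cond, TCGv_i32 ret,
                                                TCGv_i32 c1, TCGv_i32 c2,
                                                TCGv_i32 vtrue, TCGv_i32 vfalse)
    {
        TCGv_i32 mask = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_setcond_i32(cond, mask, c1, c2);  /* mask = (c1 cond c2) ? 1 : 0 */
        tcg_gen_neg_i32(mask, mask);              /* mask = cond ? -1 : 0 */
        tcg_gen_and_i32(tmp, vtrue, mask);        /* tmp  = cond ? vtrue : 0 */
        tcg_gen_not_i32(mask, mask);              /* mask = cond ? 0 : -1 */
        tcg_gen_and_i32(mask, vfalse, mask);      /* mask = cond ? 0 : vfalse */
        tcg_gen_or_i32(ret, tmp, mask);           /* ret  = cond ? vtrue : vfalse */

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(mask);
    }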
A final comment: would it be possible to split setcond and movcond into
different patches? setcond looks ready to go; there are probably some
more concerns/comments about movcond.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
* Re: [Qemu-devel] tcg conditional set/move, round 3
2009-12-19 13:03 ` Aurelien Jarno
@ 2009-12-19 13:32 ` Aurelien Jarno
2009-12-19 16:19 ` Richard Henderson
1 sibling, 0 replies; 26+ messages in thread
From: Aurelien Jarno @ 2009-12-19 13:32 UTC (permalink / raw)
To: Richard Henderson; +Cc: Laurent Desnogues, qemu-devel
On Sat, Dec 19, 2009 at 02:03:46PM +0100, Aurelien Jarno wrote:
> On Fri, Dec 18, 2009 at 01:38:24PM -0800, Richard Henderson wrote:
> > On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
> >>> tcg: Generic support for conditional set and conditional move.
> >>
> >> Needs cosmetics changes.
> >
> > Fixed, attachment 1.
> >
> >>> tcg-x86_64: Implement setcond and movcond.
> >>
> >> Some cosmetics and comments, but overall good.
> >
> > Fixed, attachment 2.
> >
> >>> tcg-i386: Implement small forward branches.
> >>
> >> I think this contains a bug.
> >
> > Fixed, attachment 3. I've added an abort to patch_reloc to verify that
> > the relocation is in range. I've propagated the "small" flag to all of
> > the branch functions so that...
> >
> >>> tcg-i386: Simplify brcond2.
> >>
> >> I don't like the rewrite of brcond2.
> >
> > ... this patch is dropped.
> >
> >>> tcg-i386: Implement setcond, movcond, setcond2.
> >>
> >> Not yet reviewed.
> >
> > Fixed, attachment 4. Similar changes to the amd64 patch.
> >
>
>
> Could you please send the patches inline instead. It makes them easier
> to comment.
>
> Please find my comments here:
> - I am fine with the setcond instruction
> - For the movcond instruction, is there a real use for vtrue and vfalse
> values? Most CPU actually implement a version with one value.
> Implementing it with two values moves complexity within the arch
> specific tcg code.
> - Do we really want to make movcond mandatory? It can be easily
> implemented using setcond, and, or instructions.
> - The cmov instruction is not supported on all i386 CPU. While it is
> unlikely that someone runs qemu-system on such a CPU, it is not
> improbable that someone runs qemu-user on such a CPU. We should
> probably provide an alternative code and a check in configure (e.g.
> trying to compile asm inline code containing a cmov instruction).
Forget about that, I read the i386 patch too quickly.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
* Re: [Qemu-devel] Re: tcg conditional set/move, round 3
2009-12-19 11:40 ` [Qemu-devel] " Laurent Desnogues
@ 2009-12-19 16:09 ` Richard Henderson
0 siblings, 0 replies; 26+ messages in thread
From: Richard Henderson @ 2009-12-19 16:09 UTC (permalink / raw)
To: Laurent Desnogues; +Cc: qemu-devel
On 12/19/2009 03:40 AM, Laurent Desnogues wrote:
> BTW for compiling to 32-bit on a 64-bit x86, the configure
> script is broken as it checks gcc before having set -m32,
> so passing -march=i686 will make it fail (in the end it means
> I could not convince QEMU to use cmov :-).
One of my servers has a 32-bit install. I got tired of remembering the
moderately long command-line required to configure GCC for a 32-bit
build on a 64-bit host. :-)
r~
* Re: [Qemu-devel] tcg conditional set/move, round 3
2009-12-19 13:03 ` Aurelien Jarno
2009-12-19 13:32 ` Aurelien Jarno
@ 2009-12-19 16:19 ` Richard Henderson
2009-12-19 23:02 ` Aurelien Jarno
1 sibling, 1 reply; 26+ messages in thread
From: Richard Henderson @ 2009-12-19 16:19 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: Laurent Desnogues, qemu-devel
On 12/19/2009 05:03 AM, Aurelien Jarno wrote:
> - For the movcond instruction, is there a real use for vtrue and vfalse
> values? Most CPU actually implement a version with one value.
> Implementing it with two values moves complexity within the arch
> specific tcg code.
The reason I added both is that rather than force TCG to insert extra
moves in order to always match VFALSE with DEST, I could have the
backend invert the condition if VTRUE happened to match DEST already.
I suppose it would be possible to tweak the TCG register allocator
to understand such commutative operations. That seemed harder, but perhaps
it really isn't, considering that the inversion code has to be replicated
across all the targets. It's certainly something to think about.
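Concretely, the inversion in question -- as it appears in the x86_64
tcg_out_movcond in the round-3 attachment -- is just:

    if (dest == vtrue) {
        /* DEST already holds VTRUE: invert the condition and swap the
           operands, so the unconditional move targets VFALSE and the
           cmov supplies VTRUE. */
        cond = tcg_invert_cond(cond);
        vtrue = vfalse;
        vfalse = dest;
    }

    tcg_out_cond(s, cond, cmp1, cmp2, const_cmp2, rexw);
    if (dest != vfalse)
        tcg_out_mov(s, dest, vfalse);
    /* cmovcc */
    tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, dest, vtrue);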
> - Do we really want to make movcond mandatory? It can be easily
> implemented using setcond, and, or instructions.
I think so, in that every host does have something useful to emit
that's better than the generic fallback, and we'll quickly have
implementations for each.
> - I am not sure using xor and followed by setcc *conditionally* instead
> of a movzb after setcc is a good idea. The two instructions are
> supposed to be executed at the same speed, and time spent in code
> generation is not negligeable.
I can certainly remove that bit if you insist.
> - Pay attention to the coding style, there are a few cases of if
> without {}.
Ok. I try to remember, but it's at odds with my usual style, so please
understand momentary lapses.
> A final comment, would it be possible to split setcond and movcond in
> different patches? setcond looks ready to go, there are probably some
> more concerns/comments about movcond.
Ok.
r~
* Re: [Qemu-devel] tcg conditional set/move, round 3
2009-12-19 16:19 ` Richard Henderson
@ 2009-12-19 23:02 ` Aurelien Jarno
0 siblings, 0 replies; 26+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:02 UTC (permalink / raw)
To: Richard Henderson; +Cc: Laurent Desnogues, qemu-devel
On Sat, Dec 19, 2009 at 08:19:32AM -0800, Richard Henderson wrote:
> On 12/19/2009 05:03 AM, Aurelien Jarno wrote:
> >- For the movcond instruction, is there a real use for vtrue and vfalse
> > values? Most CPU actually implement a version with one value.
> > Implementing it with two values moves complexity within the arch
> > specific tcg code.
>
> The reason I added both is that rather than force TCG to insert
> extra moves in order to always match VFALSE with DEST, I could have
> the backend invert the condition if VTRUE happened to match DEST
> already.
>
> I suppose it would be possible to tweek the TCG register allocator
> to understand such commutative operations. That seemed harder, but
> perhaps it really isn't considering that inversion code has to be
> replicated across all the targets. It's certainly something to
> think about.
>
My main concern here is that we are introducing complex code that has
to be multiplied by the number of TCG targets we have. On the other
hand, I am not sure there are a lot of use cases for the different
architectures we support.
In addition, I am reserved about making such big changes given that I
am not sure they will result in a speed gain. All the previous setcond
implementations were stopped because there was no measurable gain.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
Thread overview (26+ messages):
[not found] <761ea48b0912170620l534dcb02m8ea6b59524d76dbe@mail.gmail.com>
2009-12-17 19:32 ` [Qemu-devel] [PATCH 0/6] tcg conditional set/move, round 2 Richard Henderson
2009-12-17 17:27 ` [Qemu-devel] [PATCH 1/6] tcg: Generic support for conditional set and conditional move Richard Henderson
2009-12-17 20:50 ` malc
2009-12-18 11:38 ` [Qemu-devel] " Laurent Desnogues
2009-12-17 17:28 ` [Qemu-devel] [PATCH 2/6] tcg: Add tcg_invert_cond Richard Henderson
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
2009-12-17 17:32 ` [Qemu-devel] [PATCH 3/6] tcg-x86_64: Implement setcond and movcond Richard Henderson
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
2009-12-18 17:11 ` Richard Henderson
2009-12-18 17:41 ` Laurent Desnogues
2009-12-17 17:55 ` [Qemu-devel] [PATCH 4/6] tcg-i386: Implement small forward branches Richard Henderson
2009-12-18 11:39 ` [Qemu-devel] " Laurent Desnogues
2009-12-18 17:16 ` Richard Henderson
2009-12-17 18:38 ` [Qemu-devel] [PATCH 5/6] tcg-i386: Simplify brcond2 Richard Henderson
2009-12-18 11:40 ` [Qemu-devel] " Laurent Desnogues
2009-12-18 17:45 ` Richard Henderson
2009-12-17 19:08 ` [Qemu-devel] [PATCH 6/6] tcg-i386: Implement setcond, movcond, setcond2 Richard Henderson
2009-12-18 11:37 ` [Qemu-devel] Re: [PATCH 0/6] tcg conditional set/move, round 2 Laurent Desnogues
2009-12-18 21:38 ` [Qemu-devel] tcg conditional set/move, round 3 Richard Henderson
2009-12-19 11:40 ` [Qemu-devel] " Laurent Desnogues
2009-12-19 16:09 ` Richard Henderson
2009-12-19 12:09 ` [Qemu-devel] " Andreas Färber
2009-12-19 13:03 ` Aurelien Jarno
2009-12-19 13:32 ` Aurelien Jarno
2009-12-19 16:19 ` Richard Henderson
2009-12-19 23:02 ` Aurelien Jarno