qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 2/5] tcg-x86_64: Implement setcond.
  2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
@ 2009-12-19 16:50 ` Richard Henderson
  2009-12-19 23:11   ` Aurelien Jarno
  2009-12-19 18:01 ` [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set Richard Henderson
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 34+ messages in thread
From: Richard Henderson @ 2009-12-19 16:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/x86_64/tcg-target.c |   38 +++++++++++++++++++++++++++++++++-----
 1 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index 2339091..33dc452 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -491,26 +491,42 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index, int rexw)
+static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
+                        int const_arg2, int rexw)
 {
     if (const_arg2) {
         if (arg2 == 0) {
             /* test r, r */
             tcg_out_modrm(s, 0x85 | rexw, arg1, arg1);
         } else {
-            if (rexw)
+            if (rexw) {
                 tgen_arithi64(s, ARITH_CMP, arg1, arg2);
-            else
+            } else {
                 tgen_arithi32(s, ARITH_CMP, arg1, arg2);
+            }
         }
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
     }
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int rexw)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
+                            TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
+    /* setcc */
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, dest);
+    tgen_arithi32(s, ARITH_AND, dest, 0xff);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -1197,6 +1213,15 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tcg_out_modrm(s, 0x8b, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], 0);
+        break;
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+                        const_args[2], P_REXW);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1376,6 +1401,9 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
     { INDEX_op_ext16u_i64, { "r", "r"} },
     { INDEX_op_ext32u_i64, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "r", "r", "ri" } },
+    { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set
  2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
  2009-12-19 16:50 ` [Qemu-devel] [PATCH 2/5] tcg-x86_64: Implement setcond Richard Henderson
@ 2009-12-19 18:01 ` Richard Henderson
  2009-12-19 23:11   ` Aurelien Jarno
  2009-12-22 11:27   ` Laurent Desnogues
  2009-12-19 18:44 ` [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches Richard Henderson
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 34+ messages in thread
From: Richard Henderson @ 2009-12-19 18:01 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Defines setcond_{i32,i64} and setcond2_i32 for 64-on-32-bit.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/README    |   20 +++++++++++++++++++-
 tcg/tcg-op.h  |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg-opc.h |    3 +++
 tcg/tcg.c     |   21 +++++++++++++++------
 4 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/tcg/README b/tcg/README
index e672258..7028de6 100644
--- a/tcg/README
+++ b/tcg/README
@@ -152,6 +152,11 @@ Conditional jump if t0 cond t1 is true. cond can be:
     TCG_COND_LEU /* unsigned */
     TCG_COND_GTU /* unsigned */
 
+* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
 ********* Arithmetic
 
 * add_i32/i64 t0, t1, t2
@@ -282,6 +287,19 @@ order bytes must be set to zero.
 Indicate that the value of t0 won't be used later. It is useful to
 force dead code elimination.
 
+********* Conditional moves
+
+* setcond_i32/i64 cond, dest, t1, t2
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments.  The result is a 32-bit value.
+
 ********* Type conversions
 
 * ext_i32_i64 t0, t1
@@ -375,7 +393,7 @@ The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
 
 On a 32 bit target, all 64 bit operations are converted to 32 bits. A
 few specific operations must be implemented to allow it (see add2_i32,
-sub2_i32, brcond2_i32).
+sub2_i32, brcond2_i32, setcond2_i32).
 
 Floating point operations are not supported in this version. A
 previous incarnation of the code generator had full support of them,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index faf2e8b..70a75a0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
     *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
 }
 
+static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+                                    TCGv_i32 arg3, TCGv_i32 arg4,
+                                    TCGv_i32 arg5, TCGArg arg6)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+    *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+                                    TCGv_i64 arg3, TCGv_i64 arg4,
+                                    TCGv_i64 arg5, TCGArg arg6)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+    *gen_opparam_ptr++ = arg6;
+}
+
 static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                      TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
                                      TCGArg arg6)
@@ -1795,6 +1821,25 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
+                                       TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+                                       TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#if TCG_TARGET_REG_BITS == 64
+    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+#else
+    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
@@ -2067,6 +2112,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sari_tl tcg_gen_sari_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
 #define tcg_gen_muli_tl tcg_gen_muli_i64
 #define tcg_gen_div_tl tcg_gen_div_i64
@@ -2137,6 +2183,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_sari_tl tcg_gen_sari_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
 #define tcg_gen_muli_tl tcg_gen_muli_i32
 #define tcg_gen_div_tl tcg_gen_div_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..89db3b4 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -42,6 +42,7 @@ DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 
 DEF2(mov_i32, 1, 1, 0, 0)
 DEF2(movi_i32, 1, 0, 1, 0)
+DEF2(setcond_i32, 1, 2, 1, 0)
 /* load/store */
 DEF2(ld8u_i32, 1, 1, 1, 0)
 DEF2(ld8s_i32, 1, 1, 1, 0)
@@ -82,6 +83,7 @@ DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
+DEF2(setcond2_i32, 1, 4, 1, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext8s_i32
 DEF2(ext8s_i32, 1, 1, 0, 0)
@@ -111,6 +113,7 @@ DEF2(neg_i32, 1, 1, 0, 0)
 #if TCG_TARGET_REG_BITS == 64
 DEF2(mov_i64, 1, 1, 0, 0)
 DEF2(movi_i64, 1, 0, 1, 0)
+DEF2(setcond_i64, 1, 2, 1, 0)
 /* load/store */
 DEF2(ld8u_i64, 1, 1, 1, 0)
 DEF2(ld8s_i64, 1, 1, 1, 0)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3c0e296..9949814 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -670,6 +670,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
 }
 #endif
 
+
 static void tcg_reg_alloc_start(TCGContext *s)
 {
     int i;
@@ -888,21 +889,29 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
                 fprintf(outfile, "%s",
                         tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
             }
-            if (c == INDEX_op_brcond_i32
+            switch (c) {
+            case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+            case INDEX_op_brcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+            case INDEX_op_brcond_i64:
+#endif
+            case INDEX_op_setcond_i32:
 #if TCG_TARGET_REG_BITS == 32
-                || c == INDEX_op_brcond2_i32
+            case INDEX_op_setcond2_i32:
 #elif TCG_TARGET_REG_BITS == 64
-                || c == INDEX_op_brcond_i64
+            case INDEX_op_setcond_i64:
 #endif
-                ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
                 else
                     fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
                 i = 1;
-            }
-            else
+                break;
+            default:
                 i = 0;
+                break;
+            }
             for(; i < nb_cargs; i++) {
                 if (k != 0)
                     fprintf(outfile, ",");
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches.
  2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
  2009-12-19 16:50 ` [Qemu-devel] [PATCH 2/5] tcg-x86_64: Implement setcond Richard Henderson
  2009-12-19 18:01 ` [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set Richard Henderson
@ 2009-12-19 18:44 ` Richard Henderson
  2009-12-19 23:11   ` Aurelien Jarno
  2009-12-19 23:32   ` Laurent Desnogues
  2009-12-19 18:44 ` [Qemu-devel] [PATCH 4/5] tcg: Add tcg_invert_cond Richard Henderson
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 34+ messages in thread
From: Richard Henderson @ 2009-12-19 18:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

There are places, like brcond2, where we know that the destination
of a forward branch will be within 127 bytes.

Add the R_386_PC8 relocation type to support this.  Add a flag to
tcg_out_jxx and tcg_out_brcond* to enable it.  Set the flag in the
brcond2 label_next branches; pass along the input flag otherwise.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 elf.h                 |    2 +
 tcg/i386/tcg-target.c |  116 +++++++++++++++++++++++++++++++++----------------
 2 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@ typedef struct {
 #define R_386_GOTOFF	9
 #define R_386_GOTPC	10
 #define R_386_NUM	11
+/* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */
+#define R_386_PC8	23
 
 #define R_MIPS_NONE		0
 #define R_MIPS_16		1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..4c42caf 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,12 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     case R_386_PC32:
         *(uint32_t *)code_ptr = value - (long)code_ptr;
         break;
+    case R_386_PC8:
+        value -= (long)code_ptr;
+        if (value != (int8_t)value)
+            tcg_abort();
+        *(uint8_t *)code_ptr = value;
+        break;
     default:
         tcg_abort();
     }
@@ -305,7 +311,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
         tgen_arithi(s, ARITH_ADD, reg, val, 0);
 }
 
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch.  */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
@@ -314,12 +321,16 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
         val = l->u.value - (tcg_target_long)s->code_ptr;
         val1 = val - 2;
         if ((int8_t)val1 == val1) {
-            if (opc == -1)
+            if (opc == -1) {
                 tcg_out8(s, 0xeb);
-            else
+            } else {
                 tcg_out8(s, 0x70 + opc);
+            }
             tcg_out8(s, val1);
         } else {
+            if (small) {
+                tcg_abort();
+            }
             if (opc == -1) {
                 tcg_out8(s, 0xe9);
                 tcg_out32(s, val - 5);
@@ -329,6 +340,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out32(s, val - 6);
             }
         }
+    } else if (small) {
+        if (opc == -1) {
+            tcg_out8(s, 0xeb);
+        } else {
+            tcg_out8(s, 0x70 + opc);
+        }
+        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+        s->code_ptr += 1;
     } else {
         if (opc == -1) {
             tcg_out8(s, 0xe9);
@@ -343,7 +362,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
 
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index)
+                           int label_index, int small)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -355,64 +374,84 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
     }
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
-                            const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+                            const int *const_args, int small)
 {
     int label_next;
     label_next = gen_new_label();
     switch(args[4]) {
     case TCG_COND_EQ:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
-        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+                       label_next, small);
+        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+                       args[5], small);
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
-        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+                       args[5], small);
+        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
+                       args[5], small);
         break;
     case TCG_COND_LT:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_LE:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GT:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GE:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_LTU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_LEU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GTU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     case TCG_COND_GEU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+                       args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+                       args[5], small);
         break;
     default:
         tcg_abort();
@@ -913,7 +952,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         }
         break;
     case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, args[0]);
+        tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -1044,10 +1083,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+                       args[3], 0);
         break;
     case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args);
+        tcg_out_brcond2(s, args, const_args, 0);
         break;
 
     case INDEX_op_bswap16_i32:
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH 4/5] tcg: Add tcg_invert_cond.
  2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
                   ` (2 preceding siblings ...)
  2009-12-19 18:44 ` [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches Richard Henderson
@ 2009-12-19 18:44 ` Richard Henderson
  2009-12-19 23:11   ` Aurelien Jarno
  2009-12-19 18:46 ` [Qemu-devel] [PATCH 5/5] tcg-i386: Implement setcond Richard Henderson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 34+ messages in thread
From: Richard Henderson @ 2009-12-19 18:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

It is very handy to have a reliable mapping of a condition to its inverse.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg.h |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/tcg/tcg.h b/tcg/tcg.h
index 9824493..376d6af 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -205,6 +205,11 @@ typedef enum {
     TCG_COND_GTU,
 } TCGCond;
 
+static inline TCGCond tcg_invert_cond(TCGCond c)
+{
+    return (TCGCond)(c ^ 1);
+}
+
 #define TEMP_VAL_DEAD  0
 #define TEMP_VAL_REG   1
 #define TEMP_VAL_MEM   2
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH 5/5] tcg-i386: Implement setcond.
  2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
                   ` (3 preceding siblings ...)
  2009-12-19 18:44 ` [Qemu-devel] [PATCH 4/5] tcg: Add tcg_invert_cond Richard Henderson
@ 2009-12-19 18:46 ` Richard Henderson
  2009-12-19 23:11   ` Aurelien Jarno
  2009-12-19 23:11 ` [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Aurelien Jarno
  2009-12-20 22:57 ` Paul Brook
  6 siblings, 1 reply; 34+ messages in thread
From: Richard Henderson @ 2009-12-19 18:46 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/i386/tcg-target.c |   73 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4c42caf..43e0155 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -360,9 +360,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index, int small)
+static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
+                        int const_arg2)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -374,6 +373,13 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
     }
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int small)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2);
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
@@ -459,6 +465,57 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
+                            TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2);
+    /* setcc */
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, dest);
+    tgen_arithi(s, ARITH_AND, dest, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+                             const int *const_args)
+{
+    TCGArg new_args[6];
+    int label_true, label_over;
+
+    memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+    if (args[0] == args[1] || args[0] == args[2]
+        || (!const_args[3] && args[0] == args[3])
+        || (!const_args[4] && args[0] == args[4])) {
+        /* When the destination overlaps with one of the argument
+           registers, don't do anything tricky.  */
+        label_true = gen_new_label();
+        label_over = gen_new_label();
+
+        new_args[5] = label_true;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+        tcg_out_jxx(s, JCC_JMP, label_over, 1);
+        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    } else {
+        /* When the destination does not overlap one of the arguments,
+           clear the destination first, jump if cond false, and emit an
+           increment in the true case.  This results in smaller code.  */
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+        label_over = gen_new_label();
+        new_args[4] = tcg_invert_cond(new_args[4]);
+        new_args[5] = label_over;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -1120,6 +1177,13 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args, const_args);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1208,6 +1272,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext8u_i32, { "r", "q"} },
     { INDEX_op_ext16u_i32, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
-- 
1.6.5.2

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
@ 2009-12-19 18:52 Richard Henderson
  2009-12-19 16:50 ` [Qemu-devel] [PATCH 2/5] tcg-x86_64: Implement setcond Richard Henderson
                   ` (6 more replies)
  0 siblings, 7 replies; 34+ messages in thread
From: Richard Henderson @ 2009-12-19 18:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien

Changes from round 3:

 * Drop movcond for now.
 * Only use movzbl and not xor in setcond.


r~


Richard Henderson (5):
  tcg: Generic support for conditional set
  tcg-x86_64: Implement setcond.
  tcg-i386: Implement small forward branches.
  tcg: Add tcg_invert_cond.
  tcg-i386: Implement setcond.

 elf.h                   |    2 +
 tcg/README              |   20 +++++-
 tcg/i386/tcg-target.c   |  187 +++++++++++++++++++++++++++++++++++++----------
 tcg/tcg-op.h            |   47 ++++++++++++
 tcg/tcg-opc.h           |    3 +
 tcg/tcg.c               |   21 ++++--
 tcg/tcg.h               |    5 +
 tcg/x86_64/tcg-target.c |   38 ++++++++-
 8 files changed, 271 insertions(+), 52 deletions(-)

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
                   ` (4 preceding siblings ...)
  2009-12-19 18:46 ` [Qemu-devel] [PATCH 5/5] tcg-i386: Implement setcond Richard Henderson
@ 2009-12-19 23:11 ` Aurelien Jarno
  2009-12-19 23:43   ` malc
  2009-12-20 22:57 ` Paul Brook
  6 siblings, 1 reply; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 10:52:52AM -0800, Richard Henderson wrote:
> Changes from round 3:
> 
>  * Drop movcond for now.
>  * Only use movzbl and not xor in setcond.
> 
> 

Thanks for the update, it looks like ready, I only have cosmetic or
minor comments. See my comments in the individual patches

I think we should commit it now, but wait a bit until it is implemented
in all TCG targets to use this new ops. We can circulate patches in the
meanwhile.

> 
> 
> Richard Henderson (5):
>   tcg: Generic support for conditional set
>   tcg-x86_64: Implement setcond.
>   tcg-i386: Implement small forward branches.
>   tcg: Add tcg_invert_cond.
>   tcg-i386: Implement setcond.
> 
>  elf.h                   |    2 +
>  tcg/README              |   20 +++++-
>  tcg/i386/tcg-target.c   |  187 +++++++++++++++++++++++++++++++++++++----------
>  tcg/tcg-op.h            |   47 ++++++++++++
>  tcg/tcg-opc.h           |    3 +
>  tcg/tcg.c               |   21 ++++--
>  tcg/tcg.h               |    5 +
>  tcg/x86_64/tcg-target.c |   38 ++++++++-
>  8 files changed, 271 insertions(+), 52 deletions(-)
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set
  2009-12-19 18:01 ` [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set Richard Henderson
@ 2009-12-19 23:11   ` Aurelien Jarno
  2009-12-19 23:24     ` Richard Henderson
  2009-12-22 11:27   ` Laurent Desnogues
  1 sibling, 1 reply; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 10:01:57AM -0800, Richard Henderson wrote:
> Defines setcond_{i32,i64} and setcond2_i32 for 64-on-32-bit.

I do wonder if setcond2_i32 and brcond2_i32 should be added there. Those
are internal ops that are actually not exported in tcg-op.h.

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/README    |   20 +++++++++++++++++++-
>  tcg/tcg-op.h  |   47 +++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg-opc.h |    3 +++
>  tcg/tcg.c     |   21 +++++++++++++++------
>  4 files changed, 84 insertions(+), 7 deletions(-)
> 
> diff --git a/tcg/README b/tcg/README
> index e672258..7028de6 100644
> --- a/tcg/README
> +++ b/tcg/README
> @@ -152,6 +152,11 @@ Conditional jump if t0 cond t1 is true. cond can be:
>      TCG_COND_LEU /* unsigned */
>      TCG_COND_GTU /* unsigned */
>  
> +* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
> +
> +Similar to brcond, except that the 64-bit values T0 and T1
> +are formed from two 32-bit arguments.
> +
>  ********* Arithmetic
>  
>  * add_i32/i64 t0, t1, t2
> @@ -282,6 +287,19 @@ order bytes must be set to zero.
>  Indicate that the value of t0 won't be used later. It is useful to
>  force dead code elimination.
>  
> +********* Conditional moves
> +
> +* setcond_i32/i64 cond, dest, t1, t2
> +
> +dest = (t1 cond t2)
> +
> +Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
> +
> +* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
> +
> +Similar to setcond, except that the 64-bit values T1 and T2 are
> +formed from two 32-bit arguments.  The result is a 32-bit value.
> +
>  ********* Type conversions
>  
>  * ext_i32_i64 t0, t1
> @@ -375,7 +393,7 @@ The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
>  
>  On a 32 bit target, all 64 bit operations are converted to 32 bits. A
>  few specific operations must be implemented to allow it (see add2_i32,
> -sub2_i32, brcond2_i32).
> +sub2_i32, brcond2_i32, setcond2_i32).
>  
>  Floating point operations are not supported in this version. A
>  previous incarnation of the code generator had full support of them,
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index faf2e8b..70a75a0 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
>      *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
>  }
>  
> +static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
> +                                    TCGv_i32 arg3, TCGv_i32 arg4,
> +                                    TCGv_i32 arg5, TCGArg arg6)
> +{
> +    *gen_opc_ptr++ = opc;
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
> +    *gen_opparam_ptr++ = arg6;
> +}
> +
> +static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
> +                                    TCGv_i64 arg3, TCGv_i64 arg4,
> +                                    TCGv_i64 arg5, TCGArg arg6)
> +{
> +    *gen_opc_ptr++ = opc;
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
> +    *gen_opparam_ptr++ = arg6;
> +}
> +
>  static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
>                                       TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
>                                       TCGArg arg6)
> @@ -1795,6 +1821,25 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
>      }
>  }
>  
> +static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
> +                                       TCGv_i32 arg1, TCGv_i32 arg2)
> +{
> +    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
> +}
> +
> +static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
> +                                       TCGv_i64 arg1, TCGv_i64 arg2)
> +{
> +#if TCG_TARGET_REG_BITS == 64
> +    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
> +#else
> +    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
> +                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
> +                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
> +    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
> +#endif
> +}
> +
>  /***************************************/
>  /* QEMU specific operations. Their type depend on the QEMU CPU
>     type. */
> @@ -2067,6 +2112,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
>  #define tcg_gen_sari_tl tcg_gen_sari_i64
>  #define tcg_gen_brcond_tl tcg_gen_brcond_i64
>  #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
> +#define tcg_gen_setcond_tl tcg_gen_setcond_i64
>  #define tcg_gen_mul_tl tcg_gen_mul_i64
>  #define tcg_gen_muli_tl tcg_gen_muli_i64
>  #define tcg_gen_div_tl tcg_gen_div_i64
> @@ -2137,6 +2183,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
>  #define tcg_gen_sari_tl tcg_gen_sari_i32
>  #define tcg_gen_brcond_tl tcg_gen_brcond_i32
>  #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
> +#define tcg_gen_setcond_tl tcg_gen_setcond_i32
>  #define tcg_gen_mul_tl tcg_gen_mul_i32
>  #define tcg_gen_muli_tl tcg_gen_muli_i32
>  #define tcg_gen_div_tl tcg_gen_div_i32
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index b7f3fd7..89db3b4 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -42,6 +42,7 @@ DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
>  
>  DEF2(mov_i32, 1, 1, 0, 0)
>  DEF2(movi_i32, 1, 0, 1, 0)
> +DEF2(setcond_i32, 1, 2, 1, 0)
>  /* load/store */
>  DEF2(ld8u_i32, 1, 1, 1, 0)
>  DEF2(ld8s_i32, 1, 1, 1, 0)
> @@ -82,6 +83,7 @@ DEF2(add2_i32, 2, 4, 0, 0)
>  DEF2(sub2_i32, 2, 4, 0, 0)
>  DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
>  DEF2(mulu2_i32, 2, 2, 0, 0)
> +DEF2(setcond2_i32, 1, 4, 1, 0)
>  #endif
>  #ifdef TCG_TARGET_HAS_ext8s_i32
>  DEF2(ext8s_i32, 1, 1, 0, 0)
> @@ -111,6 +113,7 @@ DEF2(neg_i32, 1, 1, 0, 0)
>  #if TCG_TARGET_REG_BITS == 64
>  DEF2(mov_i64, 1, 1, 0, 0)
>  DEF2(movi_i64, 1, 0, 1, 0)
> +DEF2(setcond_i64, 1, 2, 1, 0)
>  /* load/store */
>  DEF2(ld8u_i64, 1, 1, 1, 0)
>  DEF2(ld8s_i64, 1, 1, 1, 0)
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 3c0e296..9949814 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -670,6 +670,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
>  }
>  #endif
>  
> +
>  static void tcg_reg_alloc_start(TCGContext *s)
>  {
>      int i;
> @@ -888,21 +889,29 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
>                  fprintf(outfile, "%s",
>                          tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
>              }
> -            if (c == INDEX_op_brcond_i32
> +            switch (c) {
> +            case INDEX_op_brcond_i32:
> +#if TCG_TARGET_REG_BITS == 32
> +            case INDEX_op_brcond2_i32:
> +#elif TCG_TARGET_REG_BITS == 64
> +            case INDEX_op_brcond_i64:
> +#endif
> +            case INDEX_op_setcond_i32:
>  #if TCG_TARGET_REG_BITS == 32
> -                || c == INDEX_op_brcond2_i32
> +            case INDEX_op_setcond2_i32:
>  #elif TCG_TARGET_REG_BITS == 64
> -                || c == INDEX_op_brcond_i64
> +            case INDEX_op_setcond_i64:
>  #endif
> -                ) {
>                  if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
>                      fprintf(outfile, ",%s", cond_name[args[k++]]);
>                  else
>                      fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
>                  i = 1;
> -            }
> -            else
> +                break;
> +            default:
>                  i = 0;
> +                break;
> +            }
>              for(; i < nb_cargs; i++) {
>                  if (k != 0)
>                      fprintf(outfile, ",");
> -- 
> 1.6.5.2
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 2/5] tcg-x86_64: Implement setcond.
  2009-12-19 16:50 ` [Qemu-devel] [PATCH 2/5] tcg-x86_64: Implement setcond Richard Henderson
@ 2009-12-19 23:11   ` Aurelien Jarno
  0 siblings, 0 replies; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 08:50:19AM -0800, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

> ---
>  tcg/x86_64/tcg-target.c |   38 +++++++++++++++++++++++++++++++++-----
>  1 files changed, 33 insertions(+), 5 deletions(-)
> 
> diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
> index 2339091..33dc452 100644
> --- a/tcg/x86_64/tcg-target.c
> +++ b/tcg/x86_64/tcg-target.c
> @@ -491,26 +491,42 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
>      }
>  }
>  
> -static void tcg_out_brcond(TCGContext *s, int cond, 
> -                           TCGArg arg1, TCGArg arg2, int const_arg2,
> -                           int label_index, int rexw)
> +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
> +                        int const_arg2, int rexw)
>  {
>      if (const_arg2) {
>          if (arg2 == 0) {
>              /* test r, r */
>              tcg_out_modrm(s, 0x85 | rexw, arg1, arg1);
>          } else {
> -            if (rexw)
> +            if (rexw) {
>                  tgen_arithi64(s, ARITH_CMP, arg1, arg2);
> -            else
> +            } else {
>                  tgen_arithi32(s, ARITH_CMP, arg1, arg2);
> +            }
>          }
>      } else {
>          tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
>      }
> +}
> +
> +static void tcg_out_brcond(TCGContext *s, int cond,
> +                           TCGArg arg1, TCGArg arg2, int const_arg2,
> +                           int label_index, int rexw)
> +{
> +    tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
>      tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
>  }
>  
> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
> +                            TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
> +{
> +    tcg_out_cmp(s, arg1, arg2, const_arg2, rexw);
> +    /* setcc */
> +    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, dest);
> +    tgen_arithi32(s, ARITH_AND, dest, 0xff);
> +}
> +
>  #if defined(CONFIG_SOFTMMU)
>  
>  #include "../../softmmu_defs.h"
> @@ -1197,6 +1213,15 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
>          tcg_out_modrm(s, 0x8b, args[0], args[1]);
>          break;
>  
> +    case INDEX_op_setcond_i32:
> +        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
> +                        const_args[2], 0);
> +        break;
> +    case INDEX_op_setcond_i64:
> +        tcg_out_setcond(s, args[3], args[0], args[1], args[2],
> +                        const_args[2], P_REXW);
> +        break;
> +
>      case INDEX_op_qemu_ld8u:
>          tcg_out_qemu_ld(s, args, 0);
>          break;
> @@ -1376,6 +1401,9 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
>      { INDEX_op_ext16u_i64, { "r", "r"} },
>      { INDEX_op_ext32u_i64, { "r", "r"} },
>  
> +    { INDEX_op_setcond_i32, { "r", "r", "ri" } },
> +    { INDEX_op_setcond_i64, { "r", "r", "re" } },
> +
>      { INDEX_op_qemu_ld8u, { "r", "L" } },
>      { INDEX_op_qemu_ld8s, { "r", "L" } },
>      { INDEX_op_qemu_ld16u, { "r", "L" } },
> -- 
> 1.6.5.2
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches.
  2009-12-19 18:44 ` [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches Richard Henderson
@ 2009-12-19 23:11   ` Aurelien Jarno
  2009-12-19 23:32   ` Laurent Desnogues
  1 sibling, 0 replies; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 10:44:11AM -0800, Richard Henderson wrote:
> There are places, like brcond2, where we know that the destination
> of a forward branch will be within 127 bytes.
> 
> Add the R_386_PC8 relocation type to support this.  Add a flag to
> tcg_out_jxx and tcg_out_brcond* to enable it.  Set the flag in the
> brcond2 label_next branches; pass along the input flag otherwise.

This looks ok, though I would appreciate someone else to review it in
details.

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  elf.h                 |    2 +
>  tcg/i386/tcg-target.c |  116 +++++++++++++++++++++++++++++++++----------------
>  2 files changed, 80 insertions(+), 38 deletions(-)
> 
> diff --git a/elf.h b/elf.h
> index 11674d7..c84c8ab 100644
> --- a/elf.h
> +++ b/elf.h
> @@ -243,6 +243,8 @@ typedef struct {
>  #define R_386_GOTOFF	9
>  #define R_386_GOTPC	10
>  #define R_386_NUM	11
> +/* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */
> +#define R_386_PC8	23
>  
>  #define R_MIPS_NONE		0
>  #define R_MIPS_16		1
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 972b102..4c42caf 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -61,6 +61,12 @@ static void patch_reloc(uint8_t *code_ptr, int type,
>      case R_386_PC32:
>          *(uint32_t *)code_ptr = value - (long)code_ptr;
>          break;
> +    case R_386_PC8:
> +        value -= (long)code_ptr;
> +        if (value != (int8_t)value)
> +            tcg_abort();
> +        *(uint8_t *)code_ptr = value;
> +        break;
>      default:
>          tcg_abort();
>      }
> @@ -305,7 +311,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
>          tgen_arithi(s, ARITH_ADD, reg, val, 0);
>  }
>  
> -static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
> +/* Use SMALL != 0 to force a short forward branch.  */
> +static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
>  {
>      int32_t val, val1;
>      TCGLabel *l = &s->labels[label_index];
> @@ -314,12 +321,16 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
>          val = l->u.value - (tcg_target_long)s->code_ptr;
>          val1 = val - 2;
>          if ((int8_t)val1 == val1) {
> -            if (opc == -1)
> +            if (opc == -1) {
>                  tcg_out8(s, 0xeb);
> -            else
> +            } else {
>                  tcg_out8(s, 0x70 + opc);
> +            }
>              tcg_out8(s, val1);
>          } else {
> +            if (small) {
> +                tcg_abort();
> +            }
>              if (opc == -1) {
>                  tcg_out8(s, 0xe9);
>                  tcg_out32(s, val - 5);
> @@ -329,6 +340,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
>                  tcg_out32(s, val - 6);
>              }
>          }
> +    } else if (small) {
> +        if (opc == -1) {
> +            tcg_out8(s, 0xeb);
> +        } else {
> +            tcg_out8(s, 0x70 + opc);
> +        }
> +        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
> +        s->code_ptr += 1;
>      } else {
>          if (opc == -1) {
>              tcg_out8(s, 0xe9);
> @@ -343,7 +362,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
>  
>  static void tcg_out_brcond(TCGContext *s, int cond, 
>                             TCGArg arg1, TCGArg arg2, int const_arg2,
> -                           int label_index)
> +                           int label_index, int small)
>  {
>      if (const_arg2) {
>          if (arg2 == 0) {
> @@ -355,64 +374,84 @@ static void tcg_out_brcond(TCGContext *s, int cond,
>      } else {
>          tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
>      }
> -    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
> +    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
>  }
>  
>  /* XXX: we implement it at the target level to avoid having to
>     handle cross basic blocks temporaries */
> -static void tcg_out_brcond2(TCGContext *s,
> -                            const TCGArg *args, const int *const_args)
> +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
> +                            const int *const_args, int small)
>  {
>      int label_next;
>      label_next = gen_new_label();
>      switch(args[4]) {
>      case TCG_COND_EQ:
> -        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
> -        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
> +        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
> +                       label_next, small);
> +        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
> +                       args[5], small);
>          break;
>      case TCG_COND_NE:
> -        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
> -        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
> +        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
> +                       args[5], small);
> +        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
> +                       args[5], small);
>          break;
>      case TCG_COND_LT:
> -        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      case TCG_COND_LE:
> -        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      case TCG_COND_GT:
> -        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      case TCG_COND_GE:
> -        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      case TCG_COND_LTU:
> -        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      case TCG_COND_LEU:
> -        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      case TCG_COND_GTU:
> -        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      case TCG_COND_GEU:
> -        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>          break;
>      default:
>          tcg_abort();
> @@ -913,7 +952,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
>          }
>          break;
>      case INDEX_op_br:
> -        tcg_out_jxx(s, JCC_JMP, args[0]);
> +        tcg_out_jxx(s, JCC_JMP, args[0], 0);
>          break;
>      case INDEX_op_movi_i32:
>          tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
> @@ -1044,10 +1083,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
>              tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
>          break;
>      case INDEX_op_brcond_i32:
> -        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
> +        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> +                       args[3], 0);
>          break;
>      case INDEX_op_brcond2_i32:
> -        tcg_out_brcond2(s, args, const_args);
> +        tcg_out_brcond2(s, args, const_args, 0);
>          break;
>  
>      case INDEX_op_bswap16_i32:
> -- 
> 1.6.5.2
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 4/5] tcg: Add tcg_invert_cond.
  2009-12-19 18:44 ` [Qemu-devel] [PATCH 4/5] tcg: Add tcg_invert_cond Richard Henderson
@ 2009-12-19 23:11   ` Aurelien Jarno
  0 siblings, 0 replies; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 10:44:52AM -0800, Richard Henderson wrote:
> It is very handy to have a reliable mapping of a condition to its inverse.

Acked-by: Aurelien Jarno <aurelien@aurel32.net>

> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/tcg.h |    5 +++++
>  1 files changed, 5 insertions(+), 0 deletions(-)
> 
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 9824493..376d6af 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -205,6 +205,11 @@ typedef enum {
>      TCG_COND_GTU,
>  } TCGCond;
>  
> +static inline TCGCond tcg_invert_cond(TCGCond c)
> +{
> +    return (TCGCond)(c ^ 1);
> +}
> +
>  #define TEMP_VAL_DEAD  0
>  #define TEMP_VAL_REG   1
>  #define TEMP_VAL_MEM   2
> -- 
> 1.6.5.2
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 5/5] tcg-i386: Implement setcond.
  2009-12-19 18:46 ` [Qemu-devel] [PATCH 5/5] tcg-i386: Implement setcond Richard Henderson
@ 2009-12-19 23:11   ` Aurelien Jarno
  2009-12-22 12:20     ` Laurent Desnogues
  0 siblings, 1 reply; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 10:46:38AM -0800, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>

This looks ok, though I would appreciate someone else to review it in
details.

> ---
>  tcg/i386/tcg-target.c |   73 +++++++++++++++++++++++++++++++++++++++++++++++--
>  1 files changed, 70 insertions(+), 3 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 4c42caf..43e0155 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -360,9 +360,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
>      }
>  }
>  
> -static void tcg_out_brcond(TCGContext *s, int cond, 
> -                           TCGArg arg1, TCGArg arg2, int const_arg2,
> -                           int label_index, int small)
> +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
> +                        int const_arg2)
>  {
>      if (const_arg2) {
>          if (arg2 == 0) {
> @@ -374,6 +373,13 @@ static void tcg_out_brcond(TCGContext *s, int cond,
>      } else {
>          tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
>      }
> +}
> +
> +static void tcg_out_brcond(TCGContext *s, int cond,
> +                           TCGArg arg1, TCGArg arg2, int const_arg2,
> +                           int label_index, int small)
> +{
> +    tcg_out_cmp(s, arg1, arg2, const_arg2);
>      tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
>  }
>  
> @@ -459,6 +465,57 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
>      tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
>  }
>  
> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
> +                            TCGArg arg1, TCGArg arg2, int const_arg2)
> +{
> +    tcg_out_cmp(s, arg1, arg2, const_arg2);
> +    /* setcc */
> +    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, dest);
> +    tgen_arithi(s, ARITH_AND, dest, 0xff, 0);
> +}
> +
> +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
> +                             const int *const_args)
> +{
> +    TCGArg new_args[6];
> +    int label_true, label_over;
> +
> +    memcpy(new_args, args+1, 5*sizeof(TCGArg));
> +
> +    if (args[0] == args[1] || args[0] == args[2]
> +        || (!const_args[3] && args[0] == args[3])
> +        || (!const_args[4] && args[0] == args[4])) {
> +        /* When the destination overlaps with one of the argument
> +           registers, don't do anything tricky.  */
> +        label_true = gen_new_label();
> +        label_over = gen_new_label();
> +
> +        new_args[5] = label_true;
> +        tcg_out_brcond2(s, new_args, const_args+1, 1);
> +
> +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
> +        tcg_out_jxx(s, JCC_JMP, label_over, 1);
> +        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
> +
> +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
> +        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
> +    } else {
> +        /* When the destination does not overlap one of the arguments,
> +           clear the destination first, jump if cond false, and emit an
> +           increment in the true case.  This results in smaller code.  */
> +
> +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
> +
> +        label_over = gen_new_label();
> +        new_args[4] = tcg_invert_cond(new_args[4]);
> +        new_args[5] = label_over;
> +        tcg_out_brcond2(s, new_args, const_args+1, 1);
> +
> +        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
> +        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
> +    }
> +}
> +
>  #if defined(CONFIG_SOFTMMU)
>  
>  #include "../../softmmu_defs.h"
> @@ -1120,6 +1177,13 @@ static inline void tcg_out_op(TCGContext *s, int opc,
>          tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
>          break;
>  
> +    case INDEX_op_setcond_i32:
> +        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
> +        break;
> +    case INDEX_op_setcond2_i32:
> +        tcg_out_setcond2(s, args, const_args);
> +        break;
> +
>      case INDEX_op_qemu_ld8u:
>          tcg_out_qemu_ld(s, args, 0);
>          break;
> @@ -1208,6 +1272,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
>      { INDEX_op_ext8u_i32, { "r", "q"} },
>      { INDEX_op_ext16u_i32, { "r", "r"} },
>  
> +    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
> +    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
> +
>  #if TARGET_LONG_BITS == 32
>      { INDEX_op_qemu_ld8u, { "r", "L" } },
>      { INDEX_op_qemu_ld8s, { "r", "L" } },
> -- 
> 1.6.5.2
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set
  2009-12-19 23:11   ` Aurelien Jarno
@ 2009-12-19 23:24     ` Richard Henderson
  2009-12-19 23:45       ` Aurelien Jarno
  0 siblings, 1 reply; 34+ messages in thread
From: Richard Henderson @ 2009-12-19 23:24 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On 12/19/2009 03:11 PM, Aurelien Jarno wrote:
> On Sat, Dec 19, 2009 at 10:01:57AM -0800, Richard Henderson wrote:
>> Defines setcond_{i32,i64} and setcond2_i32 for 64-on-32-bit.
>
> I do wonder if setcond2_i32 and brcond2_i32 should be added there. Those
> are internal ops that are actually not exported in tcg-op.h.

I think it's probably useful documentation, even if it is internal.
One does have to know what it means in order to implement it in the
backends.


r~

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches.
  2009-12-19 18:44 ` [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches Richard Henderson
  2009-12-19 23:11   ` Aurelien Jarno
@ 2009-12-19 23:32   ` Laurent Desnogues
  2009-12-20  1:17     ` Richard Henderson
  1 sibling, 1 reply; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-19 23:32 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, aurelien

On Sat, Dec 19, 2009 at 7:44 PM, Richard Henderson <rth@twiddle.net> wrote:
> There are places, like brcond2, where we know that the destination
> of a forward branch will be within 127 bytes.
>
> Add the R_386_PC8 relocation type to support this.  Add a flag to
> tcg_out_jxx and tcg_out_brcond* to enable it.  Set the flag in the
> brcond2 label_next branches; pass along the input flag otherwise.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
>
> ---
>  elf.h                 |    2 +
>  tcg/i386/tcg-target.c |  116 +++++++++++++++++++++++++++++++++----------------
>  2 files changed, 80 insertions(+), 38 deletions(-)
>
> diff --git a/elf.h b/elf.h
> index 11674d7..c84c8ab 100644
> --- a/elf.h
> +++ b/elf.h
> @@ -243,6 +243,8 @@ typedef struct {
>  #define R_386_GOTOFF   9
>  #define R_386_GOTPC    10
>  #define R_386_NUM      11
> +/* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */
> +#define R_386_PC8      23
>
>  #define R_MIPS_NONE            0
>  #define R_MIPS_16              1
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 972b102..4c42caf 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -61,6 +61,12 @@ static void patch_reloc(uint8_t *code_ptr, int type,
>     case R_386_PC32:
>         *(uint32_t *)code_ptr = value - (long)code_ptr;
>         break;
> +    case R_386_PC8:
> +        value -= (long)code_ptr;
> +        if (value != (int8_t)value)
> +            tcg_abort();
> +        *(uint8_t *)code_ptr = value;
> +        break;
>     default:
>         tcg_abort();
>     }
> @@ -305,7 +311,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
>         tgen_arithi(s, ARITH_ADD, reg, val, 0);
>  }
>
> -static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
> +/* Use SMALL != 0 to force a short forward branch.  */
> +static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
>  {
>     int32_t val, val1;
>     TCGLabel *l = &s->labels[label_index];
> @@ -314,12 +321,16 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
>         val = l->u.value - (tcg_target_long)s->code_ptr;
>         val1 = val - 2;
>         if ((int8_t)val1 == val1) {
> -            if (opc == -1)
> +            if (opc == -1) {
>                 tcg_out8(s, 0xeb);
> -            else
> +            } else {
>                 tcg_out8(s, 0x70 + opc);
> +            }
>             tcg_out8(s, val1);
>         } else {
> +            if (small) {
> +                tcg_abort();
> +            }
>             if (opc == -1) {
>                 tcg_out8(s, 0xe9);
>                 tcg_out32(s, val - 5);
> @@ -329,6 +340,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
>                 tcg_out32(s, val - 6);
>             }
>         }
> +    } else if (small) {
> +        if (opc == -1) {
> +            tcg_out8(s, 0xeb);
> +        } else {
> +            tcg_out8(s, 0x70 + opc);
> +        }
> +        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
> +        s->code_ptr += 1;
>     } else {
>         if (opc == -1) {
>             tcg_out8(s, 0xe9);
> @@ -343,7 +362,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
>
>  static void tcg_out_brcond(TCGContext *s, int cond,
>                            TCGArg arg1, TCGArg arg2, int const_arg2,
> -                           int label_index)
> +                           int label_index, int small)
>  {
>     if (const_arg2) {
>         if (arg2 == 0) {
> @@ -355,64 +374,84 @@ static void tcg_out_brcond(TCGContext *s, int cond,
>     } else {
>         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
>     }
> -    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
> +    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
>  }
>
>  /* XXX: we implement it at the target level to avoid having to
>    handle cross basic blocks temporaries */
> -static void tcg_out_brcond2(TCGContext *s,
> -                            const TCGArg *args, const int *const_args)
> +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
> +                            const int *const_args, int small)
>  {
>     int label_next;
>     label_next = gen_new_label();
>     switch(args[4]) {
>     case TCG_COND_EQ:
> -        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
> -        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
> +        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
> +                       label_next, small);

Shouldn't it be 1 instead of small?

The rest is OK.


Laurent

> +        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
> +                       args[5], small);
>         break;
>     case TCG_COND_NE:
> -        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
> -        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
> +        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
> +                       args[5], small);
> +        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
> +                       args[5], small);
>         break;
>     case TCG_COND_LT:
> -        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     case TCG_COND_LE:
> -        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     case TCG_COND_GT:
> -        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     case TCG_COND_GE:
> -        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     case TCG_COND_LTU:
> -        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     case TCG_COND_LEU:
> -        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     case TCG_COND_GTU:
> -        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     case TCG_COND_GEU:
> -        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
> -        tcg_out_jxx(s, JCC_JNE, label_next);
> -        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
> +        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
> +                       args[5], small);
> +        tcg_out_jxx(s, JCC_JNE, label_next, 1);
> +        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
> +                       args[5], small);
>         break;
>     default:
>         tcg_abort();
> @@ -913,7 +952,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
>         }
>         break;
>     case INDEX_op_br:
> -        tcg_out_jxx(s, JCC_JMP, args[0]);
> +        tcg_out_jxx(s, JCC_JMP, args[0], 0);
>         break;
>     case INDEX_op_movi_i32:
>         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
> @@ -1044,10 +1083,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
>             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
>         break;
>     case INDEX_op_brcond_i32:
> -        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
> +        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> +                       args[3], 0);
>         break;
>     case INDEX_op_brcond2_i32:
> -        tcg_out_brcond2(s, args, const_args);
> +        tcg_out_brcond2(s, args, const_args, 0);
>         break;
>
>     case INDEX_op_bswap16_i32:
> --
> 1.6.5.2
>
>
>
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-19 23:11 ` [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Aurelien Jarno
@ 2009-12-19 23:43   ` malc
  2009-12-20 11:03     ` Blue Swirl
  0 siblings, 1 reply; 34+ messages in thread
From: malc @ 2009-12-19 23:43 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: Blue Swirl, qemu-devel, Richard Henderson

On Sun, 20 Dec 2009, Aurelien Jarno wrote:

> On Sat, Dec 19, 2009 at 10:52:52AM -0800, Richard Henderson wrote:
> > Changes from round 3:
> > 
> >  * Drop movcond for now.
> >  * Only use movzbl and not xor in setcond.
> > 
> > 
> 
> Thanks for the update, it looks like ready, I only have cosmetic or
> minor comments. See my comments in the individual patches
> 
> I think we should commit it now, but wait a bit until it is implemented
> in all TCG targets to use this new ops. We can circulate patches in the
> meanwhile.
> 

This doesn't really parse for me, but anyways, i've added setcond/setcond2
to PPC/PPC64 so no objections from my side [1].

Blue Swirl?

-- 
mailto:av1474@comtv.ru

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set
  2009-12-19 23:24     ` Richard Henderson
@ 2009-12-19 23:45       ` Aurelien Jarno
  0 siblings, 0 replies; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-19 23:45 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Sat, Dec 19, 2009 at 03:24:22PM -0800, Richard Henderson wrote:
> On 12/19/2009 03:11 PM, Aurelien Jarno wrote:
>> On Sat, Dec 19, 2009 at 10:01:57AM -0800, Richard Henderson wrote:
>>> Defines setcond_{i32,i64} and setcond2_i32 for 64-on-32-bit.
>>
>> I do wonder if setcond2_i32 and brcond2_i32 should be added there. Those
>> are internal ops that are actually not exported in tcg-op.h.
>
> I think it's probably useful documentation, even if it is internal.
> One does have to know what it means in order to implement it in the
> backends.
>

Then we should probably move it into another section, as people also use
this doc to know what tcg_gen_* ops are available.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches.
  2009-12-19 23:32   ` Laurent Desnogues
@ 2009-12-20  1:17     ` Richard Henderson
  0 siblings, 0 replies; 34+ messages in thread
From: Richard Henderson @ 2009-12-20  1:17 UTC (permalink / raw)
  To: Laurent Desnogues; +Cc: qemu-devel, aurelien

On 12/19/2009 03:32 PM, Laurent Desnogues wrote:
>>      switch(args[4]) {
>>      case TCG_COND_EQ:
>> -        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
>> -        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
>> +        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
>> +                       label_next, small);
>
> Shouldn't it be 1 instead of small?

Sigh, yes indeed.


r~

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-19 23:43   ` malc
@ 2009-12-20 11:03     ` Blue Swirl
  0 siblings, 0 replies; 34+ messages in thread
From: Blue Swirl @ 2009-12-20 11:03 UTC (permalink / raw)
  To: malc; +Cc: qemu-devel, Aurelien Jarno, Richard Henderson

On Sat, Dec 19, 2009 at 11:43 PM, malc <av1474@comtv.ru> wrote:
> On Sun, 20 Dec 2009, Aurelien Jarno wrote:
>
>> On Sat, Dec 19, 2009 at 10:52:52AM -0800, Richard Henderson wrote:
>> > Changes from round 3:
>> >
>> >  * Drop movcond for now.
>> >  * Only use movzbl and not xor in setcond.
>> >
>> >
>>
>> Thanks for the update, it looks like ready, I only have cosmetic or
>> minor comments. See my comments in the individual patches
>>
>> I think we should commit it now, but wait a bit until it is implemented
>> in all TCG targets to use this new ops. We can circulate patches in the
>> meanwhile.
>>
>
> This doesn't really parse for me, but anyways, i've added setcond/setcond2
> to PPC/PPC64 so no objections from my side [1].
>
> Blue Swirl?

No objection, though I have no idea if this is useful.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
                   ` (5 preceding siblings ...)
  2009-12-19 23:11 ` [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Aurelien Jarno
@ 2009-12-20 22:57 ` Paul Brook
  2009-12-21  2:00   ` Richard Henderson
  6 siblings, 1 reply; 34+ messages in thread
From: Paul Brook @ 2009-12-20 22:57 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, Richard Henderson

On Saturday 19 December 2009, Richard Henderson wrote:
> Changes from round 3:
> 
>  * Drop movcond for now.
>  * Only use movzbl and not xor in setcond.

I'm still catching up on mail backlog from this thread, but I'm concerned that 
we're exposing setcond to the target translation code if we're planning on 
implementing movcond later.  My guess is that in a lot of cases we want a 
value other than 1, and I'd prefer to avoid proliferation of set+shift/mask 
sequences if we're going to get movcond anyway.

I don't suppose you've tried something along the lines of
  #define tcg_gen_movcond_i32(cond...) gen_helper_mov##cond(...)
And see how that compares in practice?

Paul

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-20 22:57 ` Paul Brook
@ 2009-12-21  2:00   ` Richard Henderson
  2009-12-21  9:13     ` Laurent Desnogues
  2009-12-21 10:08     ` Aurelien Jarno
  0 siblings, 2 replies; 34+ messages in thread
From: Richard Henderson @ 2009-12-21  2:00 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel, aurelien

On 12/20/2009 02:57 PM, Paul Brook wrote:
> On Saturday 19 December 2009, Richard Henderson wrote:
>> Changes from round 3:
>>
>>   * Drop movcond for now.
>>   * Only use movzbl and not xor in setcond.
>
> I'm still catching up on mail backlog from this thread, but I'm concerned that
> we're exposing setcond to the target translation code if we're planning on
> implementing movcond later.

I personally was only planning to use setcond in those cases where the 
target actually wants a 0/1 value.  E.g. i386 setcc, or alpha cmp*, or 
mips slt*.

As for shifts and masks, that wasn't in my plans.  I had a comment in 
there about all the tricks that gcc plays with a conditional move of two 
constants, and the fact that I didn't think it worth the effort.

Indeed, my plan for today was to cut down my existing movcond patch to 
make it simpler, as that seems to be the way Aurelien wants this to go. 
  Life conspired against and I got nothing done, but still.

I *am* convinced that to remove either VTRUE or VFALSE as arguments to 
the movcond primitive (implementing dest = (cond ? vtrue : dest) would 
do too much violence to both the liveness analysis and the register 
allocator within TCG.  The best I can do to remove the complexity is:

static void tcg_out_movcond(TCGContext *s, int cond, TCGArg dest,
                             TCGArg cmp1, TCGArg cmp2, int const_cmp2,
                             TCGArg vtrue, int rexw)
{
     tcg_out_cmp(s, cond, cmp1, cmp2, const_cmp2, rexw);
     /* cmovcc */
     tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw,
                   dest, vtrue);
}
...
     { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
     { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

using matching constraints in the target and directly implement the 
conditional move.  This eliminates the code I previously had that 
checked for things like dest=vfalse and inverted the condition.  I had 
planned to simplify the i386 version similarly, even in the case where 
cmovcc is not available.

I would appreciate some direction here, so as to avoid wasting my time 
with a solution that won't be accepted.


r~

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21  2:00   ` Richard Henderson
@ 2009-12-21  9:13     ` Laurent Desnogues
  2009-12-21  9:47       ` [Qemu-devel] " Paolo Bonzini
  2009-12-21 20:28       ` [Qemu-devel] " Richard Henderson
  2009-12-21 10:08     ` Aurelien Jarno
  1 sibling, 2 replies; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-21  9:13 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paul Brook, aurelien, qemu-devel

On Mon, Dec 21, 2009 at 3:00 AM, Richard Henderson <rth@twiddle.net> wrote:
[...]
> I *am* convinced that to remove either VTRUE or VFALSE as arguments to the
> movcond primitive (implementing dest = (cond ? vtrue : dest) would do too
> much violence to both the liveness analysis and the register allocator
> within TCG.  The best I can do to remove the complexity is:
>
> static void tcg_out_movcond(TCGContext *s, int cond, TCGArg dest,
>                            TCGArg cmp1, TCGArg cmp2, int const_cmp2,
>                            TCGArg vtrue, int rexw)
> {
>    tcg_out_cmp(s, cond, cmp1, cmp2, const_cmp2, rexw);
>    /* cmovcc */
>    tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw,
>                  dest, vtrue);
> }
> ...
>    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
>    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
>
> using matching constraints in the target and directly implement the
> conditional move.  This eliminates the code I previously had that checked
> for things like dest=vfalse and inverted the condition.  I had planned to
> simplify the i386 version similarly, even in the case where cmovcc is not
> available.
>
> I would appreciate some direction here, so as to avoid wasting my time with
> a solution that won't be accepted.

The question for the generalized movcond is how useful is it?
Which front-ends would need it and would the cost to generate
code for it on some (most?) back-ends be amortized?

My guess (I use that word given that I didn't do any benchmark
to sustain my claim) is that your implementation is too complex.

Of course setcond can be implemented in terms of movcond,
but my guess (again that word...) is that setcond could be
enough and even faster in most cases.

To sum up, simplicity is extremely desirable as some profiles
are highly dependent on code generation time, and we lack
a set of benchmarks to check how good or bad our ideas
and implementations are.

Regarding your patches, I would like to see setcond put in
mainline with a simplified version for i386.  From there
people could implement it for all front-ends and back-ends.
Then we could move on to movcond and see how useful
it is.  I am all in favor of doing this incrementally as if we
aim for the best solution without participation from many
people, it'll be the best way not to do anything (cf. my
previous proposal for setcond more than a year ago).


Laurent

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Qemu-devel] Re: [PATCH 0/5] tcg conditional set, round 4
  2009-12-21  9:13     ` Laurent Desnogues
@ 2009-12-21  9:47       ` Paolo Bonzini
  2009-12-21 10:03         ` Laurent Desnogues
  2009-12-21 20:28       ` [Qemu-devel] " Richard Henderson
  1 sibling, 1 reply; 34+ messages in thread
From: Paolo Bonzini @ 2009-12-21  9:47 UTC (permalink / raw)
  To: Laurent Desnogues; +Cc: qemu-devel, Paul Brook, aurelien, Richard Henderson

On 12/21/2009 10:13 AM, Laurent Desnogues wrote:
> Which front-ends would need it and would the cost to generate
> code for it on some (most?) back-ends be amortized?

The ARM front-end could definitely use a backend's ability to do 
predication (via movcond).

Paolo

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Qemu-devel] Re: [PATCH 0/5] tcg conditional set, round 4
  2009-12-21  9:47       ` [Qemu-devel] " Paolo Bonzini
@ 2009-12-21 10:03         ` Laurent Desnogues
  0 siblings, 0 replies; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-21 10:03 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, Paul Brook, aurelien, Richard Henderson

On Mon, Dec 21, 2009 at 10:47 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 12/21/2009 10:13 AM, Laurent Desnogues wrote:
>>
>> Which front-ends would need it and would the cost to generate
>> code for it on some (most?) back-ends be amortized?
>
> The ARM front-end could definitely use a backend's ability to do predication
> (via movcond).

I think most things done in the ARM front-end can be done
with a simpler movcond that'd be:

   movcond cond, arg_cond1, arg_cond2, dest, vtrue

though as Richard said this could impact tcg reg alloc and
liveness analysis.

Also, I wonder what would be the impact on speed.


Laurent

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21  2:00   ` Richard Henderson
  2009-12-21  9:13     ` Laurent Desnogues
@ 2009-12-21 10:08     ` Aurelien Jarno
  1 sibling, 0 replies; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-21 10:08 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paul Brook, qemu-devel

On Sun, Dec 20, 2009 at 06:00:48PM -0800, Richard Henderson wrote:
> On 12/20/2009 02:57 PM, Paul Brook wrote:
>> On Saturday 19 December 2009, Richard Henderson wrote:
>>> Changes from round 3:
>>>
>>>   * Drop movcond for now.
>>>   * Only use movzbl and not xor in setcond.
>>
>> I'm still catching up on mail backlog from this thread, but I'm concerned that
>> we're exposing setcond to the target translation code if we're planning on
>> implementing movcond later.
>
> I personally was only planning to use setcond in those cases where the  
> target actually wants a 0/1 value.  E.g. i386 setcc, or alpha cmp*, or  
> mips slt*.
>
> As for shifts and masks, that wasn't in my plans.  I had a comment in  
> there about all the tricks that gcc plays with a conditional move of two  
> constants, and the fact that I didn't think it worth the effort.
>
> Indeed, my plan for today was to cut down my existing movcond patch to  
> make it simpler, as that seems to be the way Aurelien wants this to go.  
> Life conspired against and I got nothing done, but still.
>
> I *am* convinced that to remove either VTRUE or VFALSE as arguments to  
> the movcond primitive (implementing dest = (cond ? vtrue : dest) would  
> do too much violence to both the liveness analysis and the register  
> allocator within TCG.  The best I can do to remove the complexity is:
>
> static void tcg_out_movcond(TCGContext *s, int cond, TCGArg dest,
>                             TCGArg cmp1, TCGArg cmp2, int const_cmp2,
>                             TCGArg vtrue, int rexw)
> {
>     tcg_out_cmp(s, cond, cmp1, cmp2, const_cmp2, rexw);
>     /* cmovcc */
>     tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw,
>                   dest, vtrue);
> }
> ...
>     { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
>     { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
>
> using matching constraints in the target and directly implement the  
> conditional move.  This eliminates the code I previously had that  
> checked for things like dest=vfalse and inverted the condition.  I had  
> planned to simplify the i386 version similarly, even in the case where  
> cmovcc is not available.
>
> I would appreciate some direction here, so as to avoid wasting my time  
> with a solution that won't be accepted.
>

As Paul pointed, there is some redundancy between setcond and movcond,
the first one being a subset of the second one. I am not personally
convinced of the gain of both these new instructions on the performance
side, but at least I am convinced that setcond makes code in targets 
much simpler compared to brcond and jumps.

I have also very bad experience in generating highly optimized code, as
the code generation then becomes complex, which often results in a slow
down. It's clearly a fearing I have for movcond, but again I haven't
seen any benchmark going in one or the other direction. On the other 
hand setcond implementation for the various hosts is much simpler, even
on 32-bit hosts.

That said, we have had various setcond implementations discussed on the
mailing list for more than a year, and we haven't made any progress
on that, while it is an item from Fabrice's TODO list.

My proposal would be to drop movcond for now (meaning at least until
after next release to answer Paul), unless you can convince us that
movcond can bring a big gain on performance. This way we can focus
on setcond, have implementations for all hosts, and get it used in all
targets. After the next release or later, when setcond is really used
wherever possible, we can try to see if movcond is really necessary or
not.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21  9:13     ` Laurent Desnogues
  2009-12-21  9:47       ` [Qemu-devel] " Paolo Bonzini
@ 2009-12-21 20:28       ` Richard Henderson
  2009-12-21 22:21         ` Laurent Desnogues
  2009-12-22  7:19         ` Aurelien Jarno
  1 sibling, 2 replies; 34+ messages in thread
From: Richard Henderson @ 2009-12-21 20:28 UTC (permalink / raw)
  To: Laurent Desnogues; +Cc: Paul Brook, aurelien, qemu-devel

On 12/21/2009 01:13 AM, Laurent Desnogues wrote:
> The question for the generalized movcond is how useful is it?
> Which front-ends would need it and would the cost to generate
> code for it on some (most?) back-ends be amortized?

... Any front end that has a conditional move instruction?
Sparcv9, Mips32, Alpha, ARM...

That said, I think the *biggest* gains are to be had because with 
movcond -- at least on some targets -- we can have one BB per TB, and 
avoid any intermediate spilling of global registers back to memory.

> My guess (I use that word given that I didn't do any benchmark
> to sustain my claim) is that your implementation is too complex.

Too complex for what?  The message against which you are quoting has an 
implementation of 2 lines.

> Of course setcond can be implemented in terms of movcond,
> but my guess (again that word...) is that setcond could be
> enough and even faster in most cases.

To implement condition codes, yes, to implement compare instructions 
(e.g. mips slt, alpha cmp{eq,lt,lte}), yes.  To implement conditional 
moves, no.  At least not without using 5 instructions where 1 would suffice.

> Regarding your patches, I would like to see setcond put in
> mainline with a simplified version for i386.

Again, simplified from what?  The last setcond implementation was 2 lines.


r~

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21 20:28       ` [Qemu-devel] " Richard Henderson
@ 2009-12-21 22:21         ` Laurent Desnogues
  2009-12-21 22:50           ` Richard Henderson
  2009-12-22  7:19         ` Aurelien Jarno
  1 sibling, 1 reply; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-21 22:21 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paul Brook, aurelien, qemu-devel

On Mon, Dec 21, 2009 at 9:28 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 12/21/2009 01:13 AM, Laurent Desnogues wrote:
>>
>> The question for the generalized movcond is how useful is it?
>> Which front-ends would need it and would the cost to generate
>> code for it on some (most?) back-ends be amortized?
>
> ... Any front end that has a conditional move instruction?
> Sparcv9, Mips32, Alpha, ARM...

As far as I know these CPU's don't need the full movcond but
only the variant with vtrue. Even if movcond was quick to generate
host code, for instance for ARM, you'd have to explicitly detect
conditional moves, which probably wouldn't be worth the cost;
I might be wrong, since no one has given it a try.

> That said, I think the *biggest* gains are to be had because with movcond --
> at least on some targets -- we can have one BB per TB, and avoid any
> intermediate spilling of global registers back to memory.

I can't count the number of times I thought some branch removal
could only bring improvement, only to see QEMU slow down.
The balance between simplicity and good generated code is
very hard to achieve (and in that particular case, benchmarking
on an Intel just shows how Intel engineers are good at designing
branch predictors :-).

>> My guess (I use that word given that I didn't do any benchmark
>> to sustain my claim) is that your implementation is too complex.
>
> Too complex for what?  The message against which you are quoting has an
> implementation of 2 lines.

Well I answered to this mail after seeing the SPARC
implementation :)  Indeed your implementation for i386 setcond2
(setcond is trivial) is not that complex.

>> Of course setcond can be implemented in terms of movcond,
>> but my guess (again that word...) is that setcond could be
>> enough and even faster in most cases.
>
> To implement condition codes, yes, to implement compare instructions (e.g.
> mips slt, alpha cmp{eq,lt,lte}), yes.  To implement conditional moves, no.
>  At least not without using 5 instructions where 1 would suffice.

How many instructions would you need to generate one
host instruction?  If the block is not executed that often, it could
be a waste.  If you want I can provide you with dynamic counts
of ARM conditional mov when running SPEC;  but that wouldn't
be enough, someone would need to do that for the kernel
boot too.

I'm not saying movcond is useless, I'm just wondering if it
would bring improvements.  That's why I would prefer to do
all of that stuff incrementally:  setcond, then movcond.

>> Regarding your patches, I would like to see setcond put in
>> mainline with a simplified version for i386.
>
> Again, simplified from what?  The last setcond implementation was 2 lines.

I was wrong sorry, I mixed several of your patches.


Laurent

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21 22:21         ` Laurent Desnogues
@ 2009-12-21 22:50           ` Richard Henderson
  2009-12-21 23:08             ` Laurent Desnogues
  0 siblings, 1 reply; 34+ messages in thread
From: Richard Henderson @ 2009-12-21 22:50 UTC (permalink / raw)
  To: Laurent Desnogues; +Cc: Paul Brook, aurelien, qemu-devel

On 12/21/2009 02:21 PM, Laurent Desnogues wrote:
> As far as I know these CPU's don't need the full movcond but
> only the variant with vtrue.

I know that.  And I looked into TCG very closely to figure out how to 
implement just that.  Except then I have to modify TCG to special-case 
movcond to know that DEST is both input and output.  This is *much* more 
work than simply having the back-end use a matching constraint on VFALSE.

> Even if movcond was quick to generate
> host code, for instance for ARM, you'd have to explicitly detect
> conditional moves

One of us is confused.  Why would I have to explicitly detect 
conditional moves?


r~

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21 22:50           ` Richard Henderson
@ 2009-12-21 23:08             ` Laurent Desnogues
  2009-12-22  0:02               ` Richard Henderson
  0 siblings, 1 reply; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-21 23:08 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paul Brook, aurelien, qemu-devel

On Mon, Dec 21, 2009 at 11:50 PM, Richard Henderson <rth@twiddle.net> wrote:
[...]
>> Even if movcond was quick to generate
>> host code, for instance for ARM, you'd have to explicitly detect
>> conditional moves
>
> One of us is confused.  Why would I have to explicitly detect conditional
> moves?

Most ARM instructions are conditional, with condition code being the
top 4 bits of the instruction.  So the front-end does it the simplest
way possible:

    if (cond != 0xe) {
        /* if not always execute, we generate a conditional jump to
           next instruction */
        s->condlabel = gen_new_label();
        gen_test_cc(cond ^ 1, s->condlabel);
        s->condjmp = 1;
    }

and then generates the code for the instruction as if it wasn't
conditional.  If you wanted to use movcond, you'd have to make
cond + move a special case, which would add some cost to
all conditional instructions.  OTOH that cost could be amortized
by generating less TCG ops for that instruction, and by a
potentially faster generated code.  But if this isn't measured I
won't bet which one is faster, I have been wrong too often.


Laurent

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21 23:08             ` Laurent Desnogues
@ 2009-12-22  0:02               ` Richard Henderson
  2009-12-22 14:46                 ` Laurent Desnogues
  0 siblings, 1 reply; 34+ messages in thread
From: Richard Henderson @ 2009-12-22  0:02 UTC (permalink / raw)
  To: Laurent Desnogues; +Cc: Paul Brook, aurelien, qemu-devel

On 12/21/2009 03:08 PM, Laurent Desnogues wrote:
> If you wanted to use movcond, you'd have to make
> cond + move a special case...

You'd certainly want the ARM front-end to use movcond more often than 
that.  For instance:

   addeq r1,r2,r3
-->
   add_i32 tmp,r2,r3
   movcond_i32 r1,ZF,0,tmp,r1,eq

You'd want to continue to use a branch around if the instruction has 
side effects like cpu fault (e.g. load, store) or updating flags.

It ought not be very hard to arrange for something like

   if (cond != 0xe) {
     if (may_use_movcond(insn)) {
       s->condlabel = -1;
       /* Save the true destination register.  */
       s->conddest = cpu_R[dest];
       /* Implement the instruction into a temporary.  */
       cpu_R[dest] = tcg_temp_new();
     } else {
       s->condlabel = gen_new_label();
       ArmConditional cmp = gen_test_cc(cond ^ 1);
       tcg_gen_brcondi_i32(cmp.cond, cmp.reg, 0, s->condlabel);
     }
     s->condjmp = 1;
   }

   // ... implement the instruction as we currently do.

   if (s->condjmp) {
     if (s->condlabel == -1) {
       /* Conditionally move the temporary result into the
          true destination register.  */
       ArmConditional cmp = gen_test_cc(cond);
       tcg_gen_movcond_i32(cmp.cond, s->conddest, cmp.reg, 0,
                           cpu_R[dest], s->conddest);
       tcg_temp_free(cpu_R[dest]);
       /* Restore the true destination register.  */
       cpu_R[dest] = s->conddest;
     } else {
       tcg_set_label(d->condlabel);
     }
   }


r~

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-21 20:28       ` [Qemu-devel] " Richard Henderson
  2009-12-21 22:21         ` Laurent Desnogues
@ 2009-12-22  7:19         ` Aurelien Jarno
  1 sibling, 0 replies; 34+ messages in thread
From: Aurelien Jarno @ 2009-12-22  7:19 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Laurent Desnogues, Paul Brook, qemu-devel

On Mon, Dec 21, 2009 at 12:28:03PM -0800, Richard Henderson wrote:
> On 12/21/2009 01:13 AM, Laurent Desnogues wrote:
>> The question for the generalized movcond is how useful is it?
>> Which front-ends would need it and would the cost to generate
>> code for it on some (most?) back-ends be amortized?
>
> ... Any front end that has a conditional move instruction?
> Sparcv9, Mips32, Alpha, ARM...
>
> That said, I think the *biggest* gains are to be had because with  
               ^^^^^
This word is the main reason why I am not convinced at all by movcond.

> movcond -- at least on some targets -- we can have one BB per TB, and  
> avoid any intermediate spilling of global registers back to memory.

Memory load/stores are far more frequent, and also spill global
registers back to memory.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set
  2009-12-19 18:01 ` [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set Richard Henderson
  2009-12-19 23:11   ` Aurelien Jarno
@ 2009-12-22 11:27   ` Laurent Desnogues
  2009-12-22 16:09     ` Richard Henderson
  1 sibling, 1 reply; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-22 11:27 UTC (permalink / raw)
  To: aurelien; +Cc: qemu-devel, Richard Henderson

On Sat, Dec 19, 2009 at 7:01 PM, Richard Henderson <rth@twiddle.net> wrote:
> Defines setcond_{i32,i64} and setcond2_i32 for 64-on-32-bit.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/README    |   20 +++++++++++++++++++-
>  tcg/tcg-op.h  |   47 +++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg-opc.h |    3 +++
>  tcg/tcg.c     |   21 +++++++++++++++------
>  4 files changed, 84 insertions(+), 7 deletions(-)
[...]
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index faf2e8b..70a75a0 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
>     *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
>  }
>
> +static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
> +                                    TCGv_i32 arg3, TCGv_i32 arg4,
> +                                    TCGv_i32 arg5, TCGArg arg6)
> +{
> +    *gen_opc_ptr++ = opc;
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
> +    *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
> +    *gen_opparam_ptr++ = arg6;
> +}
> +
> +static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
> +                                    TCGv_i64 arg3, TCGv_i64 arg4,
> +                                    TCGv_i64 arg5, TCGArg arg6)
> +{
> +    *gen_opc_ptr++ = opc;
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
> +    *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
> +    *gen_opparam_ptr++ = arg6;
> +}
> +
>  static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
>                                      TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
>                                      TCGArg arg6)
> @@ -1795,6 +1821,25 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
>     }
>  }
>
> +static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
> +                                       TCGv_i32 arg1, TCGv_i32 arg2)
> +{
> +    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
> +}
> +
> +static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
> +                                       TCGv_i64 arg1, TCGv_i64 arg2)
> +{
> +#if TCG_TARGET_REG_BITS == 64
> +    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
> +#else
> +    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
> +                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
> +                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
> +    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
> +#endif
> +}

I wonder if it wouldn't be better to let the back-ends emit the
clearing of TCGV_HIGH(ret).  This would reduce the number
of emitted TCG ops.  Any thoughts?


Laurent

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 5/5] tcg-i386: Implement setcond.
  2009-12-19 23:11   ` Aurelien Jarno
@ 2009-12-22 12:20     ` Laurent Desnogues
  0 siblings, 0 replies; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-22 12:20 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel, Richard Henderson

On Sun, Dec 20, 2009 at 12:11 AM, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Sat, Dec 19, 2009 at 10:46:38AM -0800, Richard Henderson wrote:
>> Signed-off-by: Richard Henderson <rth@twiddle.net>
>
> This looks ok, though I would appreciate someone else to review it in
> details.

It looks good to me too (though I didn't test it explicitly).


Laurent

>> ---
>>  tcg/i386/tcg-target.c |   73 +++++++++++++++++++++++++++++++++++++++++++++++--
>>  1 files changed, 70 insertions(+), 3 deletions(-)
>>
>> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
>> index 4c42caf..43e0155 100644
>> --- a/tcg/i386/tcg-target.c
>> +++ b/tcg/i386/tcg-target.c
>> @@ -360,9 +360,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
>>      }
>>  }
>>
>> -static void tcg_out_brcond(TCGContext *s, int cond,
>> -                           TCGArg arg1, TCGArg arg2, int const_arg2,
>> -                           int label_index, int small)
>> +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
>> +                        int const_arg2)
>>  {
>>      if (const_arg2) {
>>          if (arg2 == 0) {
>> @@ -374,6 +373,13 @@ static void tcg_out_brcond(TCGContext *s, int cond,
>>      } else {
>>          tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
>>      }
>> +}
>> +
>> +static void tcg_out_brcond(TCGContext *s, int cond,
>> +                           TCGArg arg1, TCGArg arg2, int const_arg2,
>> +                           int label_index, int small)
>> +{
>> +    tcg_out_cmp(s, arg1, arg2, const_arg2);
>>      tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
>>  }
>>
>> @@ -459,6 +465,57 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
>>      tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
>>  }
>>
>> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
>> +                            TCGArg arg1, TCGArg arg2, int const_arg2)
>> +{
>> +    tcg_out_cmp(s, arg1, arg2, const_arg2);
>> +    /* setcc */
>> +    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, dest);
>> +    tgen_arithi(s, ARITH_AND, dest, 0xff, 0);
>> +}
>> +
>> +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
>> +                             const int *const_args)
>> +{
>> +    TCGArg new_args[6];
>> +    int label_true, label_over;
>> +
>> +    memcpy(new_args, args+1, 5*sizeof(TCGArg));
>> +
>> +    if (args[0] == args[1] || args[0] == args[2]
>> +        || (!const_args[3] && args[0] == args[3])
>> +        || (!const_args[4] && args[0] == args[4])) {
>> +        /* When the destination overlaps with one of the argument
>> +           registers, don't do anything tricky.  */
>> +        label_true = gen_new_label();
>> +        label_over = gen_new_label();
>> +
>> +        new_args[5] = label_true;
>> +        tcg_out_brcond2(s, new_args, const_args+1, 1);
>> +
>> +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
>> +        tcg_out_jxx(s, JCC_JMP, label_over, 1);
>> +        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
>> +
>> +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
>> +        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
>> +    } else {
>> +        /* When the destination does not overlap one of the arguments,
>> +           clear the destination first, jump if cond false, and emit an
>> +           increment in the true case.  This results in smaller code.  */
>> +
>> +        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
>> +
>> +        label_over = gen_new_label();
>> +        new_args[4] = tcg_invert_cond(new_args[4]);
>> +        new_args[5] = label_over;
>> +        tcg_out_brcond2(s, new_args, const_args+1, 1);
>> +
>> +        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
>> +        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
>> +    }
>> +}
>> +
>>  #if defined(CONFIG_SOFTMMU)
>>
>>  #include "../../softmmu_defs.h"
>> @@ -1120,6 +1177,13 @@ static inline void tcg_out_op(TCGContext *s, int opc,
>>          tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
>>          break;
>>
>> +    case INDEX_op_setcond_i32:
>> +        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
>> +        break;
>> +    case INDEX_op_setcond2_i32:
>> +        tcg_out_setcond2(s, args, const_args);
>> +        break;
>> +
>>      case INDEX_op_qemu_ld8u:
>>          tcg_out_qemu_ld(s, args, 0);
>>          break;
>> @@ -1208,6 +1272,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
>>      { INDEX_op_ext8u_i32, { "r", "q"} },
>>      { INDEX_op_ext16u_i32, { "r", "r"} },
>>
>> +    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
>> +    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
>> +
>>  #if TARGET_LONG_BITS == 32
>>      { INDEX_op_qemu_ld8u, { "r", "L" } },
>>      { INDEX_op_qemu_ld8s, { "r", "L" } },
>> --
>> 1.6.5.2
>>
>>
>>
>>
>
> --
> Aurelien Jarno                          GPG: 1024D/F1BCDB73
> aurelien@aurel32.net                 http://www.aurel32.net
>
>
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4
  2009-12-22  0:02               ` Richard Henderson
@ 2009-12-22 14:46                 ` Laurent Desnogues
  0 siblings, 0 replies; 34+ messages in thread
From: Laurent Desnogues @ 2009-12-22 14:46 UTC (permalink / raw)
  To: Richard Henderson; +Cc: Paul Brook, aurelien, qemu-devel

On Tue, Dec 22, 2009 at 1:02 AM, Richard Henderson <rth@twiddle.net> wrote:
> On 12/21/2009 03:08 PM, Laurent Desnogues wrote:
>>
>> If you wanted to use movcond, you'd have to make
>> cond + move a special case...
>
> You'd certainly want the ARM front-end to use movcond more often than that.
>  For instance:
>
>  addeq r1,r2,r3
> -->
>  add_i32 tmp,r2,r3
>  movcond_i32 r1,ZF,0,tmp,r1,eq
>
> You'd want to continue to use a branch around if the instruction has side
> effects like cpu fault (e.g. load, store) or updating flags.
>
> It ought not be very hard to arrange for something like
>
>  if (cond != 0xe) {
>    if (may_use_movcond(insn)) {
>      s->condlabel = -1;
>      /* Save the true destination register.  */
>      s->conddest = cpu_R[dest];
>      /* Implement the instruction into a temporary.  */
>      cpu_R[dest] = tcg_temp_new();
>    } else {
>      s->condlabel = gen_new_label();
>      ArmConditional cmp = gen_test_cc(cond ^ 1);
>      tcg_gen_brcondi_i32(cmp.cond, cmp.reg, 0, s->condlabel);
>    }
>    s->condjmp = 1;
>  }
>
>  // ... implement the instruction as we currently do.
>
>  if (s->condjmp) {
>    if (s->condlabel == -1) {
>      /* Conditionally move the temporary result into the
>         true destination register.  */
>      ArmConditional cmp = gen_test_cc(cond);
>      tcg_gen_movcond_i32(cmp.cond, s->conddest, cmp.reg, 0,
>                          cpu_R[dest], s->conddest);
>      tcg_temp_free(cpu_R[dest]);
>      /* Restore the true destination register.  */
>      cpu_R[dest] = s->conddest;
>    } else {
>      tcg_set_label(d->condlabel);
>    }
>  }

I agree, that looks nice.  But I'll let you dig into ARM instruction
encoding and see how to implement may_use_movcond and
getting the correct dest to save is not that cheap (and before
you get back to me, yes, you could only consider a small
subset of the instructions for which you want to do that :-).

There's a point I have kept on insisting on that you keep on
not answering :-)  How does all of that perform in practice?
We can discuss forever, as long as it isn't measured, we are
just guessing.


Laurent

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set
  2009-12-22 11:27   ` Laurent Desnogues
@ 2009-12-22 16:09     ` Richard Henderson
  0 siblings, 0 replies; 34+ messages in thread
From: Richard Henderson @ 2009-12-22 16:09 UTC (permalink / raw)
  To: Laurent Desnogues; +Cc: qemu-devel, aurelien

On 12/22/2009 03:27 AM, Laurent Desnogues wrote:
>> +#if TCG_TARGET_REG_BITS == 64
>> +    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
>> +#else
>> +    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
>> +                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
>> +                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
>> +    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
>> +#endif
>> +}
>
> I wonder if it wouldn't be better to let the back-ends emit the
> clearing of TCGV_HIGH(ret).  This would reduce the number
> of emitted TCG ops.  Any thoughts?

(1) That would require 6 registers on i386 simultaneously.
(2) You lose the constant propagation that TCG would perform.


r~

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2009-12-22 16:09 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-12-19 18:52 [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Richard Henderson
2009-12-19 16:50 ` [Qemu-devel] [PATCH 2/5] tcg-x86_64: Implement setcond Richard Henderson
2009-12-19 23:11   ` Aurelien Jarno
2009-12-19 18:01 ` [Qemu-devel] [PATCH 1/5] tcg: Generic support for conditional set Richard Henderson
2009-12-19 23:11   ` Aurelien Jarno
2009-12-19 23:24     ` Richard Henderson
2009-12-19 23:45       ` Aurelien Jarno
2009-12-22 11:27   ` Laurent Desnogues
2009-12-22 16:09     ` Richard Henderson
2009-12-19 18:44 ` [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches Richard Henderson
2009-12-19 23:11   ` Aurelien Jarno
2009-12-19 23:32   ` Laurent Desnogues
2009-12-20  1:17     ` Richard Henderson
2009-12-19 18:44 ` [Qemu-devel] [PATCH 4/5] tcg: Add tcg_invert_cond Richard Henderson
2009-12-19 23:11   ` Aurelien Jarno
2009-12-19 18:46 ` [Qemu-devel] [PATCH 5/5] tcg-i386: Implement setcond Richard Henderson
2009-12-19 23:11   ` Aurelien Jarno
2009-12-22 12:20     ` Laurent Desnogues
2009-12-19 23:11 ` [Qemu-devel] [PATCH 0/5] tcg conditional set, round 4 Aurelien Jarno
2009-12-19 23:43   ` malc
2009-12-20 11:03     ` Blue Swirl
2009-12-20 22:57 ` Paul Brook
2009-12-21  2:00   ` Richard Henderson
2009-12-21  9:13     ` Laurent Desnogues
2009-12-21  9:47       ` [Qemu-devel] " Paolo Bonzini
2009-12-21 10:03         ` Laurent Desnogues
2009-12-21 20:28       ` [Qemu-devel] " Richard Henderson
2009-12-21 22:21         ` Laurent Desnogues
2009-12-21 22:50           ` Richard Henderson
2009-12-21 23:08             ` Laurent Desnogues
2009-12-22  0:02               ` Richard Henderson
2009-12-22 14:46                 ` Laurent Desnogues
2009-12-22  7:19         ` Aurelien Jarno
2009-12-21 10:08     ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).