qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC] TCG new op: setcond
@ 2008-11-08 19:13 Laurent Desnogues
  2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
  0 siblings, 1 reply; 3+ messages in thread
From: Laurent Desnogues @ 2008-11-08 19:13 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 722 bytes --]

Hello,

this patch implements a new TCG op, setcond, that sets a temp
to 1 if the condition is true, else to 0.  The benefit is the potential
removal of brcond instructions, and helpers size reduction which
can lead to using TCG instead of helpers.

setcond(i)_i{32,64} have been implemented only for x86_64 and
i386 TCG back-ends.

One ARM helper was converted to TCG using setcond.  Alpha
and MIPS are also patched to use setcond.

On my TODO list:

 - implement it in all backends (arm, ppc, ppc64, sparc)
 - use it at least once in every frontend

Comments starting with // in the patch are questions and/or
TODO.

Please feel free to comment.


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-3.patch --]
[-- Type: text/x-patch; name=setcond-3.patch, Size: 19422 bytes --]

Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c	(revision 5650)
+++ target-alpha/translate.c	(working copy)
@@ -504,30 +504,20 @@
                                   int ra, int rb, int rc,
                                   int islit, uint8_t lit)
 {
-    int l1, l2;
     TCGv tmp;
 
     if (unlikely(rc == 31))
-    return;
+        return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
     if (ra != 31) {
         tmp = tcg_temp_new(TCG_TYPE_I64);
         tcg_gen_mov_i64(tmp, cpu_ir[ra]);
     } else
         tmp = tcg_const_i64(0);
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        tcg_gen_setcondi_i64(cond, cpu_ir[rc], tmp, lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
-
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+        tcg_gen_setcond_i64(cond, cpu_ir[rc], tmp, cpu_ir[rb]);
 }
 
 static always_inline int translate_one (DisasContext *ctx, uint32_t insn)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5650)
+++ target-mips/translate.c	(working copy)
@@ -771,15 +771,7 @@
 #define OP_COND(name, cond)                                   \
 static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcond_tl(cond, t0, t0, t1);                     \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -792,15 +784,7 @@
 #define OP_CONDI(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, val);                     \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
@@ -809,15 +793,7 @@
 #define OP_CONDZ(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t)               \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, 0);                       \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5650)
+++ tcg/tcg-op.h	(working copy)
@@ -159,6 +159,21 @@
     *gen_opparam_ptr++ = arg6;
 }
 
+static inline void tcg_gen_op7i(int opc, TCGv arg1, TCGv arg2,
+                                TCGv arg3, TCGv arg4,
+                                TCGv arg5, TCGv arg6,
+                                TCGArg arg7)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV(arg1);
+    *gen_opparam_ptr++ = GET_TCGV(arg2);
+    *gen_opparam_ptr++ = GET_TCGV(arg3);
+    *gen_opparam_ptr++ = GET_TCGV(arg4);
+    *gen_opparam_ptr++ = GET_TCGV(arg5);
+    *gen_opparam_ptr++ = GET_TCGV(arg6);
+    *gen_opparam_ptr++ = arg7;
+}
+
 static inline void gen_set_label(int n)
 {
     tcg_gen_op1i(INDEX_op_set_label, n);
@@ -499,6 +514,20 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -754,6 +783,14 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op7i(INDEX_op_setcond2_i32,
+                 ret, TCGV_HIGH(ret), arg1, TCGV_HIGH(arg1),
+                 arg2, TCGV_HIGH(arg2), cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -972,6 +1009,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -1073,6 +1116,15 @@
         tcg_temp_free(t0);
     }
 }
+
+static inline void tcg_gen_setcondi_i64(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcondi_i64(int cond, TCGv arg1, int64_t arg2,
                                        int label_index)
 {
@@ -1088,7 +1140,6 @@
     tcg_temp_free(t0);
 }
 
-
 /***************************************/
 /* optional operations */
 
@@ -1864,6 +1915,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i64
 #define tcg_gen_sar_tl tcg_gen_sar_i64
 #define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
@@ -1927,6 +1980,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i32
 #define tcg_gen_sar_tl tcg_gen_sar_i32
 #define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5650)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,10 +76,13 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
+DEF2(setcond2_i32, 2, 4, 1, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
 #endif
@@ -129,6 +132,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 1, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/i386/tcg-target.c
===================================================================
--- tcg/i386/tcg-target.c	(revision 5650)
+++ tcg/i386/tcg-target.c	(working copy)
@@ -162,6 +162,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -327,6 +328,114 @@
     }
 }
 
+// TODO const_arg optimization?
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* setcc */
+    tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT, 0, ret);
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0xb6 | P_EXT, ret, ret);
+}
+
+// TODO const_arg optimization?
+static void tcg_out_setcond2_brcond(TCGContext *s, int cond,
+                                    TCGArg arg1, TCGArg arg2,
+                                    int label_index)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* jcc */
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+// TODO const_arg optimization?
+// TODO use cmov (i686 only...)?
+/* args:
+ *   0 low(ret)
+ *   1 high(ret)
+ *   2 low(arg1)
+ *   3 hi(arg1)
+ *   4 low(arg2)
+ *   5 hi(arg2)
+ *   6 condition
+ * Note:  this is basically a copy of tcg_out_brcond2
+ */
+static void tcg_out_setcond2(TCGContext *s,
+                             const TCGArg *args)
+{
+    int label_zero, label_one;
+
+    label_zero = gen_new_label();
+    label_one = gen_new_label();
+    switch (args[6]) {
+    case TCG_COND_EQ:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_EQ, args[3], args[5], label_one);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_one);
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[3], args[5], label_one);
+        break;
+    case TCG_COND_LT:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LE:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GT:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GE:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_label(s, label_zero, (tcg_target_long)s->code_ptr);
+    /* clear lower part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[0], args[0]);
+    /* branch over next instruction which is 5 bytes long */
+    tcg_out8(s, 0xeb);
+    tcg_out8(s, 5);
+    /* set lower part of result to 1 */
+    tcg_out_label(s, label_one, (tcg_target_long)s->code_ptr);
+    tcg_out8(s, 0xb8 + args[0]);
+    tcg_out32(s, 1);
+    /* clear higher part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[1], args[1]);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index)
@@ -1013,6 +1122,12 @@
         else
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args);
+        break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
         break;
@@ -1088,6 +1203,11 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "q", "r", "r" } },
+    // TODO add 'i'
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
Index: tcg/README
===================================================================
--- tcg/README	(revision 5650)
+++ tcg/README	(working copy)
@@ -277,6 +277,10 @@
 
 64 bit byte swap
 
+* setcond_i32/i64 cond, t0, t1, t2
+
+Set t0 to 1 if t1 cond t2 is true, else t0 is set to 0. (cf brcond for cond.)
+
 * discard_i32/i64 t0
 
 Indicate that the value of t0 won't be used later. It is useful to
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5650)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,12 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+#if TCG_TARGET_REG_BITS == 32
+                || c == INDEX_op_setcond2_i32
+#elif TCG_TARGET_REG_BITS == 64
+                || c == INDEX_op_setcond_i64
+#endif
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5650)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,26 @@
     }
 }
 
+// TODO const_arg optimization
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40 + (ret >> 3));
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    // TODO that doesn't look pretty
+    tcg_out_modrm(s, 0xb6 | P_EXT | (rexw ? rexw : P_REXB), ret, ret);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1086,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1254,9 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,9 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5650)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Qemu-devel] Re: [RFC] TCG new op: setcond
  2008-11-08 19:13 [Qemu-devel] [RFC] TCG new op: setcond Laurent Desnogues
@ 2008-11-08 19:32 ` Laurent Desnogues
  2008-11-08 19:55   ` Laurent Desnogues
  0 siblings, 1 reply; 3+ messages in thread
From: Laurent Desnogues @ 2008-11-08 19:32 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 984 bytes --]

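Same patch with a fix for a problem revealed by DEBUG_TCGV: the second
operand of tcg_gen_setcond_i32 and tcg_gen_setcond_i64 is now declared as
TCGv instead of int32_t. Thanks to Aurelien.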


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

On Sat, Nov 8, 2008 at 8:13 PM, Laurent Desnogues
<laurent.desnogues@gmail.com> wrote:
>
> this patch implements a new TCG op, setcond, that sets a temp
> to 1 if the condition is true, else to 0.  The benefit is the potential
> removal of brcond instructions, and helpers size reduction which
> can lead to using TCG instead of helpers.
>
> setcond(i)_i{32,64} have been implemented only for x86_64 and
> i386 TCG back-ends.
>
> One ARM helper was converted to TCG using setcond.  Alpha
> and MIPS are also patched to use setcond.
>
> On my TODO list:
>
>  - implement it in all backends (arm, ppc, ppc64, sparc)
>  - use it at least once in every frontend
>
> Comments starting with // in the patch are questions and/or
> TODO.
>
> Please feel free to comment.
>
>
> Laurent
>
> Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-3-fixed.patch --]
[-- Type: text/x-patch; name=setcond-3-fixed.patch, Size: 19416 bytes --]

Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c	(revision 5650)
+++ target-alpha/translate.c	(working copy)
@@ -504,30 +504,20 @@
                                   int ra, int rb, int rc,
                                   int islit, uint8_t lit)
 {
-    int l1, l2;
     TCGv tmp;
 
     if (unlikely(rc == 31))
-    return;
+        return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
     if (ra != 31) {
         tmp = tcg_temp_new(TCG_TYPE_I64);
         tcg_gen_mov_i64(tmp, cpu_ir[ra]);
     } else
         tmp = tcg_const_i64(0);
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        tcg_gen_setcondi_i64(cond, cpu_ir[rc], tmp, lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
-
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+        tcg_gen_setcond_i64(cond, cpu_ir[rc], tmp, cpu_ir[rb]);
 }
 
 static always_inline int translate_one (DisasContext *ctx, uint32_t insn)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5650)
+++ target-mips/translate.c	(working copy)
@@ -771,15 +771,7 @@
 #define OP_COND(name, cond)                                   \
 static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcond_tl(cond, t0, t0, t1);                     \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -792,15 +784,7 @@
 #define OP_CONDI(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, val);                     \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
@@ -809,15 +793,7 @@
 #define OP_CONDZ(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t)               \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, 0);                       \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5650)
+++ tcg/tcg-op.h	(working copy)
@@ -159,6 +159,21 @@
     *gen_opparam_ptr++ = arg6;
 }
 
+static inline void tcg_gen_op7i(int opc, TCGv arg1, TCGv arg2,
+                                TCGv arg3, TCGv arg4,
+                                TCGv arg5, TCGv arg6,
+                                TCGArg arg7)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV(arg1);
+    *gen_opparam_ptr++ = GET_TCGV(arg2);
+    *gen_opparam_ptr++ = GET_TCGV(arg3);
+    *gen_opparam_ptr++ = GET_TCGV(arg4);
+    *gen_opparam_ptr++ = GET_TCGV(arg5);
+    *gen_opparam_ptr++ = GET_TCGV(arg6);
+    *gen_opparam_ptr++ = arg7;
+}
+
 static inline void gen_set_label(int n)
 {
     tcg_gen_op1i(INDEX_op_set_label, n);
@@ -499,6 +514,20 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -754,6 +783,14 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op7i(INDEX_op_setcond2_i32,
+                 ret, TCGV_HIGH(ret), arg1, TCGV_HIGH(arg1),
+                 arg2, TCGV_HIGH(arg2), cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -972,6 +1009,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -1073,6 +1116,15 @@
         tcg_temp_free(t0);
     }
 }
+
+static inline void tcg_gen_setcondi_i64(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcondi_i64(int cond, TCGv arg1, int64_t arg2,
                                        int label_index)
 {
@@ -1088,7 +1140,6 @@
     tcg_temp_free(t0);
 }
 
-
 /***************************************/
 /* optional operations */
 
@@ -1864,6 +1915,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i64
 #define tcg_gen_sar_tl tcg_gen_sar_i64
 #define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
@@ -1927,6 +1980,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i32
 #define tcg_gen_sar_tl tcg_gen_sar_i32
 #define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5650)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,10 +76,13 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
+DEF2(setcond2_i32, 2, 4, 1, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
 #endif
@@ -129,6 +132,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 1, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/i386/tcg-target.c
===================================================================
--- tcg/i386/tcg-target.c	(revision 5650)
+++ tcg/i386/tcg-target.c	(working copy)
@@ -162,6 +162,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -327,6 +328,114 @@
     }
 }
 
+// TODO const_arg optimization?
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* setcc */
+    tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT, 0, ret);
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0xb6 | P_EXT, ret, ret);
+}
+
+// TODO const_arg optimization?
+static void tcg_out_setcond2_brcond(TCGContext *s, int cond,
+                                    TCGArg arg1, TCGArg arg2,
+                                    int label_index)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* jcc */
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+// TODO const_arg optimization?
+// TODO use cmov (i686 only...)?
+/* args:
+ *   0 low(ret)
+ *   1 high(ret)
+ *   2 low(arg1)
+ *   3 hi(arg1)
+ *   4 low(arg2)
+ *   5 hi(arg2)
+ *   6 condition
+ * Note:  this is basically a copy of tcg_out_brcond2
+ */
+static void tcg_out_setcond2(TCGContext *s,
+                             const TCGArg *args)
+{
+    int label_zero, label_one;
+
+    label_zero = gen_new_label();
+    label_one = gen_new_label();
+    switch (args[6]) {
+    case TCG_COND_EQ:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_EQ, args[3], args[5], label_one);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_one);
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[3], args[5], label_one);
+        break;
+    case TCG_COND_LT:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LE:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GT:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GE:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_label(s, label_zero, (tcg_target_long)s->code_ptr);
+    /* clear lower part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[0], args[0]);
+    /* branch over next instruction which is 5 bytes long */
+    tcg_out8(s, 0xeb);
+    tcg_out8(s, 5);
+    /* set lower part of result to 1 */
+    tcg_out_label(s, label_one, (tcg_target_long)s->code_ptr);
+    tcg_out8(s, 0xb8 + args[0]);
+    tcg_out32(s, 1);
+    /* clear higher part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[1], args[1]);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index)
@@ -1013,6 +1122,12 @@
         else
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args);
+        break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
         break;
@@ -1088,6 +1203,11 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "q", "r", "r" } },
+    // TODO add 'i'
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
Index: tcg/README
===================================================================
--- tcg/README	(revision 5650)
+++ tcg/README	(working copy)
@@ -277,6 +277,10 @@
 
 64 bit byte swap
 
+* setcond_i32/i64 cond, t0, t1, t2
+
+Set t0 to 1 if t1 cond t2 is true, else t0 is set to 0. (cf brcond for cond.)
+
 * discard_i32/i64 t0
 
 Indicate that the value of t0 won't be used later. It is useful to
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5650)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,12 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+#if TCG_TARGET_REG_BITS == 32
+                || c == INDEX_op_setcond2_i32
+#elif TCG_TARGET_REG_BITS == 64
+                || c == INDEX_op_setcond_i64
+#endif
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5650)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,26 @@
     }
 }
 
+// TODO const_arg optimization
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40 + (ret >> 3));
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    // TODO that doesn't look pretty
+    tcg_out_modrm(s, 0xb6 | P_EXT | (rexw ? rexw : P_REXB), ret, ret);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1086,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1254,9 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,9 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5650)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Qemu-devel] Re: [RFC] TCG new op: setcond
  2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
@ 2008-11-08 19:55   ` Laurent Desnogues
  0 siblings, 0 replies; 3+ messages in thread
From: Laurent Desnogues @ 2008-11-08 19:55 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1263 bytes --]

And again another copy/paste error spotted by Stuart.
Sorry for all that.


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

On Sat, Nov 8, 2008 at 8:32 PM, Laurent Desnogues
<laurent.desnogues@gmail.com> wrote:
> Same patch with a fix shown by DEBUG_TCGV. Thanks to Aurelien.
>
>
> Laurent
>
> Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
>
> On Sat, Nov 8, 2008 at 8:13 PM, Laurent Desnogues
> <laurent.desnogues@gmail.com> wrote:
>>
>> this patch implements a new TCG op, setcond, that sets a temp
>> to 1 if the condition is true, else to 0.  The benefit is the potential
>> removal of brcond instructions, and helpers size reduction which
>> can lead to using TCG instead of helpers.
>>
>> setcond(i)_i{32,64} have been implemented only for x86_64 and
>> i386 TCG back-ends.
>>
>> One ARM helper was converted to TCG using setcond.  Alpha
>> and MIPS are also patched to use setcond.
>>
>> On my TODO list:
>>
>>  - implement it in all backends (arm, ppc, ppc64, sparc)
>>  - use it at least once in every frontend
>>
>> Comments starting with // in the patch are questions and/or
>> TODO.
>>
>> Please feel free to comment.
>>
>>
>> Laurent
>>
>> Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
>>
>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-3-fixed2.patch --]
[-- Type: text/x-patch; name=setcond-3-fixed2.patch, Size: 19416 bytes --]

Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c	(revision 5650)
+++ target-alpha/translate.c	(working copy)
@@ -504,30 +504,20 @@
                                   int ra, int rb, int rc,
                                   int islit, uint8_t lit)
 {
-    int l1, l2;
     TCGv tmp;
 
     if (unlikely(rc == 31))
-    return;
+        return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
     if (ra != 31) {
         tmp = tcg_temp_new(TCG_TYPE_I64);
         tcg_gen_mov_i64(tmp, cpu_ir[ra]);
     } else
         tmp = tcg_const_i64(0);
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        tcg_gen_setcondi_i64(cond, cpu_ir[rc], tmp, lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
-
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+        tcg_gen_setcond_i64(cond, cpu_ir[rc], tmp, cpu_ir[rb]);
 }
 
 static always_inline int translate_one (DisasContext *ctx, uint32_t insn)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5650)
+++ target-mips/translate.c	(working copy)
@@ -771,15 +771,7 @@
 #define OP_COND(name, cond)                                   \
 static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcond_tl(cond, t0, t0, t1);                     \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -792,15 +784,7 @@
 #define OP_CONDI(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, val);                     \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
@@ -809,15 +793,7 @@
 #define OP_CONDZ(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t)               \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, 0);                       \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5650)
+++ tcg/tcg-op.h	(working copy)
@@ -159,6 +159,21 @@
     *gen_opparam_ptr++ = arg6;
 }
 
+static inline void tcg_gen_op7i(int opc, TCGv arg1, TCGv arg2,
+                                TCGv arg3, TCGv arg4,
+                                TCGv arg5, TCGv arg6,
+                                TCGArg arg7)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV(arg1);
+    *gen_opparam_ptr++ = GET_TCGV(arg2);
+    *gen_opparam_ptr++ = GET_TCGV(arg3);
+    *gen_opparam_ptr++ = GET_TCGV(arg4);
+    *gen_opparam_ptr++ = GET_TCGV(arg5);
+    *gen_opparam_ptr++ = GET_TCGV(arg6);
+    *gen_opparam_ptr++ = arg7;
+}
+
 static inline void gen_set_label(int n)
 {
     tcg_gen_op1i(INDEX_op_set_label, n);
@@ -499,6 +514,20 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -754,6 +783,14 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op7i(INDEX_op_setcond2_i32,
+                 ret, TCGV_HIGH(ret), arg1, TCGV_HIGH(arg1),
+                 arg2, TCGV_HIGH(arg2), cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -972,6 +1009,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -1073,6 +1116,15 @@
         tcg_temp_free(t0);
     }
 }
+
+static inline void tcg_gen_setcondi_i64(int cond, TCGv ret,
+                                        TCGv arg1, int64_t arg2)
+{
+    TCGv t0 = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcondi_i64(int cond, TCGv arg1, int64_t arg2,
                                        int label_index)
 {
@@ -1088,7 +1140,6 @@
     tcg_temp_free(t0);
 }
 
-
 /***************************************/
 /* optional operations */
 
@@ -1864,6 +1915,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i64
 #define tcg_gen_sar_tl tcg_gen_sar_i64
 #define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
@@ -1927,6 +1980,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i32
 #define tcg_gen_sar_tl tcg_gen_sar_i32
 #define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5650)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,10 +76,13 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
+DEF2(setcond2_i32, 2, 4, 1, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
 #endif
@@ -129,6 +132,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 1, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/i386/tcg-target.c
===================================================================
--- tcg/i386/tcg-target.c	(revision 5650)
+++ tcg/i386/tcg-target.c	(working copy)
@@ -162,6 +162,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -327,6 +328,114 @@
     }
 }
 
+// TODO const_arg optimization?
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* setcc */
+    tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT, 0, ret);
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0xb6 | P_EXT, ret, ret);
+}
+
+// TODO const_arg optimization?
+static void tcg_out_setcond2_brcond(TCGContext *s, int cond,
+                                    TCGArg arg1, TCGArg arg2,
+                                    int label_index)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* jcc */
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+// TODO const_arg optimization?
+// TODO use cmov (i686 only...)?
+/* args:
+ *   0 low(ret)
+ *   1 high(ret)
+ *   2 low(arg1)
+ *   3 hi(arg1)
+ *   4 low(arg2)
+ *   5 hi(arg2)
+ *   6 condition
+ * Note:  this is basically a copy of tcg_out_brcond2
+ */
+static void tcg_out_setcond2(TCGContext *s,
+                             const TCGArg *args)
+{
+    int label_zero, label_one;
+
+    label_zero = gen_new_label();
+    label_one = gen_new_label();
+    switch (args[6]) {
+    case TCG_COND_EQ:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_EQ, args[3], args[5], label_one);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_one);
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[3], args[5], label_one);
+        break;
+    case TCG_COND_LT:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LE:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GT:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GE:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_label(s, label_zero, (tcg_target_long)s->code_ptr);
+    /* clear lower part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[0], args[0]);
+    /* branch over next instruction which is 5 bytes long */
+    tcg_out8(s, 0xeb);
+    tcg_out8(s, 5);
+    /* set lower part of result to 1 */
+    tcg_out_label(s, label_one, (tcg_target_long)s->code_ptr);
+    tcg_out8(s, 0xb8 + args[0]);
+    tcg_out32(s, 1);
+    /* clear higher part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[1], args[1]);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index)
@@ -1013,6 +1122,12 @@
         else
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args);
+        break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
         break;
@@ -1088,6 +1203,11 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "q", "r", "r" } },
+    // TODO add 'i'
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
Index: tcg/README
===================================================================
--- tcg/README	(revision 5650)
+++ tcg/README	(working copy)
@@ -277,6 +277,10 @@
 
 64 bit byte swap
 
+* setcond_i32/i64 cond, t0, t1, t2
+
+Set t0 to 1 if t1 cond t2 is true, else t0 is set to 0. (cf brcond for cond.)
+
 * discard_i32/i64 t0
 
 Indicate that the value of t0 won't be used later. It is useful to
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5650)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,12 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+#if TCG_TARGET_REG_BITS == 32
+                || c == INDEX_op_setcond2_i32
+#elif TCG_TARGET_REG_BITS == 64
+                || c == INDEX_op_setcond_i64
+#endif
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5650)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,26 @@
     }
 }
 
+// TODO const_arg optimization
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40 + (ret >> 3));
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    // TODO that doesn't look pretty
+    tcg_out_modrm(s, 0xb6 | P_EXT | (rexw ? rexw : P_REXB), ret, ret);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1086,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1254,9 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,9 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5650)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-11-08 19:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-11-08 19:13 [Qemu-devel] [RFC] TCG new op: setcond Laurent Desnogues
2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
2008-11-08 19:55   ` Laurent Desnogues

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).