qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC] TCG new op: setcond
@ 2008-11-08 19:13 Laurent Desnogues
  2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
  0 siblings, 1 reply; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-08 19:13 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 722 bytes --]

Hello,

this patch implements a new TCG op, setcond, that sets a temp
to 1 if the condition is true and to 0 otherwise.  The benefits are
the potential removal of brcond instructions and a reduction in
helper size, which can make it possible to use TCG instead of helpers.

setcond(i)_i{32,64} have been implemented only for the x86_64 and
i386 TCG back-ends.

One ARM helper was converted to TCG using setcond.  Alpha
and MIPS are also patched to use setcond.

On my TODO list:

 - implement it in all backends (arm, ppc, ppc64, sparc)
 - use it at least once in every frontend

Comments starting with // in the patch are questions and/or
TODOs.

Please feel free to comment.


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-3.patch --]
[-- Type: text/x-patch; name=setcond-3.patch, Size: 19422 bytes --]

Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c	(revision 5650)
+++ target-alpha/translate.c	(working copy)
@@ -504,30 +504,20 @@
                                   int ra, int rb, int rc,
                                   int islit, uint8_t lit)
 {
-    int l1, l2;
     TCGv tmp;
 
     if (unlikely(rc == 31))
-    return;
+        return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
     if (ra != 31) {
         tmp = tcg_temp_new(TCG_TYPE_I64);
         tcg_gen_mov_i64(tmp, cpu_ir[ra]);
     } else
         tmp = tcg_const_i64(0);
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        tcg_gen_setcondi_i64(cond, cpu_ir[rc], tmp, lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
-
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+        tcg_gen_setcond_i64(cond, cpu_ir[rc], tmp, cpu_ir[rb]);
 }
 
 static always_inline int translate_one (DisasContext *ctx, uint32_t insn)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5650)
+++ target-mips/translate.c	(working copy)
@@ -771,15 +771,7 @@
 #define OP_COND(name, cond)                                   \
 static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcond_tl(cond, t0, t0, t1);                     \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -792,15 +784,7 @@
 #define OP_CONDI(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, val);                     \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
@@ -809,15 +793,7 @@
 #define OP_CONDZ(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t)               \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, 0);                       \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5650)
+++ tcg/tcg-op.h	(working copy)
@@ -159,6 +159,21 @@
     *gen_opparam_ptr++ = arg6;
 }
 
+static inline void tcg_gen_op7i(int opc, TCGv arg1, TCGv arg2,
+                                TCGv arg3, TCGv arg4,
+                                TCGv arg5, TCGv arg6,
+                                TCGArg arg7)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV(arg1);
+    *gen_opparam_ptr++ = GET_TCGV(arg2);
+    *gen_opparam_ptr++ = GET_TCGV(arg3);
+    *gen_opparam_ptr++ = GET_TCGV(arg4);
+    *gen_opparam_ptr++ = GET_TCGV(arg5);
+    *gen_opparam_ptr++ = GET_TCGV(arg6);
+    *gen_opparam_ptr++ = arg7;
+}
+
 static inline void gen_set_label(int n)
 {
     tcg_gen_op1i(INDEX_op_set_label, n);
@@ -499,6 +514,20 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -754,6 +783,14 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op7i(INDEX_op_setcond2_i32,
+                 ret, TCGV_HIGH(ret), arg1, TCGV_HIGH(arg1),
+                 arg2, TCGV_HIGH(arg2), cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -972,6 +1009,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -1073,6 +1116,15 @@
         tcg_temp_free(t0);
     }
 }
+
+static inline void tcg_gen_setcondi_i64(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcondi_i64(int cond, TCGv arg1, int64_t arg2,
                                        int label_index)
 {
@@ -1088,7 +1140,6 @@
     tcg_temp_free(t0);
 }
 
-
 /***************************************/
 /* optional operations */
 
@@ -1864,6 +1915,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i64
 #define tcg_gen_sar_tl tcg_gen_sar_i64
 #define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
@@ -1927,6 +1980,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i32
 #define tcg_gen_sar_tl tcg_gen_sar_i32
 #define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5650)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,10 +76,13 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
+DEF2(setcond2_i32, 2, 4, 1, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
 #endif
@@ -129,6 +132,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 1, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/i386/tcg-target.c
===================================================================
--- tcg/i386/tcg-target.c	(revision 5650)
+++ tcg/i386/tcg-target.c	(working copy)
@@ -162,6 +162,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -327,6 +328,114 @@
     }
 }
 
+// TODO const_arg optimization?
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* setcc */
+    tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT, 0, ret);
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0xb6 | P_EXT, ret, ret);
+}
+
+// TODO const_arg optimization?
+static void tcg_out_setcond2_brcond(TCGContext *s, int cond,
+                                    TCGArg arg1, TCGArg arg2,
+                                    int label_index)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* jcc */
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+// TODO const_arg optimization?
+// TODO use cmov (i686 only...)?
+/* args:
+ *   0 low(ret)
+ *   1 high(ret)
+ *   2 low(arg1)
+ *   3 hi(arg1)
+ *   4 low(arg2)
+ *   5 hi(arg2)
+ *   6 condition
+ * Note:  this is basically a copy of tcg_out_brcond2
+ */
+static void tcg_out_setcond2(TCGContext *s,
+                             const TCGArg *args)
+{
+    int label_zero, label_one;
+
+    label_zero = gen_new_label();
+    label_one = gen_new_label();
+    switch (args[6]) {
+    case TCG_COND_EQ:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_EQ, args[3], args[5], label_one);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_one);
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[3], args[5], label_one);
+        break;
+    case TCG_COND_LT:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LE:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GT:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GE:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_label(s, label_zero, (tcg_target_long)s->code_ptr);
+    /* clear lower part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[0], args[0]);
+    /* branch over next instruction which is 5 bytes long */
+    tcg_out8(s, 0xeb);
+    tcg_out8(s, 5);
+    /* set lower part of result to 1 */
+    tcg_out_label(s, label_one, (tcg_target_long)s->code_ptr);
+    tcg_out8(s, 0xb8 + args[0]);
+    tcg_out32(s, 1);
+    /* clear higher part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[1], args[1]);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index)
@@ -1013,6 +1122,12 @@
         else
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args);
+        break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
         break;
@@ -1088,6 +1203,11 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "q", "r", "r" } },
+    // TODO add 'i'
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
Index: tcg/README
===================================================================
--- tcg/README	(revision 5650)
+++ tcg/README	(working copy)
@@ -277,6 +277,10 @@
 
 64 bit byte swap
 
+* setcond_i32/i64 cond, t0, t1, t2
+
+Set t0 to 1 if t1 cond t2 is true, else t0 is set to 0. (cf brcond for cond.)
+
 * discard_i32/i64 t0
 
 Indicate that the value of t0 won't be used later. It is useful to
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5650)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,12 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+#if TCG_TARGET_REG_BITS == 32
+                || c == INDEX_op_setcond2_i32
+#elif TCG_TARGET_REG_BITS == 64
+                || c == INDEX_op_setcond_i64
+#endif
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5650)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,26 @@
     }
 }
 
+// TODO const_arg optimization
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40 + (ret >> 3));
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    // TODO that doesn't look pretty
+    tcg_out_modrm(s, 0xb6 | P_EXT | (rexw ? rexw : P_REXB), ret, ret);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1086,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1254,9 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,9 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5650)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 9+ messages in thread
* [Qemu-devel] [RFC] TCG new op: setcond
@ 2008-11-04 10:15 Laurent Desnogues
  2008-11-04 13:16 ` Paul Brook
  2008-11-04 14:24 ` Avi Kivity
  0 siblings, 2 replies; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-04 10:15 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 966 bytes --]

Hello,

this patch implements a new TCG op, setcond, that sets a temp
to 1 if the condition is true and to 0 otherwise.  The benefits are
the potential removal of brcond instructions and a reduction in
helper size, which can make it possible to use TCG instead of helpers.

This patch is only posted here to get comments before I dig
further into this and propose a proper update.

One of the ARM helpers (sub with flag settings) has been
converted to TCG and uses setcond to compute carry.

setcond has been implemented only for the x86_64 TCG back-end.

On my TODO list:

  - document setcond in tcg/README
  - implement it in all backends
      * arm
      * hppa (not applicable according to Stuart Brady)
      * i386
      * ppc
      * ppc64
      * sparc
      * x86_64 (done and partly tested)
  - use it at least once in every frontend
  - setcondi
  - 64 bit setcond's
  - a variant that sets -1 instead of 1 for masking
  - provide a test

Please feel free to comment :-)


Laurent

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-1.patch --]
[-- Type: text/x-patch; name=setcond-1.patch, Size: 5736 bytes --]

Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5617)
+++ tcg/tcg-op.h	(working copy)
@@ -499,6 +499,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5617)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,6 +76,8 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
@@ -129,6 +131,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 0, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5617)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,27 @@
     }
 }
 
+// TODO should apply to setcond_i64 but not tested
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* clear ret since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3) | rexw, ret, ret);
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40);
+    else if (ret > 7)
+        tcg_out8(s, 0x41);
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1087,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1255,8 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,8 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5617)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,8 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+                || c == INDEX_op_setcond_i64
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5617)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-11-09 10:50 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-11-08 19:13 [Qemu-devel] [RFC] TCG new op: setcond Laurent Desnogues
2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
2008-11-08 19:55   ` Laurent Desnogues
  -- strict thread matches above, loose matches on Subject: below --
2008-11-04 10:15 [Qemu-devel] " Laurent Desnogues
2008-11-04 13:16 ` Paul Brook
2008-11-04 13:33   ` Laurent Desnogues
2008-11-04 14:24 ` Avi Kivity
2008-11-05 16:11   ` Laurent Desnogues
2008-11-09 10:50     ` Blue Swirl

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).