qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC] TCG new op: setcond
@ 2008-11-04 10:15 Laurent Desnogues
  2008-11-04 13:16 ` Paul Brook
  2008-11-04 14:24 ` Avi Kivity
  0 siblings, 2 replies; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-04 10:15 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 966 bytes --]

Hello,

this patch implements a new TCG op, setcond, that sets a temp
to 1 if the condition is true, else to 0.  The benefit is the potential
removal of brcond instructions, and helpers size reduction which
can lead to using TCG instead of helpers.

This patch is only posted here to get comments before I dig
further into that and propose a proper update.

One of the ARM helpers (sub with flag settings) has been
converted to TCG and uses setcond to compute carry.

setcond has been implemented only for x86_64 TCG back-end.

On my TODO list:

  - document setcond in tcg/README
  - implement it in all backends
      * arm
      * hppa (not applicable according to Stuart Brady)
      * i386
      * ppc
      * ppc64
      * sparc
      * x86_64 (done and partly tested)
  - use it at least once in every frontend
  - setcondi
  - 64 bit setcond's
  - a variant that sets -1 instead of 1 for masking
  - provide a test

Please feel free to comment :-)


Laurent

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-1.patch --]
[-- Type: text/x-patch; name=setcond-1.patch, Size: 5736 bytes --]

Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5617)
+++ tcg/tcg-op.h	(working copy)
@@ -499,6 +499,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5617)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,6 +76,8 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
@@ -129,6 +131,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 0, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5617)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,27 @@
     }
 }
 
+// TODO should apply to setcond_i64 but not tested
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* clear ret since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3) | rexw, ret, ret);
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40);
+    else if (ret > 7)
+        tcg_out8(s, 0x41);
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1087,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1255,8 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,8 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5617)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,8 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+                || c == INDEX_op_setcond_i64
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5617)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [RFC] TCG new op: setcond
  2008-11-04 10:15 [Qemu-devel] " Laurent Desnogues
@ 2008-11-04 13:16 ` Paul Brook
  2008-11-04 13:33   ` Laurent Desnogues
  2008-11-04 14:24 ` Avi Kivity
  1 sibling, 1 reply; 9+ messages in thread
From: Paul Brook @ 2008-11-04 13:16 UTC (permalink / raw)
  To: qemu-devel; +Cc: Laurent Desnogues

> this patch implements a new TCG op, setcond, that sets a temp
> to 1 if the condition is true, else to 0.  The benefit is the potential
> removal of brcond instructions, and helpers size reduction which
> can lead to using TCG instead of helpers.

>   - a variant that sets -1 instead of 1 for masking

I'm worried about this. If we're not careful we'll end up with an explosion of 
different patterns, many of which aren't optimal on different hosts.

>   - 64 bit setcond's

You should do this sooner rather than later, and on a 32-bit host.

> +    /* clear ret since setcc only sets the lower 8 bits */
> +    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3) | rexw, ret, ret);

This is broken. Inputs and outputs may overlap.

> +    // TODO this should use tcg_out_modrm
> +    //      however currently tcg_out_modrm outputs an extra byte for
> [abcd]l +    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT |
> P_REXB, ret, 0)

This is the wrong way to fix this. If you really care about the extra code 
byte (which is harmless) you should fix tcg_out_modrm.

Also, please use C comments, not c++ style //.

Paul

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [RFC] TCG new op: setcond
  2008-11-04 13:16 ` Paul Brook
@ 2008-11-04 13:33   ` Laurent Desnogues
  0 siblings, 0 replies; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-04 13:33 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel

On Tue, Nov 4, 2008 at 2:16 PM, Paul Brook <paul@codesourcery.com> wrote:
>> this patch implements a new TCG op, setcond, that sets a temp
>> to 1 if the condition is true, else to 0.  The benefit is the potential
>> removal of brcond instructions, and helpers size reduction which
>> can lead to using TCG instead of helpers.
>
>>   - a variant that sets -1 instead of 1 for masking
>
> I'm worried about this. If we're not careful we'll end up with an explosion of
> different patterns, many of which aren't optimal on different hosts.

I am worried too.  I added that because you mentioned you would
prefer -1 over 1.  I am still unsure which one of these variants is
the most useful and/or host runtime critical (one can be derived
from the other by generating one extra TCG op).

>>   - 64 bit setcond's
>
> You should do this sooner rather than later, and on a 32-bit host.

OK.

>> +    /* clear ret since setcc only sets the lower 8 bits */
>> +    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3) | rexw, ret, ret);
>
> This is broken. Inputs and outputs may overlap.

Oh right.  I will use some bit extending instruction.

>> +    // TODO this should use tcg_out_modrm
>> +    //      however currently tcg_out_modrm outputs an extra byte for
>> [abcd]l +    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT |
>> P_REXB, ret, 0)
>
> This is the wrong way to fix this. If you really care about the extra code
> byte (which is harmless) you should fix tcg_out_modrm.

I asked about that directly to Fabrice yesterday.  I am waiting for
his answer.  I certainly am not very good at x86 and its baroque
encoding :)

> Also, please use C comments, not c++ style //.

These were done so that it is explicit they are here temporarily for
reviewers.  They will disappear in the final patch.

Thanks for the comments.


Laurent

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [RFC] TCG new op: setcond
  2008-11-04 10:15 [Qemu-devel] " Laurent Desnogues
  2008-11-04 13:16 ` Paul Brook
@ 2008-11-04 14:24 ` Avi Kivity
  2008-11-05 16:11   ` Laurent Desnogues
  1 sibling, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2008-11-04 14:24 UTC (permalink / raw)
  To: qemu-devel

Laurent Desnogues wrote:
> this patch implements a new TCG op, setcond, that sets a temp
> to 1 if the condition is true, else to 0.  The benefit is the potential
> removal of brcond instructions, and helpers size reduction which
> can lead to using TCG instead of helpers.
>
> This patch is only posted here to get comments before I dig
> further into that and propose a proper update.
>
> One of the ARM helpers (sub with flag settings) has been
> converted to TCG and uses setcond to compute carry.
>
> setcond has been implemented only for x86_64 TCG back-end.
>
>   
> +// TODO should apply to setcond_i64 but not tested
> +static void tcg_out_setcond(TCGContext *s, int cond,
> +                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
> +{
> +    /* clear ret since setcc only sets the lower 8 bits */
> +    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3) | rexw, ret, ret);
> +    /* cmp */
> +    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
> +    /* setcc */
> +    // TODO this should use tcg_out_modrm
> +    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
> +    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
> +    if (ret > 3)
> +        tcg_out8(s, 0x40);
> +    else if (ret > 7)
> +        tcg_out8(s, 0x41);
>   

if ret == 8 this fails.  You need to reverse the order of the tests.

> +    tcg_out8(s, 0x0f);
> +    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
> +    tcg_out8(s, 0xc0 + (ret & 7));
> +}
> +
>  
>   

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [RFC] TCG new op: setcond
  2008-11-04 14:24 ` Avi Kivity
@ 2008-11-05 16:11   ` Laurent Desnogues
  2008-11-09 10:50     ` Blue Swirl
  0 siblings, 1 reply; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-05 16:11 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1280 bytes --]

Hello,

Here is an updated patch for setcond.

Changes since the previous patch:

  - support TCG i386 for 32 and 64 bit variants
  - support TCG x86_64 for 32 and 64 bit variants
  - fix for x86_64 prefix output
  - fix for input/output overlap
  - use setcond for Alpha cmp instructions
  - add setcondi

Digging into x86 is certainly not a pleasant experience for me,
especially as I have no experience with it.  So this patch may
contain bugs and oversights;  note however that my changes were
tested on x86_64 and i386 platforms with test programs for
Alpha and ARM.

What remains to be done:

  - document setcond in tcg/README
  - implement it in all backends (ARM, PPC, PPC64, SPARC)
  - use it at least once in every frontend (cris, i386, m68k, MIPS,
    PPC, sh4, SPARC)
  - provide tests.

Also, a choice remains to be made between returning 1 or -1
when the test succeeds.

I am concerned by 64 bit setcond on the register-starved i386.
Isn't there a high risk the register allocator will run out of
registers?  If so, I guess it should be rewritten with a helper.

Thanks to Paul and Avi for their comments and for uncovering silly
bugs and oversights.

Comments and criticisms welcome as usual :-)


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

[-- Attachment #2: setcond-2.patch.bz2 --]
[-- Type: application/x-bzip2, Size: 3143 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Qemu-devel] [RFC] TCG new op: setcond
@ 2008-11-08 19:13 Laurent Desnogues
  2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
  0 siblings, 1 reply; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-08 19:13 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 722 bytes --]

Hello,

this patch implements a new TCG op, setcond, that sets a temp
to 1 if the condition is true, else to 0.  The benefit is the potential
removal of brcond instructions, and helpers size reduction which
can lead to using TCG instead of helpers.

setcond(i)_i{32,64} have been implemented only for x86_64 and
i386 TCG back-ends.

One ARM helper was converted to TCG using setcond.  Alpha
and MIPS are also patched to use setcond.

On my TODO list:

 - implement it in all backends (arm, ppc, ppc64, sparc)
 - use it at least once in every frontend

Comments starting with // in the patch are questions and/or
TODO items.

Please feel free to comment.


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-3.patch --]
[-- Type: text/x-patch; name=setcond-3.patch, Size: 19422 bytes --]

Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c	(revision 5650)
+++ target-alpha/translate.c	(working copy)
@@ -504,30 +504,20 @@
                                   int ra, int rb, int rc,
                                   int islit, uint8_t lit)
 {
-    int l1, l2;
     TCGv tmp;
 
     if (unlikely(rc == 31))
-    return;
+        return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
     if (ra != 31) {
         tmp = tcg_temp_new(TCG_TYPE_I64);
         tcg_gen_mov_i64(tmp, cpu_ir[ra]);
     } else
         tmp = tcg_const_i64(0);
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        tcg_gen_setcondi_i64(cond, cpu_ir[rc], tmp, lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
-
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+        tcg_gen_setcond_i64(cond, cpu_ir[rc], tmp, cpu_ir[rb]);
 }
 
 static always_inline int translate_one (DisasContext *ctx, uint32_t insn)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5650)
+++ target-mips/translate.c	(working copy)
@@ -771,15 +771,7 @@
 #define OP_COND(name, cond)                                   \
 static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcond_tl(cond, t0, t0, t1);                     \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -792,15 +784,7 @@
 #define OP_CONDI(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, val);                     \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
@@ -809,15 +793,7 @@
 #define OP_CONDZ(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t)               \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, 0);                       \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5650)
+++ tcg/tcg-op.h	(working copy)
@@ -159,6 +159,21 @@
     *gen_opparam_ptr++ = arg6;
 }
 
+static inline void tcg_gen_op7i(int opc, TCGv arg1, TCGv arg2,
+                                TCGv arg3, TCGv arg4,
+                                TCGv arg5, TCGv arg6,
+                                TCGArg arg7)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV(arg1);
+    *gen_opparam_ptr++ = GET_TCGV(arg2);
+    *gen_opparam_ptr++ = GET_TCGV(arg3);
+    *gen_opparam_ptr++ = GET_TCGV(arg4);
+    *gen_opparam_ptr++ = GET_TCGV(arg5);
+    *gen_opparam_ptr++ = GET_TCGV(arg6);
+    *gen_opparam_ptr++ = arg7;
+}
+
 static inline void gen_set_label(int n)
 {
     tcg_gen_op1i(INDEX_op_set_label, n);
@@ -499,6 +514,20 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -754,6 +783,14 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op7i(INDEX_op_setcond2_i32,
+                 ret, TCGV_HIGH(ret), arg1, TCGV_HIGH(arg1),
+                 arg2, TCGV_HIGH(arg2), cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -972,6 +1009,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, int32_t arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -1073,6 +1116,15 @@
         tcg_temp_free(t0);
     }
 }
+
+static inline void tcg_gen_setcondi_i64(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcondi_i64(int cond, TCGv arg1, int64_t arg2,
                                        int label_index)
 {
@@ -1088,7 +1140,6 @@
     tcg_temp_free(t0);
 }
 
-
 /***************************************/
 /* optional operations */
 
@@ -1864,6 +1915,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i64
 #define tcg_gen_sar_tl tcg_gen_sar_i64
 #define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
@@ -1927,6 +1980,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i32
 #define tcg_gen_sar_tl tcg_gen_sar_i32
 #define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5650)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,10 +76,13 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
+DEF2(setcond2_i32, 2, 4, 1, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
 #endif
@@ -129,6 +132,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 1, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/i386/tcg-target.c
===================================================================
--- tcg/i386/tcg-target.c	(revision 5650)
+++ tcg/i386/tcg-target.c	(working copy)
@@ -162,6 +162,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -327,6 +328,114 @@
     }
 }
 
+// TODO const_arg optimization?
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* setcc */
+    tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT, 0, ret);
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0xb6 | P_EXT, ret, ret);
+}
+
+// TODO const_arg optimization?
+static void tcg_out_setcond2_brcond(TCGContext *s, int cond,
+                                    TCGArg arg1, TCGArg arg2,
+                                    int label_index)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* jcc */
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+// TODO const_arg optimization?
+// TODO use cmov (i686 only...)?
+/* args:
+ *   0 low(ret)
+ *   1 high(ret)
+ *   2 low(arg1)
+ *   3 hi(arg1)
+ *   4 low(arg2)
+ *   5 hi(arg2)
+ *   6 condition
+ * Note:  this is basically a copy of tcg_out_brcond2
+ */
+static void tcg_out_setcond2(TCGContext *s,
+                             const TCGArg *args)
+{
+    int label_zero, label_one;
+
+    label_zero = gen_new_label();
+    label_one = gen_new_label();
+    switch (args[6]) {
+    case TCG_COND_EQ:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_EQ, args[3], args[5], label_one);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_one);
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[3], args[5], label_one);
+        break;
+    case TCG_COND_LT:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LE:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GT:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GE:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_label(s, label_zero, (tcg_target_long)s->code_ptr);
+    /* clear lower part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[0], args[0]);
+    /* branch over next instruction which is 5 bytes long */
+    tcg_out8(s, 0xeb);
+    tcg_out8(s, 5);
+    /* set lower part of result to 1 */
+    tcg_out_label(s, label_one, (tcg_target_long)s->code_ptr);
+    tcg_out8(s, 0xb8 + args[0]);
+    tcg_out32(s, 1);
+    /* clear higher part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[1], args[1]);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index)
@@ -1013,6 +1122,12 @@
         else
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args);
+        break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
         break;
@@ -1088,6 +1203,11 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "q", "r", "r" } },
+    // TODO add 'i'
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
Index: tcg/README
===================================================================
--- tcg/README	(revision 5650)
+++ tcg/README	(working copy)
@@ -277,6 +277,10 @@
 
 64 bit byte swap
 
+* setcond_i32/i64 cond, t0, t1, t2
+
+Set t0 to 1 if t1 cond t2 is true, else t0 is set to 0. (cf brcond for cond.)
+
 * discard_i32/i64 t0
 
 Indicate that the value of t0 won't be used later. It is useful to
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5650)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,12 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+#if TCG_TARGET_REG_BITS == 32
+                || c == INDEX_op_setcond2_i32
+#elif TCG_TARGET_REG_BITS == 64
+                || c == INDEX_op_setcond_i64
+#endif
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5650)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,26 @@
     }
 }
 
+// TODO const_arg optimization
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40 + (ret >> 3));
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    // TODO that doesn't look pretty
+    tcg_out_modrm(s, 0xb6 | P_EXT | (rexw ? rexw : P_REXB), ret, ret);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1086,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1254,9 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,9 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5650)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Qemu-devel] Re: [RFC] TCG new op: setcond
  2008-11-08 19:13 [Qemu-devel] [RFC] TCG new op: setcond Laurent Desnogues
@ 2008-11-08 19:32 ` Laurent Desnogues
  2008-11-08 19:55   ` Laurent Desnogues
  0 siblings, 1 reply; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-08 19:32 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 984 bytes --]

Same patch, with a fix for a problem revealed by DEBUG_TCGV. Thanks to Aurelien.


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

On Sat, Nov 8, 2008 at 8:13 PM, Laurent Desnogues
<laurent.desnogues@gmail.com> wrote:
>
> this patch implements a new TCG op, setcond, that sets a temp
> to 1 if the condition is true, else to 0.  The benefit is the potential
> removal of brcond instructions, and helpers size reduction which
> can lead to using TCG instead of helpers.
>
> setcond(i)_i{32,64} have been implemented only for x86_64 and
> i386 TCG back-ends.
>
> One ARM helper was converted to TCG using setcond.  Alpha
> and MIPS are also patched to use setcond.
>
> On my TODO list:
>
>  - implement it in all backends (arm, ppc, ppc64, sparc)
>  - use it at least once in every frontend
>
> Comments starting with // in the patch are questions and/or
> TODO.
>
> Please feel free to comment.
>
>
> Laurent
>
> Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-3-fixed.patch --]
[-- Type: text/x-patch; name=setcond-3-fixed.patch, Size: 19416 bytes --]

Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c	(revision 5650)
+++ target-alpha/translate.c	(working copy)
@@ -504,30 +504,20 @@
                                   int ra, int rb, int rc,
                                   int islit, uint8_t lit)
 {
-    int l1, l2;
     TCGv tmp;
 
     if (unlikely(rc == 31))
-    return;
+        return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
     if (ra != 31) {
         tmp = tcg_temp_new(TCG_TYPE_I64);
         tcg_gen_mov_i64(tmp, cpu_ir[ra]);
     } else
         tmp = tcg_const_i64(0);
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        tcg_gen_setcondi_i64(cond, cpu_ir[rc], tmp, lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
-
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+        tcg_gen_setcond_i64(cond, cpu_ir[rc], tmp, cpu_ir[rb]);
 }
 
 static always_inline int translate_one (DisasContext *ctx, uint32_t insn)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5650)
+++ target-mips/translate.c	(working copy)
@@ -771,15 +771,7 @@
 #define OP_COND(name, cond)                                   \
 static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcond_tl(cond, t0, t0, t1);                     \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -792,15 +784,7 @@
 #define OP_CONDI(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, val);                     \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
@@ -809,15 +793,7 @@
 #define OP_CONDZ(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t)               \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, 0);                       \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5650)
+++ tcg/tcg-op.h	(working copy)
@@ -159,6 +159,21 @@
     *gen_opparam_ptr++ = arg6;
 }
 
+static inline void tcg_gen_op7i(int opc, TCGv arg1, TCGv arg2,
+                                TCGv arg3, TCGv arg4,
+                                TCGv arg5, TCGv arg6,
+                                TCGArg arg7)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV(arg1);
+    *gen_opparam_ptr++ = GET_TCGV(arg2);
+    *gen_opparam_ptr++ = GET_TCGV(arg3);
+    *gen_opparam_ptr++ = GET_TCGV(arg4);
+    *gen_opparam_ptr++ = GET_TCGV(arg5);
+    *gen_opparam_ptr++ = GET_TCGV(arg6);
+    *gen_opparam_ptr++ = arg7;
+}
+
 static inline void gen_set_label(int n)
 {
     tcg_gen_op1i(INDEX_op_set_label, n);
@@ -499,6 +514,20 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -754,6 +783,14 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op7i(INDEX_op_setcond2_i32,
+                 ret, TCGV_HIGH(ret), arg1, TCGV_HIGH(arg1),
+                 arg2, TCGV_HIGH(arg2), cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -972,6 +1009,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -1073,6 +1116,15 @@
         tcg_temp_free(t0);
     }
 }
+
+static inline void tcg_gen_setcondi_i64(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcondi_i64(int cond, TCGv arg1, int64_t arg2,
                                        int label_index)
 {
@@ -1088,7 +1140,6 @@
     tcg_temp_free(t0);
 }
 
-
 /***************************************/
 /* optional operations */
 
@@ -1864,6 +1915,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i64
 #define tcg_gen_sar_tl tcg_gen_sar_i64
 #define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
@@ -1927,6 +1980,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i32
 #define tcg_gen_sar_tl tcg_gen_sar_i32
 #define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5650)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,10 +76,13 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
+DEF2(setcond2_i32, 2, 4, 1, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
 #endif
@@ -129,6 +132,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 1, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/i386/tcg-target.c
===================================================================
--- tcg/i386/tcg-target.c	(revision 5650)
+++ tcg/i386/tcg-target.c	(working copy)
@@ -162,6 +162,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -327,6 +328,114 @@
     }
 }
 
+// TODO const_arg optimization?
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* setcc */
+    tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT, 0, ret);
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0xb6 | P_EXT, ret, ret);
+}
+
+// TODO const_arg optimization?
+static void tcg_out_setcond2_brcond(TCGContext *s, int cond,
+                                    TCGArg arg1, TCGArg arg2,
+                                    int label_index)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* jcc */
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+// TODO const_arg optimization?
+// TODO use cmov (i686 only...)?
+/* args:
+ *   0 low(ret)
+ *   1 high(ret)
+ *   2 low(arg1)
+ *   3 hi(arg1)
+ *   4 low(arg2)
+ *   5 hi(arg2)
+ *   6 condition
+ * Note:  this is basically a copy of tcg_out_brcond2
+ */
+static void tcg_out_setcond2(TCGContext *s,
+                             const TCGArg *args)
+{
+    int label_zero, label_one;
+
+    label_zero = gen_new_label();
+    label_one = gen_new_label();
+    switch (args[6]) {
+    case TCG_COND_EQ:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_EQ, args[3], args[5], label_one);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_one);
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[3], args[5], label_one);
+        break;
+    case TCG_COND_LT:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LE:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GT:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GE:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_label(s, label_zero, (tcg_target_long)s->code_ptr);
+    /* clear lower part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[0], args[0]);
+    /* branch over next instruction which is 5 bytes long */
+    tcg_out8(s, 0xeb);
+    tcg_out8(s, 5);
+    /* set lower part of result to 1 */
+    tcg_out_label(s, label_one, (tcg_target_long)s->code_ptr);
+    tcg_out8(s, 0xb8 + args[0]);
+    tcg_out32(s, 1);
+    /* clear higher part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[1], args[1]);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index)
@@ -1013,6 +1122,12 @@
         else
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args);
+        break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
         break;
@@ -1088,6 +1203,11 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "q", "r", "r" } },
+    // TODO add 'i'
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
Index: tcg/README
===================================================================
--- tcg/README	(revision 5650)
+++ tcg/README	(working copy)
@@ -277,6 +277,10 @@
 
 64 bit byte swap
 
+* setcond_i32/i64 cond, t0, t1, t2
+
+Set t0 to 1 if t1 cond t2 is true, else t0 is set to 0. (cf brcond for cond.)
+
 * discard_i32/i64 t0
 
 Indicate that the value of t0 won't be used later. It is useful to
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5650)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,12 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+#if TCG_TARGET_REG_BITS == 32
+                || c == INDEX_op_setcond2_i32
+#elif TCG_TARGET_REG_BITS == 64
+                || c == INDEX_op_setcond_i64
+#endif
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5650)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,26 @@
     }
 }
 
+// TODO const_arg optimization
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40 + (ret >> 3));
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    // TODO that doesn't look pretty
+    tcg_out_modrm(s, 0xb6 | P_EXT | (rexw ? rexw : P_REXB), ret, ret);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1086,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1254,9 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,9 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5650)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Qemu-devel] Re: [RFC] TCG new op: setcond
  2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
@ 2008-11-08 19:55   ` Laurent Desnogues
  0 siblings, 0 replies; 9+ messages in thread
From: Laurent Desnogues @ 2008-11-08 19:55 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1263 bytes --]

Yet another resend, this time fixing a copy/paste error spotted by Stuart.
Sorry for all that.


Laurent

Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>

On Sat, Nov 8, 2008 at 8:32 PM, Laurent Desnogues
<laurent.desnogues@gmail.com> wrote:
> Same patch, with a fix for a problem revealed by DEBUG_TCGV. Thanks to Aurelien.
>
>
> Laurent
>
> Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
>
> On Sat, Nov 8, 2008 at 8:13 PM, Laurent Desnogues
> <laurent.desnogues@gmail.com> wrote:
>>
>> this patch implements a new TCG op, setcond, that sets a temp
>> to 1 if the condition is true, else to 0.  The benefit is the potential
>> removal of brcond instructions, and helpers size reduction which
>> can lead to using TCG instead of helpers.
>>
>> setcond(i)_i{32,64} have been implemented only for x86_64 and
>> i386 TCG back-ends.
>>
>> One ARM helper was converted to TCG using setcond.  Alpha
>> and MIPS are also patched to use setcond.
>>
>> On my TODO list:
>>
>>  - implement it in all backends (arm, ppc, ppc64, sparc)
>>  - use it at least once in every frontend
>>
>> Comments starting with // in the patch are questions and/or
>> TODO.
>>
>> Please feel free to comment.
>>
>>
>> Laurent
>>
>> Signed-off-by: Laurent Desnogues <laurent.desnogues@gmail.com>
>>
>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setcond-3-fixed2.patch --]
[-- Type: text/x-patch; name=setcond-3-fixed2.patch, Size: 19416 bytes --]

Index: target-alpha/translate.c
===================================================================
--- target-alpha/translate.c	(revision 5650)
+++ target-alpha/translate.c	(working copy)
@@ -504,30 +504,20 @@
                                   int ra, int rb, int rc,
                                   int islit, uint8_t lit)
 {
-    int l1, l2;
     TCGv tmp;
 
     if (unlikely(rc == 31))
-    return;
+        return;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-
     if (ra != 31) {
         tmp = tcg_temp_new(TCG_TYPE_I64);
         tcg_gen_mov_i64(tmp, cpu_ir[ra]);
     } else
         tmp = tcg_const_i64(0);
     if (islit)
-        tcg_gen_brcondi_i64(cond, tmp, lit, l1);
+        tcg_gen_setcondi_i64(cond, cpu_ir[rc], tmp, lit);
     else
-        tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
-
-    tcg_gen_movi_i64(cpu_ir[rc], 0);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_movi_i64(cpu_ir[rc], 1);
-    gen_set_label(l2);
+        tcg_gen_setcond_i64(cond, cpu_ir[rc], tmp, cpu_ir[rb]);
 }
 
 static always_inline int translate_one (DisasContext *ctx, uint32_t insn)
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c	(revision 5650)
+++ target-mips/translate.c	(working copy)
@@ -771,15 +771,7 @@
 #define OP_COND(name, cond)                                   \
 static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcond_tl(cond, t0, t0, t1);                     \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -792,15 +784,7 @@
 #define OP_CONDI(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, val);                     \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
@@ -809,15 +793,7 @@
 #define OP_CONDZ(name, cond)                                  \
 static inline void glue(gen_op_, name) (TCGv t)               \
 {                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+    tcg_gen_setcondi_tl(cond, t, t, 0);                       \
 }
 OP_CONDZ(gez, TCG_COND_GE);
 OP_CONDZ(gtz, TCG_COND_GT);
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h	(revision 5650)
+++ tcg/tcg-op.h	(working copy)
@@ -159,6 +159,21 @@
     *gen_opparam_ptr++ = arg6;
 }
 
+static inline void tcg_gen_op7i(int opc, TCGv arg1, TCGv arg2,
+                                TCGv arg3, TCGv arg4,
+                                TCGv arg5, TCGv arg6,
+                                TCGArg arg7)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV(arg1);
+    *gen_opparam_ptr++ = GET_TCGV(arg2);
+    *gen_opparam_ptr++ = GET_TCGV(arg3);
+    *gen_opparam_ptr++ = GET_TCGV(arg4);
+    *gen_opparam_ptr++ = GET_TCGV(arg5);
+    *gen_opparam_ptr++ = GET_TCGV(arg6);
+    *gen_opparam_ptr++ = arg7;
+}
+
 static inline void gen_set_label(int n)
 {
     tcg_gen_op1i(INDEX_op_set_label, n);
@@ -499,6 +514,20 @@
     }
 }
 
+static inline void tcg_gen_setcond_i32(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcondi_i32(int cond, TCGv ret,
+                                        TCGv arg1, int32_t arg2)
+{
+    TCGv t0 = tcg_const_i32(arg2);
+    tcg_gen_setcond_i32(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcond_i32(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -754,6 +783,14 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op7i(INDEX_op_setcond2_i32,
+                 ret, TCGV_HIGH(ret), arg1, TCGV_HIGH(arg1),
+                 arg2, TCGV_HIGH(arg2), cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -972,6 +1009,12 @@
     }
 }
 
+static inline void tcg_gen_setcond_i64(int cond, TCGv ret,
+                                       TCGv arg1, TCGv arg2)
+{
+    tcg_gen_op4i(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+}
+
 static inline void tcg_gen_brcond_i64(int cond, TCGv arg1, TCGv arg2, 
                                       int label_index)
 {
@@ -1073,6 +1116,15 @@
         tcg_temp_free(t0);
     }
 }
+
+static inline void tcg_gen_setcondi_i64(int cond, TCGv ret,
+                                        TCGv arg1, int64_t arg2)
+{
+    TCGv t0 = tcg_const_i64(arg2);
+    tcg_gen_setcond_i64(cond, ret, arg1, t0);
+    tcg_temp_free(t0);
+}
+
 static inline void tcg_gen_brcondi_i64(int cond, TCGv arg1, int64_t arg2,
                                        int label_index)
 {
@@ -1088,7 +1140,6 @@
     tcg_temp_free(t0);
 }
 
-
 /***************************************/
 /* optional operations */
 
@@ -1864,6 +1915,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i64
 #define tcg_gen_sar_tl tcg_gen_sar_i64
 #define tcg_gen_sari_tl tcg_gen_sari_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
 #define tcg_gen_brcond_tl tcg_gen_brcond_i64
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
 #define tcg_gen_mul_tl tcg_gen_mul_i64
@@ -1927,6 +1980,8 @@
 #define tcg_gen_shri_tl tcg_gen_shri_i32
 #define tcg_gen_sar_tl tcg_gen_sar_i32
 #define tcg_gen_sari_tl tcg_gen_sari_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
 #define tcg_gen_brcond_tl tcg_gen_brcond_i32
 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
 #define tcg_gen_mul_tl tcg_gen_mul_i32
Index: tcg/tcg-opc.h
===================================================================
--- tcg/tcg-opc.h	(revision 5650)
+++ tcg/tcg-opc.h	(working copy)
@@ -76,10 +76,13 @@
 DEF2(shr_i32, 1, 2, 0, 0)
 DEF2(sar_i32, 1, 2, 0, 0)
 
+DEF2(setcond_i32, 1, 2, 1, 0)
+
 DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
 DEF2(add2_i32, 2, 4, 0, 0)
 DEF2(sub2_i32, 2, 4, 0, 0)
+DEF2(setcond2_i32, 2, 4, 1, 0)
 DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF2(mulu2_i32, 2, 2, 0, 0)
 #endif
@@ -129,6 +132,8 @@
 DEF2(shr_i64, 1, 2, 0, 0)
 DEF2(sar_i64, 1, 2, 0, 0)
 
+DEF2(setcond_i64, 1, 2, 1, 0)
+
 DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
 DEF2(ext8s_i64, 1, 1, 0, 0)
Index: tcg/i386/tcg-target.c
===================================================================
--- tcg/i386/tcg-target.c	(revision 5650)
+++ tcg/i386/tcg-target.c	(working copy)
@@ -162,6 +162,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to the setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -327,6 +328,114 @@
     }
 }
 
+// TODO const_arg optimization?
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* setcc */
+    tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT, 0, ret);
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    tcg_out_modrm(s, 0xb6 | P_EXT, ret, ret);
+}
+
+// TODO const_arg optimization?
+static void tcg_out_setcond2_brcond(TCGContext *s, int cond,
+                                    TCGArg arg1, TCGArg arg2,
+                                    int label_index)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+    /* jcc */
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+// TODO const_arg optimization?
+// TODO use cmov (i686 only...)?
+/* args:
+ *   0 low(ret)
+ *   1 high(ret)
+ *   2 low(arg1)
+ *   3 hi(arg1)
+ *   4 low(arg2)
+ *   5 hi(arg2)
+ *   6 condition
+ * Note:  this is basically a copy of tcg_out_brcond2
+ */
+static void tcg_out_setcond2(TCGContext *s,
+                             const TCGArg *args)
+{
+    int label_zero, label_one;
+
+    label_zero = gen_new_label();
+    label_one = gen_new_label();
+    switch (args[6]) {
+    case TCG_COND_EQ:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_EQ, args[3], args[5], label_one);
+        break;
+    case TCG_COND_NE:
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[2], args[4], label_one);
+        tcg_out_setcond2_brcond(s, TCG_COND_NE, args[3], args[5], label_one);
+        break;
+    case TCG_COND_LT:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LE:
+        tcg_out_setcond2_brcond(s, TCG_COND_LT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GT:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GE:
+        tcg_out_setcond2_brcond(s, TCG_COND_GT, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_LTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_LEU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[2], args[4], label_one);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_setcond2_brcond(s, TCG_COND_GTU, args[3], args[5], label_one);
+        tcg_out_jxx(s, JCC_JNE, label_zero);
+        tcg_out_setcond2_brcond(s, TCG_COND_GEU, args[2], args[4], label_one);
+        break;
+    default:
+        tcg_abort();
+    }
+    tcg_out_label(s, label_zero, (tcg_target_long)s->code_ptr);
+    /* clear lower part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[0], args[0]);
+    /* branch over next instruction which is 5 bytes long */
+    tcg_out8(s, 0xeb);
+    tcg_out8(s, 5);
+    /* set lower part of result to 1 */
+    tcg_out_label(s, label_one, (tcg_target_long)s->code_ptr);
+    tcg_out8(s, 0xb8 + args[0]);
+    tcg_out32(s, 1);
+    /* clear higher part of result */
+    tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), args[1], args[1]);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index)
@@ -1013,6 +1122,12 @@
         else
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args);
+        break;
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
         break;
@@ -1088,6 +1203,11 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "q", "r", "r" } },
+    // TODO add 'i'
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
Index: tcg/README
===================================================================
--- tcg/README	(revision 5650)
+++ tcg/README	(working copy)
@@ -277,6 +277,10 @@
 
 64 bit byte swap
 
+* setcond_i32/i64 cond, t0, t1, t2
+
+Set t0 to 1 if t1 cond t2 is true, else t0 is set to 0. (cf brcond for cond.)
+
 * discard_i32/i64 t0
 
 Indicate that the value of t0 won't be used later. It is useful to
Index: tcg/tcg.c
===================================================================
--- tcg/tcg.c	(revision 5650)
+++ tcg/tcg.c	(working copy)
@@ -877,6 +877,12 @@
 #elif TCG_TARGET_REG_BITS == 64
                 || c == INDEX_op_brcond_i64
 #endif
+                || c == INDEX_op_setcond_i32
+#if TCG_TARGET_REG_BITS == 32
+                || c == INDEX_op_setcond2_i32
+#elif TCG_TARGET_REG_BITS == 64
+                || c == INDEX_op_setcond_i64
+#endif
                 ) {
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
                     fprintf(outfile, ",%s", cond_name[args[k++]]);
Index: tcg/x86_64/tcg-target.c
===================================================================
--- tcg/x86_64/tcg-target.c	(revision 5650)
+++ tcg/x86_64/tcg-target.c	(working copy)
@@ -198,6 +198,7 @@
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* The following defines apply to the setcc instruction too. */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -464,6 +465,26 @@
     }
 }
 
+// TODO const_arg optimization
+static void tcg_out_setcond(TCGContext *s, int cond,
+                            TCGArg ret, TCGArg arg1, TCGArg arg2, int rexw)
+{
+    /* cmp */
+    tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
+    /* setcc */
+    // TODO this should use tcg_out_modrm
+    //      however currently tcg_out_modrm outputs an extra byte for [abcd]l
+    //tcg_out_modrm(s, (0x90 + tcg_cond_to_jcc[cond]) | P_EXT | P_REXB, ret, 0);
+    if (ret > 3)
+        tcg_out8(s, 0x40 + (ret >> 3));
+    tcg_out8(s, 0x0f);
+    tcg_out8(s, 0x90 + tcg_cond_to_jcc[cond]);
+    tcg_out8(s, 0xc0 + (ret & 7));
+    /* zero extend ret with movzbl since setcc only sets the lower 8 bits */
+    // TODO that doesn't look pretty
+    tcg_out_modrm(s, 0xb6 | P_EXT | (rexw ? rexw : P_REXB), ret, ret);
+}
+
 static void tcg_out_brcond(TCGContext *s, int cond, 
                            TCGArg arg1, TCGArg arg2, int const_arg2,
                            int label_index, int rexw)
@@ -1065,6 +1086,14 @@
         c = SHIFT_SAR;
         goto gen_shift64;
         
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
+        break;
+
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], P_REXW);
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], 
                        args[3], 0);
@@ -1225,6 +1254,9 @@
     { INDEX_op_shr_i32, { "r", "0", "ci" } },
     { INDEX_op_sar_i32, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i32, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
     { INDEX_op_mov_i64, { "r", "r" } },
@@ -1254,6 +1286,9 @@
     { INDEX_op_shr_i64, { "r", "0", "ci" } },
     { INDEX_op_sar_i64, { "r", "0", "ci" } },
 
+    // TODO add 'i' to third "r" (second input argument)
+    { INDEX_op_setcond_i64, { "r", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "re" } },
 
     { INDEX_op_bswap_i32, { "r", "0" } },
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(revision 5650)
+++ target-arm/translate.c	(working copy)
@@ -201,7 +201,6 @@
 
 #define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
-#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 #define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
@@ -243,6 +242,36 @@
     dead_tmp(tmp);
 }
 
+//#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
+static inline void gen_op_subl_T0_T1_cc(void)
+{
+    TCGv tmp32_res;
+    TCGv tmp1;
+    TCGv tmp2;
+
+    tmp32_res = new_tmp();
+    tcg_gen_sub_i32(tmp32_res, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, NF));
+    tcg_gen_st_i32(tmp32_res, cpu_env, offsetof(CPUState, ZF));
+
+    tmp1 = new_tmp();
+
+    tcg_gen_setcond_i32(TCG_COND_GEU, tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, CF));
+
+    tmp2 = new_tmp();
+    tcg_gen_xor_i32(tmp1, cpu_T[0], cpu_T[1]);
+    tcg_gen_xor_i32(tmp2, cpu_T[0], tmp32_res);
+    tcg_gen_and_i32(tmp1, tmp1, tmp2);
+    dead_tmp(tmp2);
+    tcg_gen_st_i32(tmp1, cpu_env, offsetof(CPUState, VF));
+    dead_tmp(tmp1);
+
+    tcg_gen_mov_i32(cpu_T[0], tmp32_res);
+
+    dead_tmp(tmp32_res);
+}
+
 static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Qemu-devel] [RFC] TCG new op: setcond
  2008-11-05 16:11   ` Laurent Desnogues
@ 2008-11-09 10:50     ` Blue Swirl
  0 siblings, 0 replies; 9+ messages in thread
From: Blue Swirl @ 2008-11-09 10:50 UTC (permalink / raw)
  To: qemu-devel

On 11/5/08, Laurent Desnogues <laurent.desnogues@gmail.com> wrote:
> Hello,
>
>  Here is an updated patch for setcond.
>
>  Changes since the previous patch:
>
>   - support TCG i386 for 32 and 64 bit variants
>   - support TCG x86_64 for 32 and 64 bit variants
>   - fix for x86_64 prefix output
>   - fix for input/output overlap
>   - use setcond for Alpha cmp instructions
>   - add setcondi
>
>  Digging into x86 is certainly not a pleasant experience for me,
>  especially as I have no experience with it.  So this patch may
>  contain bugs and oversights;  note however my changes were
>  tested on x86_64 and i386 platforms with test programs for
>  Alpha and ARM.
>
>  What remains to be done:
>
>
>   - document setcond in tcg/README
>
>   - implement it in all backends (ARM, PPC, PPC64, SPARC)

There is no equivalent instruction for Sparc32 (V8), so there it would
have to be implemented some other way. Currently, though, we assume
that V9 is always available.

Sparc64 (V9) has a conditional move instruction (MOVcc). The manual
suggests a three-instruction sequence (cmp, mov, movcc) to implement a
setcond-type operation. For setcondi where the immediate value is
zero, MOVR may be used to skip the comparison (mov, movrcc).

>  - use it at least once in every frontend (cris, i386, m68k, MIPS,
>    PPC, sh4, SPARC)

Candidate brconds to be replaced by setcond in target-sparc/translate.c
are: all of gen_cc_*, gen_op_mulscc, gen_op_div_cc and gen_cond_reg.
gen_cond_reg especially should benefit.

> Also there's a choice to be made between returning 1 if the test
> succeeds or -1.

'1' would be much more useful than '-1' for the Sparc target, because it
can be shifted to form PSR_xxx values that can be or'ed into the
condition codes. With '-1', all of the above candidates except
gen_cond_reg would need additional masking to remove all bits other
than the zeroth.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-11-09 10:50 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-11-08 19:13 [Qemu-devel] [RFC] TCG new op: setcond Laurent Desnogues
2008-11-08 19:32 ` [Qemu-devel] " Laurent Desnogues
2008-11-08 19:55   ` Laurent Desnogues
  -- strict thread matches above, loose matches on Subject: below --
2008-11-04 10:15 [Qemu-devel] " Laurent Desnogues
2008-11-04 13:16 ` Paul Brook
2008-11-04 13:33   ` Laurent Desnogues
2008-11-04 14:24 ` Avi Kivity
2008-11-05 16:11   ` Laurent Desnogues
2008-11-09 10:50     ` Blue Swirl

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).