qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] target/i386: use TSTEQ/TSTNE in x86 frontend
@ 2024-02-28 11:11 Paolo Bonzini
  2024-02-28 11:11 ` [PATCH 1/4] target/i386: use TSTEQ/TSTNE to test low bits Paolo Bonzini
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Paolo Bonzini @ 2024-02-28 11:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson

The TSTEQ/TSTNE operations can be useful to evaluate many conditionals.
They can be used to test the sign bit or equality to zero of a partial
register, as well as to check individual bits in EFLAGS.

Paolo

Based-on: <20240228110641.287205-1-pbonzini@redhat.com>

Paolo Bonzini (4):
  target/i386: use TSTEQ/TSTNE to test low bits
  target/i386: use TSTEQ/TSTNE to check flags
  target/i386: remove mask from CCPrepare
  tcg/optimize: optimize TSTNE using smask and zmask

 target/i386/tcg/translate.c | 115 ++++++++++++++++--------------------
 tcg/optimize.c              |  13 ++--
 target/i386/tcg/emit.c.inc  |   5 +-
 3 files changed, 60 insertions(+), 73 deletions(-)

-- 
2.43.2



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/4] target/i386: use TSTEQ/TSTNE to test low bits
  2024-02-28 11:11 [PATCH 0/4] target/i386: use TSTEQ/TSTNE in x86 frontend Paolo Bonzini
@ 2024-02-28 11:11 ` Paolo Bonzini
  2024-02-28 22:28   ` Richard Henderson
  2024-02-28 11:11 ` [PATCH 2/4] target/i386: use TSTEQ/TSTNE to check flags Paolo Bonzini
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2024-02-28 11:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson

When testing the sign bit or equality to zero of a partial register, it
is useful to use a single TSTEQ or TSTNE operation.  It can also be used
to test the parity flag, using bit 0 of the population count.

Do not do this for 32- and 64-bit values however, to avoid
large immediates.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 28 ++++++++++++++++++++--------
 target/i386/tcg/emit.c.inc  |  5 ++---
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 07f642dc9e9..fe9021833e3 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -927,11 +927,21 @@ typedef struct CCPrepare {
     bool no_setcond;
 } CCPrepare;
 
+static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size)
+{
+    if (size == MO_TL) {
+        return (CCPrepare) { .cond = TCG_COND_LT, .reg = src, .mask = -1 };
+    } else {
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src, .mask = -1,
+                             .imm = 1ull << ((8 << size) - 1) };
+    }
+}
+
 /* compute eflags.C to reg */
 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 {
     TCGv t0, t1;
-    int size, shift;
+    MemOp size;
 
     switch (s->cc_op) {
     case CC_OP_SUBB ... CC_OP_SUBQ:
@@ -966,9 +976,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_SHLB ... CC_OP_SHLQ:
         /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
         size = s->cc_op - CC_OP_SHLB;
-        shift = (8 << size) - 1;
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = (target_ulong)1 << shift };
+        return gen_prepare_sign_nz(cpu_cc_src, size);
 
     case CC_OP_MULB ... CC_OP_MULQ:
         return (CCPrepare) { .cond = TCG_COND_NE,
@@ -1028,8 +1036,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
     default:
         {
             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
-            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
-            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
+            return gen_prepare_sign_nz(cpu_cc_dst, size);
         }
     }
 }
@@ -1076,8 +1083,13 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
     default:
         {
             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
-            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
-            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+            if (size == MO_TL) {
+                return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst,
+                                     .mask = -1 };
+            } else {
+                return (CCPrepare) { .cond = TCG_COND_TSTEQ, .reg = cpu_cc_dst,
+                                     .mask = -1, .imm = (1ull << (8 << size)) - 1 };
+            }
         }
     }
 }
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 6bcf88ecd71..0e00f6635dd 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1209,7 +1209,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
         [JCC_Z] = TCG_COND_EQ,
         [JCC_BE] = TCG_COND_LEU,
         [JCC_S] = TCG_COND_LT,  /* test sign bit by comparing against 0 */
-        [JCC_P] = TCG_COND_EQ,  /* even parity - tests low bit of popcount */
+        [JCC_P] = TCG_COND_TSTEQ,  /* even parity - tests low bit of popcount */
         [JCC_L] = TCG_COND_LT,
         [JCC_LE] = TCG_COND_LE,
     };
@@ -1260,8 +1260,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     case JCC_P:
         tcg_gen_ext8u_tl(s->tmp0, s->T0);
         tcg_gen_ctpop_tl(s->tmp0, s->tmp0);
-        tcg_gen_andi_tl(s->tmp0, s->tmp0, 1);
-        cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+        cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1);
         break;
 
     case JCC_S:
-- 
2.43.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/4] target/i386: use TSTEQ/TSTNE to check flags
  2024-02-28 11:11 [PATCH 0/4] target/i386: use TSTEQ/TSTNE in x86 frontend Paolo Bonzini
  2024-02-28 11:11 ` [PATCH 1/4] target/i386: use TSTEQ/TSTNE to test low bits Paolo Bonzini
@ 2024-02-28 11:11 ` Paolo Bonzini
  2024-02-28 22:34   ` Richard Henderson
  2024-02-28 11:11 ` [PATCH 3/4] target/i386: remove mask from CCPrepare Paolo Bonzini
  2024-02-28 11:11 ` [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask Paolo Bonzini
  3 siblings, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2024-02-28 11:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson

The new conditions obviously come in handy when testing individual bits
of EFLAGS, and they make it possible to remove the .mask field of
CCPrepare.

Lowering to shift+and is done by the optimizer if necessary.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index fe9021833e3..63d520e0cba 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -995,8 +995,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
     case CC_OP_EFLAGS:
     case CC_OP_SARB ... CC_OP_SARQ:
         /* CC_SRC & 1 */
-        return (CCPrepare) { .cond = TCG_COND_NE,
-                             .reg = cpu_cc_src, .mask = CC_C };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE,
+                             .reg = cpu_cc_src, .mask = -1, .imm = CC_C };
 
     default:
        /* The need to compute only C from CC_OP_DYNAMIC is important
@@ -1013,8 +1013,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
 {
     gen_compute_eflags(s);
-    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                         .mask = CC_P };
+    return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                         .mask = -1, .imm = CC_P };
 }
 
 /* compute eflags.S to reg */
@@ -1028,8 +1028,8 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
     case CC_OP_ADCX:
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = CC_S };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                             .mask = -1, .imm = CC_S };
     case CC_OP_CLR:
     case CC_OP_POPCNT:
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
@@ -1057,8 +1057,8 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
                              .reg = cpu_cc_src, .mask = -1 };
     default:
         gen_compute_eflags(s);
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = CC_O };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                             .mask = -1, .imm = CC_O };
     }
 }
 
@@ -1073,8 +1073,8 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
     case CC_OP_ADCX:
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
-        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = CC_Z };
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                             .mask = -1, .imm = CC_Z };
     case CC_OP_CLR:
         return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
     case CC_OP_POPCNT:
@@ -1152,8 +1152,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             break;
         case JCC_BE:
             gen_compute_eflags(s);
-            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                               .mask = CC_Z | CC_C };
+            cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+                               .mask = -1, .imm = CC_Z | CC_C };
             break;
         case JCC_S:
             cc = gen_prepare_eflags_s(s, reg);
@@ -1167,8 +1167,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
                 reg = s->tmp0;
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
-            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
-                               .mask = CC_O };
+            cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
+                               .mask = -1, .imm = CC_O };
             break;
         default:
         case JCC_LE:
@@ -1177,8 +1177,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
                 reg = s->tmp0;
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
-            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
-                               .mask = CC_O | CC_Z };
+            cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
+                               .mask = -1, .imm = CC_O | CC_Z };
             break;
         }
         break;
-- 
2.43.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/4] target/i386: remove mask from CCPrepare
  2024-02-28 11:11 [PATCH 0/4] target/i386: use TSTEQ/TSTNE in x86 frontend Paolo Bonzini
  2024-02-28 11:11 ` [PATCH 1/4] target/i386: use TSTEQ/TSTNE to test low bits Paolo Bonzini
  2024-02-28 11:11 ` [PATCH 2/4] target/i386: use TSTEQ/TSTNE to check flags Paolo Bonzini
@ 2024-02-28 11:11 ` Paolo Bonzini
  2024-02-28 22:36   ` Richard Henderson
  2024-02-28 11:11 ` [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask Paolo Bonzini
  3 siblings, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2024-02-28 11:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson

With the introduction of TSTEQ and TSTNE the .mask field is always -1,
so remove all the now-unnecessary code.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 81 +++++++++++++------------------------
 1 file changed, 27 insertions(+), 54 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 63d520e0cba..6b4522c226d 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -922,7 +922,6 @@ typedef struct CCPrepare {
     TCGv reg;
     TCGv reg2;
     target_ulong imm;
-    target_ulong mask;
     bool use_reg2;
     bool no_setcond;
 } CCPrepare;
@@ -930,9 +929,9 @@ typedef struct CCPrepare {
 static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size)
 {
     if (size == MO_TL) {
-        return (CCPrepare) { .cond = TCG_COND_LT, .reg = src, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_LT, .reg = src };
     } else {
-        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src, .mask = -1,
+        return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src,
                              .imm = 1ull << ((8 << size) - 1) };
     }
 }
@@ -961,17 +960,17 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
     add_sub:
         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
-                             .reg2 = t1, .mask = -1, .use_reg2 = true };
+                             .reg2 = t1, .use_reg2 = true };
 
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
     case CC_OP_CLR:
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NEVER };
 
     case CC_OP_INCB ... CC_OP_INCQ:
     case CC_OP_DECB ... CC_OP_DECQ:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
-                             .mask = -1, .no_setcond = true };
+                             .no_setcond = true };
 
     case CC_OP_SHLB ... CC_OP_SHLQ:
         /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
@@ -980,23 +979,23 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
 
     case CC_OP_MULB ... CC_OP_MULQ:
         return (CCPrepare) { .cond = TCG_COND_NE,
-                             .reg = cpu_cc_src, .mask = -1 };
+                             .reg = cpu_cc_src };
 
     case CC_OP_BMILGB ... CC_OP_BMILGQ:
         size = s->cc_op - CC_OP_BMILGB;
         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
-        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0 };
 
     case CC_OP_ADCX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
-                             .mask = -1, .no_setcond = true };
+                             .no_setcond = true };
 
     case CC_OP_EFLAGS:
     case CC_OP_SARB ... CC_OP_SARQ:
         /* CC_SRC & 1 */
         return (CCPrepare) { .cond = TCG_COND_TSTNE,
-                             .reg = cpu_cc_src, .mask = -1, .imm = CC_C };
+                             .reg = cpu_cc_src, .imm = CC_C };
 
     default:
        /* The need to compute only C from CC_OP_DYNAMIC is important
@@ -1005,7 +1004,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
        gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                                cpu_cc_src2, cpu_cc_op);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
-                            .mask = -1, .no_setcond = true };
+                            .no_setcond = true };
     }
 }
 
@@ -1014,7 +1013,7 @@ static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
 {
     gen_compute_eflags(s);
     return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
-                         .mask = -1, .imm = CC_P };
+                         .imm = CC_P };
 }
 
 /* compute eflags.S to reg */
@@ -1029,10 +1028,10 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
-                             .mask = -1, .imm = CC_S };
+                             .imm = CC_S };
     case CC_OP_CLR:
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NEVER };
     default:
         {
             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
@@ -1048,17 +1047,16 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
-                             .mask = -1, .no_setcond = true };
+                             .no_setcond = true };
     case CC_OP_CLR:
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NEVER };
     case CC_OP_MULB ... CC_OP_MULQ:
-        return (CCPrepare) { .cond = TCG_COND_NE,
-                             .reg = cpu_cc_src, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src };
     default:
         gen_compute_eflags(s);
         return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
-                             .mask = -1, .imm = CC_O };
+                             .imm = CC_O };
     }
 }
 
@@ -1074,21 +1072,19 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
-                             .mask = -1, .imm = CC_Z };
+                             .imm = CC_Z };
     case CC_OP_CLR:
-        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_ALWAYS };
     case CC_OP_POPCNT:
-        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
-                             .mask = -1 };
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src };
     default:
         {
             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
             if (size == MO_TL) {
-                return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst,
-                                     .mask = -1 };
+                return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst };
             } else {
                 return (CCPrepare) { .cond = TCG_COND_TSTEQ, .reg = cpu_cc_dst,
-                                     .mask = -1, .imm = (1ull << (8 << size)) - 1 };
+                                     .imm = (1ull << (8 << size)) - 1 };
             }
         }
     }
@@ -1116,7 +1112,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             gen_extu(size, s->tmp4);
             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
-                               .reg2 = t0, .mask = -1, .use_reg2 = true };
+                               .reg2 = t0, .use_reg2 = true };
             break;
 
         case JCC_L:
@@ -1129,7 +1125,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             gen_exts(size, s->tmp4);
             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
             cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
-                               .reg2 = t0, .mask = -1, .use_reg2 = true };
+                               .reg2 = t0, .use_reg2 = true };
             break;
 
         default:
@@ -1153,7 +1149,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
         case JCC_BE:
             gen_compute_eflags(s);
             cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
-                               .mask = -1, .imm = CC_Z | CC_C };
+                               .imm = CC_Z | CC_C };
             break;
         case JCC_S:
             cc = gen_prepare_eflags_s(s, reg);
@@ -1168,7 +1164,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
             cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
-                               .mask = -1, .imm = CC_O };
+                               .imm = CC_O };
             break;
         default:
         case JCC_LE:
@@ -1178,7 +1174,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
             }
             tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
             cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
-                               .mask = -1, .imm = CC_O | CC_Z };
+                               .imm = CC_O | CC_Z };
             break;
         }
         break;
@@ -1203,16 +1199,6 @@ static void gen_setcc1(DisasContext *s, int b, TCGv reg)
         return;
     }
 
-    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
-        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
-        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
-        tcg_gen_andi_tl(reg, reg, 1);
-        return;
-    }
-    if (cc.mask != -1) {
-        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
-        cc.reg = reg;
-    }
     if (cc.use_reg2) {
         tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
     } else {
@@ -1231,10 +1217,6 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
-    if (cc.mask != -1) {
-        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
-        cc.reg = s->T0;
-    }
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
     } else {
@@ -1250,10 +1232,6 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
 
     gen_update_cc_op(s);
-    if (cc.mask != -1) {
-        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
-        cc.reg = s->T0;
-    }
     set_cc_op(s, CC_OP_DYNAMIC);
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -2518,11 +2496,6 @@ static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
 {
     CCPrepare cc = gen_prepare_cc(s, b, s->T1);
 
-    if (cc.mask != -1) {
-        TCGv t0 = tcg_temp_new();
-        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
-        cc.reg = t0;
-    }
     if (!cc.use_reg2) {
         cc.reg2 = tcg_constant_tl(cc.imm);
     }
-- 
2.43.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask
  2024-02-28 11:11 [PATCH 0/4] target/i386: use TSTEQ/TSTNE in x86 frontend Paolo Bonzini
                   ` (2 preceding siblings ...)
  2024-02-28 11:11 ` [PATCH 3/4] target/i386: remove mask from CCPrepare Paolo Bonzini
@ 2024-02-28 11:11 ` Paolo Bonzini
  2024-02-28 23:10   ` Richard Henderson
  3 siblings, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2024-02-28 11:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: richard.henderson

Generalize the existing optimization of "TSTNE x,sign" and "TSTNE x,-1".

This can be useful in some cases when the i386 frontend creates opcodes
that test against 0xff or 0x80.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tcg/optimize.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 752cc5c56b6..ab976a5bbe7 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -793,6 +793,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
                                      TCGArg *p1, TCGArg *p2, TCGArg *pcond)
 {
     TCGCond cond;
+    TempOptInfo *i1;
     bool swap;
     int r;
 
@@ -810,19 +811,21 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
         return -1;
     }
 
+    i1 = arg_info(*p1);
+
     /*
      * TSTNE x,x -> NE x,0
-     * TSTNE x,-1 -> NE x,0
+     * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
      */
-    if (args_are_copies(*p1, *p2) || arg_is_const_val(*p2, -1)) {
+    if (args_are_copies(*p1, *p2) ||
+        (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
         *p2 = arg_new_constant(ctx, 0);
         *pcond = tcg_tst_eqne_cond(cond);
         return -1;
     }
 
-    /* TSTNE x,sign -> LT x,0 */
-    if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
-                               ? INT32_MIN : INT64_MIN))) {
+    /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
+    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
         *p2 = arg_new_constant(ctx, 0);
         *pcond = tcg_tst_ltge_cond(cond);
         return -1;
-- 
2.43.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] target/i386: use TSTEQ/TSTNE to test low bits
  2024-02-28 11:11 ` [PATCH 1/4] target/i386: use TSTEQ/TSTNE to test low bits Paolo Bonzini
@ 2024-02-28 22:28   ` Richard Henderson
  0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2024-02-28 22:28 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel

On 2/28/24 01:11, Paolo Bonzini wrote:
> When testing the sign bit or equality to zero of a partial register, it
> is useful to use a single TSTEQ or TSTNE operation.  It can also be used
> to test the parity flag, using bit 0 of the population count.
> 
> Do not do this for 32- and 64-bit values however, to avoid
> large immediates.
> 
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
>   target/i386/tcg/translate.c | 28 ++++++++++++++++++++--------
>   target/i386/tcg/emit.c.inc  |  5 ++---
>   2 files changed, 22 insertions(+), 11 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] target/i386: use TSTEQ/TSTNE to check flags
  2024-02-28 11:11 ` [PATCH 2/4] target/i386: use TSTEQ/TSTNE to check flags Paolo Bonzini
@ 2024-02-28 22:34   ` Richard Henderson
  0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2024-02-28 22:34 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel

On 2/28/24 01:11, Paolo Bonzini wrote:
> The new condition obviously come in handy when testing individual bits
> of EFLAGS, and they make it possible to remove the .mask field of
> CCPrepare.
> 
> Lowering to shift+and is done by the optimizer if necessary.
> 
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
>   target/i386/tcg/translate.c | 32 ++++++++++++++++----------------
>   1 file changed, 16 insertions(+), 16 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] target/i386: remove mask from CCPrepare
  2024-02-28 11:11 ` [PATCH 3/4] target/i386: remove mask from CCPrepare Paolo Bonzini
@ 2024-02-28 22:36   ` Richard Henderson
  0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2024-02-28 22:36 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel

On 2/28/24 01:11, Paolo Bonzini wrote:
> With the introduction of TSTEQ and TSTNE the .mask field is always -1,
> so remove all the now-unnecessary code.
> 
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
>   target/i386/tcg/translate.c | 81 +++++++++++++------------------------
>   1 file changed, 27 insertions(+), 54 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask
  2024-02-28 11:11 ` [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask Paolo Bonzini
@ 2024-02-28 23:10   ` Richard Henderson
  2024-02-29  9:35     ` Paolo Bonzini
  0 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2024-02-28 23:10 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel

On 2/28/24 01:11, Paolo Bonzini wrote:
> -    /* TSTNE x,sign -> LT x,0 */
> -    if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
> -                               ? INT32_MIN : INT64_MIN))) {
> +    /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
> +    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {

This is a good idea, but s_mask isn't defined like you think -- it is *repetitions* of the 
sign bit, but not including the sign bit itself.  For INT64_MIN, s_mask == 0.

So for TSTNE min,min, (min & ~0) != 0, so the test won't pass.

r~


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask
  2024-02-28 23:10   ` Richard Henderson
@ 2024-02-29  9:35     ` Paolo Bonzini
  2024-02-29 17:17       ` Richard Henderson
  0 siblings, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2024-02-29  9:35 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel

On 2/29/24 00:10, Richard Henderson wrote:
> On 2/28/24 01:11, Paolo Bonzini wrote:
>> -    /* TSTNE x,sign -> LT x,0 */
>> -    if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
>> -                               ? INT32_MIN : INT64_MIN))) {
>> +    /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
>> +    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
> 
> This is a good idea, but s_mask isn't defined like you think -- it is 
> *repetitions* of the sign bit, but not including the sign bit itself.  
> For INT64_MIN, s_mask == 0.
> 
> So for TSTNE min,min, (min & ~0) != 0, so the test won't pass.

Oh! So I have to squash:

diff --git a/tcg/optimize.c b/tcg/optimize.c
index ab976a5bbe7..44d1b1a6d8a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -140,6 +140,12 @@ static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
      return ts_is_const_val(arg_temp(arg), val);
  }
  
+/* Calculate all the copies of the sign bit, both redundant and not. */
+static inline uint64_t all_sign_bit_copies(TempOptInfo *info)
+{
+    return (info->s_mask >> 1) | INT64_MIN;
+}
+
  static inline bool ts_is_copy(TCGTemp *ts)
  {
      return ts_info(ts)->next_copy != ts;
@@ -825,7 +831,7 @@ static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
      }
  
      /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
-    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
+    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~all_sign_bit_copies(i1)) == 0) {
          *p2 = arg_new_constant(ctx, 0);
          *pcond = tcg_tst_ltge_cond(cond);
          return -1;


I tested with

    movq $0xffffffff80000000, %rbx
    test %ebx, %ebx
    js y

and I get

  brcond_i64 cc_dst,$0x80000000,tstne,$L1

which works and matches your explanation:

  i1.s_mask == 0xffffffff00000000
  i2.val == 0x80000000
  all_sign_bit_copies(i1) == 0xffffffff80000000
  i2.val & ~all_sign_bit_copies(i1) == 0

Thanks!

Paolo



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask
  2024-02-29  9:35     ` Paolo Bonzini
@ 2024-02-29 17:17       ` Richard Henderson
  0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2024-02-29 17:17 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel

On 2/28/24 23:35, Paolo Bonzini wrote:
> On 2/29/24 00:10, Richard Henderson wrote:
>> On 2/28/24 01:11, Paolo Bonzini wrote:
>>> -    /* TSTNE x,sign -> LT x,0 */
>>> -    if (arg_is_const_val(*p2, (ctx->type == TCG_TYPE_I32
>>> -                               ? INT32_MIN : INT64_MIN))) {
>>> +    /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
>>> +    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
>>
>> This is a good idea, but s_mask isn't defined like you think -- it is *repetitions* of 
>> the sign bit, but not including the sign bit itself. For INT64_MIN, s_mask == 0.
>>
>> So for TSTNE min,min, (min & ~0) != 0, so the test won't pass.
> 
> Oh! So I have to squash:
> 
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index ab976a5bbe7..44d1b1a6d8a 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -140,6 +140,12 @@ static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
>       return ts_is_const_val(arg_temp(arg), val);
>   }
> 
> +/* Calculate all the copies of the sign bit, both redundant and not. */
> +static inline uint64_t all_sign_bit_copies(TempOptInfo *info)
> +{
> +    return (info->s_mask >> 1) | INT64_MIN;
> +}

You need to care about type too -- for TCG_TYPE_I32, you'll want to OR in INT32_MIN.  The 
high bits of s_mask will be unknown (might be 1's from fold_masks, might be 0 from reset_ts).

But otherwise that's a good solution.


r~


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-02-29 17:18 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-28 11:11 [PATCH 0/4] target/i386: use TSTEQ/TSTNE in x86 frontend Paolo Bonzini
2024-02-28 11:11 ` [PATCH 1/4] target/i386: use TSTEQ/TSTNE to test low bits Paolo Bonzini
2024-02-28 22:28   ` Richard Henderson
2024-02-28 11:11 ` [PATCH 2/4] target/i386: use TSTEQ/TSTNE to check flags Paolo Bonzini
2024-02-28 22:34   ` Richard Henderson
2024-02-28 11:11 ` [PATCH 3/4] target/i386: remove mask from CCPrepare Paolo Bonzini
2024-02-28 22:36   ` Richard Henderson
2024-02-28 11:11 ` [PATCH 4/4] tcg/optimize: optimize TSTNE using smask and zmask Paolo Bonzini
2024-02-28 23:10   ` Richard Henderson
2024-02-29  9:35     ` Paolo Bonzini
2024-02-29 17:17       ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).