qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: philmd@linaro.org, pbonzini@redhat.com
Subject: [PATCH v3 25/38] tcg/i386: Improve TSTNE/TSTEQ vs powers of two
Date: Thu, 11 Jan 2024 09:43:55 +1100	[thread overview]
Message-ID: <20240110224408.10444-26-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240110224408.10444-1-richard.henderson@linaro.org>

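Use "test x,x" when the bit is one of the 4 sign bits (bit 7, 15, 31
or 63), so that the result can be read from the sign flag.
Use "bt imm,x" otherwise, reading the result from the carry flag.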

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target-con-set.h |  6 ++--
 tcg/i386/tcg-target-con-str.h |  1 +
 tcg/i386/tcg-target.c.inc     | 54 +++++++++++++++++++++++++++++++----
 3 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 7d00a7dde8..e24241cfa2 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -20,7 +20,7 @@ C_O0_I2(L, L)
 C_O0_I2(qi, r)
 C_O0_I2(re, r)
 C_O0_I2(ri, r)
-C_O0_I2(r, re)
+C_O0_I2(r, reT)
 C_O0_I2(s, L)
 C_O0_I2(x, r)
 C_O0_I3(L, L, L)
@@ -34,7 +34,7 @@ C_O1_I1(r, r)
 C_O1_I1(x, r)
 C_O1_I1(x, x)
 C_O1_I2(q, 0, qi)
-C_O1_I2(q, r, re)
+C_O1_I2(q, r, reT)
 C_O1_I2(r, 0, ci)
 C_O1_I2(r, 0, r)
 C_O1_I2(r, 0, re)
@@ -50,7 +50,7 @@ C_N1_I2(r, r, r)
 C_N1_I2(r, r, rW)
 C_O1_I3(x, 0, x, x)
 C_O1_I3(x, x, x, x)
-C_O1_I4(r, r, re, r, 0)
+C_O1_I4(r, r, reT, r, 0)
 C_O1_I4(r, r, r, ri, ri)
 C_O2_I1(r, r, L)
 C_O2_I2(a, d, a, r)
diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h
index 95a30e58cd..cc22db227b 100644
--- a/tcg/i386/tcg-target-con-str.h
+++ b/tcg/i386/tcg-target-con-str.h
@@ -28,5 +28,6 @@ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS)    /* qemu_st8_i32 data */
  */
 CONST('e', TCG_CT_CONST_S32)
 CONST('I', TCG_CT_CONST_I32)
+CONST('T', TCG_CT_CONST_TST)
 CONST('W', TCG_CT_CONST_WSZ)
 CONST('Z', TCG_CT_CONST_U32)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index f2414177bd..0b8c60d021 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -132,6 +132,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define TCG_CT_CONST_U32 0x200
 #define TCG_CT_CONST_I32 0x400
 #define TCG_CT_CONST_WSZ 0x800
+#define TCG_CT_CONST_TST 0x1000
 
 /* Registers used with L constraint, which are the first argument
    registers on x86_64, and two random call clobbered registers on
@@ -202,7 +203,8 @@ static bool tcg_target_const_match(int64_t val, int ct,
         return 1;
     }
     if (type == TCG_TYPE_I32) {
-        if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) {
+        if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 |
+                  TCG_CT_CONST_I32 | TCG_CT_CONST_TST)) {
             return 1;
         }
     } else {
@@ -215,6 +217,17 @@ static bool tcg_target_const_match(int64_t val, int ct,
         if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
             return 1;
         }
+        /*
+         * This will be used in combination with TCG_CT_CONST_S32,
+         * so "normal" TESTQ is already matched.  Also accept:
+         *    TESTQ -> TESTL   (uint32_t)
+         *    TESTQ -> BT      (is_power_of_2)
+         */
+        if ((ct & TCG_CT_CONST_TST)
+            && is_tst_cond(cond)
+            && (val == (uint32_t)val || is_power_of_2(val))) {
+            return 1;
+        }
     }
     if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
         return 1;
@@ -396,6 +409,7 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
 #define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
 #define OPC_SHRD_Ib     (0xac | P_EXT)
+#define OPC_TESTB	(0x84)
 #define OPC_TESTL	(0x85)
 #define OPC_TZCNT       (0xbc | P_EXT | P_SIMDF3)
 #define OPC_UD2         (0x0b | P_EXT)
@@ -442,6 +456,12 @@ static bool tcg_target_const_match(int64_t val, int ct,
 #define OPC_GRP3_Ev     (0xf7)
 #define OPC_GRP5        (0xff)
 #define OPC_GRP14       (0x73 | P_EXT | P_DATA16)
+#define OPC_GRPBT       (0xba | P_EXT)
+
+#define OPC_GRPBT_BT    4
+#define OPC_GRPBT_BTS   5
+#define OPC_GRPBT_BTR   6
+#define OPC_GRPBT_BTC   7
 
 /* Group 1 opcode extensions for 0x80-0x83.
    These are also used as modifiers for OPC_ARITH.  */
@@ -1454,7 +1474,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small)
 static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
                        TCGArg arg2, int const_arg2, int rexw)
 {
-    int jz;
+    int jz, js;
 
     if (!is_tst_cond(cond)) {
         if (!const_arg2) {
@@ -1469,6 +1489,7 @@ static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
     }
 
     jz = tcg_cond_to_jcc[cond];
+    js = (cond == TCG_COND_TSTNE ? JCC_JS : JCC_JNS);
 
     if (!const_arg2) {
         tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2);
@@ -1476,17 +1497,40 @@ static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
     }
 
     if (arg2 <= 0xff && (TCG_TARGET_REG_BITS == 64 || arg1 < 4)) {
+        if (arg2 == 0x80) {
+            tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1);
+            return js;
+        }
         tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1);
         tcg_out8(s, arg2);
         return jz;
     }
 
     if ((arg2 & ~0xff00) == 0 && arg1 < 4) {
+        if (arg2 == 0x8000) {
+            tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4);
+            return js;
+        }
         tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, arg1 + 4);
         tcg_out8(s, arg2 >> 8);
         return jz;
     }
 
+    if (is_power_of_2(rexw ? arg2 : (uint32_t)arg2)) {
+        int jc = (cond == TCG_COND_TSTNE ? JCC_JB : JCC_JAE);
+        int sh = ctz64(arg2);
+
+        rexw = (sh & 32 ? P_REXW : 0);
+        if ((sh & 31) == 31) {
+            tcg_out_modrm(s, OPC_TESTL | rexw, arg1, arg1);
+            return js;
+        } else {
+            tcg_out_modrm(s, OPC_GRPBT | rexw, OPC_GRPBT_BT, arg1);
+            tcg_out8(s, sh);
+            return jc;
+        }
+    }
+
     if (rexw) {
         if (arg2 == (uint32_t)arg2) {
             rexw = 0;
@@ -3399,7 +3443,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
-        return C_O0_I2(r, re);
+        return C_O0_I2(r, reT);
 
     case INDEX_op_bswap16_i32:
     case INDEX_op_bswap16_i64:
@@ -3447,11 +3491,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_setcond_i64:
     case INDEX_op_negsetcond_i32:
     case INDEX_op_negsetcond_i64:
-        return C_O1_I2(q, r, re);
+        return C_O1_I2(q, r, reT);
 
     case INDEX_op_movcond_i32:
     case INDEX_op_movcond_i64:
-        return C_O1_I4(r, r, re, r, 0);
+        return C_O1_I4(r, r, reT, r, 0);
 
     case INDEX_op_div2_i32:
     case INDEX_op_div2_i64:
-- 
2.34.1



  parent reply	other threads:[~2024-01-10 22:47 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-10 22:43 [PATCH v3 00/38] tcg: Introduce TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-10 22:43 ` [PATCH v3 01/38] " Richard Henderson
2024-01-10 22:43 ` [PATCH v3 02/38] tcg: Introduce TCG_TARGET_HAS_tst Richard Henderson
2024-01-16 21:42   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 03/38] tcg/optimize: Split out arg_is_const_val Richard Henderson
2024-01-10 22:43 ` [PATCH v3 04/38] tcg/optimize: Split out do_constant_folding_cond1 Richard Henderson
2024-01-10 22:43 ` [PATCH v3 05/38] tcg/optimize: Do swap_commutative2 in do_constant_folding_cond2 Richard Henderson
2024-01-10 22:43 ` [PATCH v3 06/38] tcg/optimize: Handle TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-10 22:43 ` [PATCH v3 07/38] tcg/optimize: Lower TCG_COND_TST{EQ, NE} if unsupported Richard Henderson
2024-01-16 22:02   ` [PATCH v3 07/38] tcg/optimize: Lower TCG_COND_TST{EQ,NE} " Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 08/38] target/alpha: Pass immediate value to gen_bcond_internal() Richard Henderson
2024-01-16 22:02   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 09/38] target/alpha: Use TCG_COND_TST{EQ,NE} for BLB{C,S} Richard Henderson
2024-01-16 22:03   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 10/38] target/alpha: Use TCG_COND_TST{EQ, NE} for CMOVLB{C, S} Richard Henderson
2024-01-10 22:43 ` [PATCH v3 11/38] target/alpha: Use TCG_COND_TSTNE for gen_fold_mzero Richard Henderson
2024-01-10 22:43 ` [PATCH v3 12/38] target/m68k: Use TCG_COND_TST{EQ, NE} in gen_fcc_cond Richard Henderson
2024-01-16 22:06   ` [PATCH v3 12/38] target/m68k: Use TCG_COND_TST{EQ,NE} " Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 13/38] target/sparc: Use TCG_COND_TSTEQ in gen_op_mulscc Richard Henderson
2024-01-16 21:44   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 14/38] target/s390x: Use TCG_COND_TSTNE for CC_OP_{TM,ICM} Richard Henderson
2024-01-19 21:59   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 15/38] target/s390x: Improve general case of disas_jcc Richard Henderson
2024-01-16 22:19   ` Philippe Mathieu-Daudé
2024-01-17  3:19     ` Richard Henderson
2024-01-19 23:27       ` Philippe Mathieu-Daudé
2024-01-19 23:22   ` [PATCH v3 15/38 1/6] target/s390x: Reorder CC_OP_STATIC switch case in disas_jcc (1/5) Philippe Mathieu-Daudé
2024-01-19 23:22   ` [PATCH v3 15/38 2/6] target/s390x: Reorder CC_OP_STATIC switch case in disas_jcc (2/5) Philippe Mathieu-Daudé
2024-01-19 23:22   ` [PATCH v3 15/38 3/6] target/s390x: Reorder CC_OP_STATIC switch case in disas_jcc (3/5) Philippe Mathieu-Daudé
2024-01-19 23:23   ` [PATCH v3 15/38 4/6] target/s390x: Reorder CC_OP_STATIC switch case in disas_jcc (4/5) Philippe Mathieu-Daudé
2024-01-19 23:23   ` [PATCH v3 15/38 5/6] target/s390x: Reorder CC_OP_STATIC switch case in disas_jcc (5/5) Philippe Mathieu-Daudé
2024-01-19 23:23   ` [PATCH v3 15/38 6/6] target/s390x: Improve general case of disas_jcc Philippe Mathieu-Daudé
2024-01-19 23:27     ` Philippe Mathieu-Daudé
2024-01-22 21:38     ` Ilya Leoshkevich
2024-01-10 22:43 ` [PATCH v3 16/38] tcg: Add TCGConst argument to tcg_target_const_match Richard Henderson
2024-01-10 22:43 ` [PATCH v3 17/38] tcg/aarch64: Support TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-19 22:09   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 18/38] tcg/aarch64: Generate TBZ, TBNZ Richard Henderson
2024-01-19 22:47   ` [PATCH v3 18/38 1/2] tcg/aarch64: Massage tcg_out_brcond() Philippe Mathieu-Daudé
2024-01-19 22:47   ` [PATCH v3 18/38 2/2] tcg/aarch64: Generate TBZ, TBNZ Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 19/38] tcg/aarch64: Generate CBNZ for TSTNE of UINT32_MAX Richard Henderson
2024-01-22 14:20   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 20/38] tcg/arm: Factor tcg_out_cmp() out Richard Henderson
2024-01-16 22:22   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 21/38] tcg/arm: Support TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-16 22:26   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 22/38] tcg/i386: Pass x86 condition codes to tcg_out_cmov Richard Henderson
2024-01-10 22:43 ` [PATCH v3 23/38] tcg/i386: Move tcg_cond_to_jcc[] into tcg_out_cmp Richard Henderson
2024-01-10 22:43 ` [PATCH v3 24/38] tcg/i386: Support TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-10 22:43 ` Richard Henderson [this message]
2024-01-10 22:43 ` [PATCH v3 26/38] tcg/i386: Use TEST r,r to test 8/16/32 bits Richard Henderson
2024-01-20 11:02   ` Philippe Mathieu-Daudé
2024-01-10 22:43 ` [PATCH v3 27/38] tcg/sparc64: Hoist read of tcg_cond_to_rcond Richard Henderson
2024-01-10 22:43 ` [PATCH v3 28/38] tcg/sparc64: Pass TCGCond to tcg_out_cmp Richard Henderson
2024-01-10 22:43 ` [PATCH v3 29/38] tcg/sparc64: Support TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-10 22:44 ` [PATCH v3 30/38] tcg/ppc: Sink tcg_to_bc usage into tcg_out_bc Richard Henderson
2024-01-10 22:44 ` [PATCH v3 31/38] tcg/ppc: Use cr0 in tcg_to_bc and tcg_to_isel Richard Henderson
2024-01-16 21:51   ` Philippe Mathieu-Daudé
2024-01-10 22:44 ` [PATCH v3 32/38] tcg/ppc: Tidy up tcg_target_const_match Richard Henderson
2024-01-10 22:44 ` [PATCH v3 33/38] tcg/ppc: Add TCG_CT_CONST_CMP Richard Henderson
2024-01-19 22:12   ` Philippe Mathieu-Daudé
2024-01-10 22:44 ` [PATCH v3 34/38] tcg/ppc: Support TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-19 22:20   ` Philippe Mathieu-Daudé
2024-01-10 22:44 ` [PATCH v3 35/38] tcg/s390x: Split constraint A into J+U Richard Henderson
2024-01-16 21:55   ` Philippe Mathieu-Daudé
2024-01-10 22:44 ` [PATCH v3 36/38] tcg/s390x: Add TCG_CT_CONST_CMP Richard Henderson
2024-01-16 21:57   ` Philippe Mathieu-Daudé
2024-01-10 22:44 ` [PATCH v3 37/38] tcg/s390x: Support TCG_COND_TST{EQ,NE} Richard Henderson
2024-01-23  5:36   ` Philippe Mathieu-Daudé
2024-01-10 22:44 ` [PATCH v3 38/38] tcg/tci: " Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240110224408.10444-26-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=pbonzini@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).