From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 16/25] tcg/i386: Handle ctz and clz opcodes
Date: Wed, 16 Nov 2016 20:25:26 +0100 [thread overview]
Message-ID: <1479324335-2074-17-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1479324335-2074-1-git-send-email-rth@twiddle.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/i386/tcg-target.h | 8 ++---
tcg/i386/tcg-target.inc.c | 83 ++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 78 insertions(+), 13 deletions(-)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index f2d9955..8fff287 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -93,8 +93,8 @@ extern bool have_bmi1;
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_clz_i32 0
-#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_extract_i32 1
#define TCG_TARGET_HAS_sextract_i32 1
@@ -127,8 +127,8 @@ extern bool have_bmi1;
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_clz_i64 0
-#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_extract_i64 1
#define TCG_TARGET_HAS_sextract_i64 0
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 39f62bd..3eeb58f 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -92,6 +92,7 @@ static const int tcg_target_call_oarg_regs[] = {
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
+#define TCG_CT_CONST_WSZ 0x800
/* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
@@ -225,6 +226,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
} else {
goto case_c;
}
+ case 'W':
+ /* With TZCNT/LZCNT, we can have operand-size as an input. */
+ if (have_bmi1) {
+ ct->ct |= TCG_CT_CONST_WSZ;
+ }
+ break;
/* qemu_ld/st address constraint */
case 'L':
@@ -273,6 +280,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
return 1;
}
+ if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ return 1;
+ }
return 0;
}
@@ -306,6 +316,8 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN (0xf2 | P_EXT38)
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BSF (0xbc | P_EXT)
+#define OPC_BSR (0xbd | P_EXT)
#define OPC_BSWAP (0xc8 | P_EXT)
#define OPC_CALL_Jz (0xe8)
#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
@@ -320,6 +332,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_JMP_long (0xe9)
#define OPC_JMP_short (0xeb)
#define OPC_LEA (0x8d)
+#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
@@ -346,6 +359,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL (0x85)
+#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
#define OPC_XCHG_ax_r32 (0x90)
#define OPC_GRP3_Ev (0xf7)
@@ -431,6 +445,11 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
if (opc & P_ADDR32) {
tcg_out8(s, 0x67);
}
+ if (opc & P_SIMDF3) {
+ tcg_out8(s, 0xf3);
+ } else if (opc & P_SIMDF2) {
+ tcg_out8(s, 0xf2);
+ }
rex = 0;
rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
@@ -465,6 +484,11 @@ static void tcg_out_opc(TCGContext *s, int opc)
if (opc & P_DATA16) {
tcg_out8(s, 0x66);
}
+ if (opc & P_SIMDF3) {
+ tcg_out8(s, 0xf3);
+ } else if (opc & P_SIMDF2) {
+ tcg_out8(s, 0xf2);
+ }
if (opc & (P_EXT | P_EXT38)) {
tcg_out8(s, 0x0f);
if (opc & P_EXT38) {
@@ -1093,13 +1117,11 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
}
#endif
-static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
- TCGArg c1, TCGArg c2, int const_c2,
- TCGArg v1)
+static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
+ TCGReg dest, TCGReg v1)
{
- tcg_out_cmp(s, c1, c2, const_c2, 0);
if (have_cmov) {
- tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
} else {
TCGLabel *over = gen_new_label();
tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
@@ -1108,13 +1130,21 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
}
}
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
+ TCGReg c1, TCGArg c2, int const_c2,
+ TCGReg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, 0);
+ tcg_out_cmov(s, cond, 0, dest, v1);
+}
+
#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
- TCGArg c1, TCGArg c2, int const_c2,
- TCGArg v1)
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
+ TCGReg c1, TCGArg c2, int const_c2,
+ TCGReg v1)
{
tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
- tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+ tcg_out_cmov(s, cond, P_REXW, dest, v1);
}
#endif
@@ -1993,6 +2023,37 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
+ OP_32_64(ctz):
+ if (const_args[2]) {
+ tcg_debug_assert(have_bmi1);
+ tcg_debug_assert(args[2] == (rexw ? 64 : 32));
+ tcg_out_modrm(s, OPC_TZCNT + rexw, args[0], args[1]);
+ } else {
+ /* ??? The manual says that the output is undefined when the
+ input is zero, but real hardware leaves it unchanged. As
+ noted in target-i386/translate.c, real programs depend on
+ this -- now we are one more of those. */
+ tcg_out_modrm(s, OPC_BSF + rexw, args[0], args[1]);
+ if (args[0] != args[2]) {
+ tcg_out_cmov(s, TCG_COND_EQ, rexw, args[0], args[2]);
+ }
+ }
+ break;
+
+ OP_32_64(clz):
+ if (const_args[2]) {
+ tcg_debug_assert(have_bmi1);
+ tcg_debug_assert(args[2] == (rexw ? 64 : 32));
+ tcg_out_modrm(s, OPC_LZCNT + rexw, args[0], args[1]);
+ } else {
+ /* ??? See above. */
+ tcg_out_modrm(s, OPC_BSR + rexw, args[0], args[1]);
+ if (args[0] != args[2]) {
+ tcg_out_cmov(s, TCG_COND_EQ, rexw, args[0], args[2]);
+ }
+ }
+ break;
+
case INDEX_op_brcond_i32:
tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
arg_label(args[3]), 0);
@@ -2220,6 +2281,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_sar_i32, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i32, { "r", "0", "ci" } },
{ INDEX_op_rotr_i32, { "r", "0", "ci" } },
+ { INDEX_op_clz_i32, { "r", "r", "rW" } },
+ { INDEX_op_ctz_i32, { "r", "r", "rW" } },
{ INDEX_op_brcond_i32, { "r", "ri" } },
@@ -2281,6 +2344,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_sar_i64, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i64, { "r", "0", "ci" } },
{ INDEX_op_rotr_i64, { "r", "0", "ci" } },
+ { INDEX_op_clz_i64, { "r", "r", "rW" } },
+ { INDEX_op_ctz_i64, { "r", "r", "rW" } },
{ INDEX_op_brcond_i64, { "r", "re" } },
{ INDEX_op_setcond_i64, { "r", "r", "re" } },
--
2.7.4
next prev parent reply other threads:[~2016-11-16 19:26 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-11-16 19:25 [Qemu-devel] [PATCH 00/25] tcg: Handle clz, ctz, and clrsb generically Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 01/25] tcg: Add clz and ctz opcodes Richard Henderson
2016-11-21 15:11 ` Alex Bennée
2016-11-21 16:05 ` Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 02/25] target-alpha: Use the ctz and clz opcodes Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 03/25] target-cris: Use clz opcode Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 04/25] target-microblaze: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 05/25] target-mips: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 06/25] target-openrisc: Use clz and ctz opcodes Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 07/25] target-ppc: " Richard Henderson
2016-11-17 3:09 ` David Gibson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 08/25] target-s390x: Use clz opcode Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 09/25] target-tilegx: Use clz and ctz opcodes Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 10/25] target-tricore: Use clz opcode Richard Henderson
2016-11-17 14:42 ` Bastian Koppelmann
2016-11-17 15:47 ` Bastian Koppelmann
2016-11-16 19:25 ` [Qemu-devel] [PATCH 11/25] target-unicore32: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 12/25] target-xtensa: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 13/25] target-arm: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 14/25] target-i386: Use clz and ctz opcodes Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 15/25] disas/i386.c: Handle tzcnt Richard Henderson
2016-11-16 19:25 ` Richard Henderson [this message]
2016-11-17 16:50 ` [Qemu-devel] [PATCH 16/25] tcg/i386: Handle ctz and clz opcodes Bastian Koppelmann
2016-11-17 19:53 ` Richard Henderson
2016-11-17 19:59 ` Richard Henderson
2016-11-17 22:09 ` Bastian Koppelmann
2016-11-17 23:03 ` Richard Henderson
2016-11-18 12:48 ` Bastian Koppelmann
2016-11-21 10:37 ` Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 17/25] tcg/ppc: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 18/25] tcg/aarch64: " Richard Henderson
2016-11-17 11:53 ` Richard Henderson
2016-11-22 10:41 ` Alex Bennée
2016-11-16 19:25 ` [Qemu-devel] [PATCH 19/25] tcg/arm: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 20/25] tcg/mips: Handle clz opcode Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 21/25] tcg/s390: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 22/25] tcg: Add helpers for clrsb Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 23/25] target-arm: Use clrsb helper Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 24/25] target-tricore: " Richard Henderson
2016-11-16 19:25 ` [Qemu-devel] [PATCH 25/25] target-xtensa: " Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1479324335-2074-17-git-send-email-rth@twiddle.net \
--to=rth@twiddle.net \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).