From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Subject: [PATCH 18/25] target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT to new decoder
Date: Sat, 8 Jun 2024 10:41:06 +0200 [thread overview]
Message-ID: <20240608084113.2770363-19-pbonzini@redhat.com> (raw)
In-Reply-To: <20240608084113.2770363-1-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 1 +
target/i386/tcg/translate.c | 74 ----------------------------
target/i386/tcg/decode-new.c.inc | 51 +++++++++++++++++++-
target/i386/tcg/emit.c.inc | 82 ++++++++++++++++++++++++++++++++
4 files changed, 132 insertions(+), 76 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index f781bb5bbec..13be23145a8 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -119,6 +119,7 @@ typedef enum X86CPUIDFeature {
X86_FEAT_FXSR,
X86_FEAT_MOVBE,
X86_FEAT_PCLMULQDQ,
+ X86_FEAT_POPCNT,
X86_FEAT_SHA_NI,
X86_FEAT_SSE,
X86_FEAT_SSE2,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1e9036eb6e3..a9cf1332b43 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -818,11 +818,6 @@ static void gen_movs(DisasContext *s, MemOp ot)
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_op_update1_cc(DisasContext *s)
-{
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-}
-
static void gen_op_update2_cc(DisasContext *s)
{
tcg_gen_mov_tl(cpu_cc_src, s->T1);
@@ -3167,56 +3162,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
break;
- case 0x1bc: /* bsf / tzcnt */
- case 0x1bd: /* bsr / lzcnt */
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- gen_ld_modrm(env, s, modrm, ot);
- gen_extu(ot, s->T0);
-
- /* Note that lzcnt and tzcnt are in different extensions. */
- if ((prefixes & PREFIX_REPZ)
- && (b & 1
- ? s->cpuid_ext3_features & CPUID_EXT3_ABM
- : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
- int size = 8 << ot;
- /* For lzcnt/tzcnt, C bit is defined related to the input. */
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- if (b & 1) {
- /* For lzcnt, reduce the target_ulong result by the
- number of zeros that we expect to find at the top. */
- tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
- tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
- } else {
- /* For tzcnt, a zero input must return the operand size. */
- tcg_gen_ctzi_tl(s->T0, s->T0, size);
- }
- /* For lzcnt/tzcnt, Z bit is defined related to the result. */
- gen_op_update1_cc(s);
- set_cc_op(s, CC_OP_BMILGB + ot);
- } else {
- /* For bsr/bsf, only the Z bit is defined and it is related
- to the input and not the result. */
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- set_cc_op(s, CC_OP_LOGICB + ot);
-
- /* ??? The manual says that the output is undefined when the
- input is zero, but real hardware leaves it unchanged, and
- real programs appear to depend on that. Accomplish this
- by passing the output as the value to return upon zero. */
- if (b & 1) {
- /* For bsr, return the bit index of the first 1 bit,
- not the count of leading zeros. */
- tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
- tcg_gen_clz_tl(s->T0, s->T0, s->T1);
- tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
- } else {
- tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
- }
- }
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
case 0x100:
modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
@@ -3811,25 +3756,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
gen_nop_modrm(env, s, modrm);
break;
- case 0x1b8: /* SSE4.2 popcnt */
- if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
- PREFIX_REPZ)
- goto illegal_op;
- if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
- goto illegal_op;
-
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
-
- ot = dflag;
- gen_ld_modrm(env, s, modrm, ot);
- gen_extu(ot, s->T0);
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- tcg_gen_ctpop_tl(s->T0, s->T0);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
-
- set_cc_op(s, CC_OP_POPCNT);
- break;
default:
g_assert_not_reached();
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index bd9e7cd4df9..64ec731bf4a 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -469,6 +469,50 @@ static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
*entry = *decode_by_prefix(s, opcodes_0F7F);
}
+static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static const X86OpEntry popcnt =
+ X86_OP_ENTRYwr(POPCNT, G,v, E,v, cpuid(POPCNT) zextT0);
+
+ if (s->prefix & PREFIX_REPZ) { /* 0F B8 is POPCNT only with the REPZ prefix */
+ *entry = popcnt;
+ } else {
+ memset(entry, 0, sizeof(*entry)); /* otherwise hand back a zeroed entry */
+ }
+}
+
+static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ /* For BSF, pass 2op as the third operand so that we can use zextT0 */
+ static const X86OpEntry opcodes_0FBC[4] = {
+ X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0),
+ X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0), /* 0x66 */
+ X86_OP_ENTRYwr(TZCNT, G,v, E,v, zextT0), /* 0xf3 */
+ X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0), /* 0xf2 */
+ };
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
+ *entry = opcodes_0FBC[0]; /* TZCNT is gated on BMI1 (not ABM); without it decode BSF */
+ } else {
+ *entry = *decode_by_prefix(s, opcodes_0FBC);
+ }
+}
+
+static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ /* For BSR, pass 2op as the third operand so that we can use zextT0 */
+ static const X86OpEntry opcodes_0FBD[4] = {
+ X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0),
+ X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0), /* 0x66 */
+ X86_OP_ENTRYwr(LZCNT, G,v, E,v, zextT0), /* 0xf3 */
+ X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0), /* 0xf2 */
+ };
+ if (!(s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
+ *entry = opcodes_0FBD[0]; /* LZCNT is gated on ABM (not BMI1); without it decode BSR */
+ } else {
+ *entry = *decode_by_prefix(s, opcodes_0FBD);
+ }
+}
+
static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry movq[4] = {
@@ -1273,10 +1317,13 @@ static const X86OpEntry opcodes_0F[256] = {
*/
[0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0),
+ [0xb8] = X86_OP_GROUP0(0FB8),
/* decoded as modrm, which is visible as a difference between page fault and #UD */
[0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */
[0xba] = X86_OP_GROUP2(group8, E,v, I,b),
[0xbb] = X86_OP_ENTRY2(BTC, E,v, G,v, btEvGv),
+ [0xbc] = X86_OP_GROUP0(0FBC),
+ [0xbd] = X86_OP_GROUP0(0FBD),
[0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
[0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
@@ -2174,6 +2221,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
case X86_FEAT_PCLMULQDQ:
return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
+ case X86_FEAT_POPCNT:
+ return (s->cpuid_ext_features & CPUID_EXT_POPCNT);
case X86_FEAT_SSE:
return (s->cpuid_features & CPUID_SSE);
case X86_FEAT_SSE2:
@@ -2562,8 +2611,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
case 0x00 ... 0x01: /* mostly privileged instructions */
case 0x1a ... 0x1b: /* MPX */
case 0xb0 ... 0xb1: /* cmpxchg */
- case 0xb8: /* POPCNT */
- case 0xbc ... 0xbd: /* LZCNT/TZCNT */
case 0xc0 ... 0xc1: /* xadd */
case 0xc7: /* grp9 */
disas_insn_old(s, cpu, b + 0x100);
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index aabc86669c2..2fbf2a5ce8c 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1345,6 +1345,47 @@ static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
}
}
+/* Non-standard convention - on entry T0 is the zero-extended input, T1 is the old output value. */
+static void gen_BSF(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* Only the Z bit is defined and it is related to the input. */
+ decode->cc_dst = tcg_temp_new();
+ decode->cc_op = CC_OP_LOGICB + ot;
+ tcg_gen_mov_tl(decode->cc_dst, s->T0); /* copy the input: T0 is overwritten below */
+
+ /*
+ * The manual says that the output is undefined when the
+ * input is zero, but real hardware leaves it unchanged, and
+ * real programs appear to depend on that. Accomplish this
+ * by passing the output as the value to return upon zero.
+ */
+ tcg_gen_ctz_tl(s->T0, s->T0, s->T1); /* result is left in T0 */
+}
+
+/* Non-standard convention - on entry T0 is the zero-extended input, T1 is the old output value. */
+static void gen_BSR(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* Only the Z bit is defined and it is related to the input. */
+ decode->cc_dst = tcg_temp_new();
+ decode->cc_op = CC_OP_LOGICB + ot;
+ tcg_gen_mov_tl(decode->cc_dst, s->T0); /* copy the input: T0 is overwritten below */
+
+ /*
+ * The manual says that the output is undefined when the
+ * input is zero, but real hardware leaves it unchanged, and
+ * real programs appear to depend on that. Accomplish this
+ * by passing the output as the value to return upon zero.
+ * Plus, return the bit index of the first 1 bit.
+ */
+ tcg_gen_xori_tl(s->T1, s->T1, TARGET_LONG_BITS - 1); /* pre-flip; the last XOR undoes it */
+ tcg_gen_clz_tl(s->T0, s->T0, s->T1);
+ tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1); /* leading-zero count -> bit index */
+}
+
static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
@@ -2254,6 +2295,24 @@ static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
gen_lxx_seg(s, decode, R_SS);
}
+static void gen_LZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* C bit (cc_src) is defined by the input; cc_dst is T0 itself, i.e. the result. */
+ decode->cc_src = tcg_temp_new();
+ decode->cc_dst = s->T0;
+ decode->cc_op = CC_OP_BMILGB + ot;
+ tcg_gen_mov_tl(decode->cc_src, s->T0); /* copy the input: T0 is overwritten below */
+
+ /*
+ * Reduce the target_ulong result by the number of zeros that
+ * we expect to find at the top.
+ */
+ tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
+ tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - (8 << ot));
+}
+
static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
@@ -2812,6 +2871,15 @@ static void gen_POPA(DisasContext *s, X86DecodedInsn *decode)
gen_popa(s);
}
+static void gen_POPCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+ decode->cc_src = tcg_temp_new();
+ decode->cc_op = CC_OP_POPCNT;
+
+ tcg_gen_mov_tl(decode->cc_src, s->T0); /* keep the input in cc_src */
+ tcg_gen_ctpop_tl(s->T0, s->T0); /* T0 becomes the population count */
+}
+
static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot;
@@ -3893,6 +3961,20 @@ static void gen_SYSRET(DisasContext *s, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_EOB_RECHECK_TF;
}
+static void gen_TZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* C bit (cc_src) is defined by the input; cc_dst is T0 itself, i.e. the result. */
+ decode->cc_src = tcg_temp_new();
+ decode->cc_dst = s->T0;
+ decode->cc_op = CC_OP_BMILGB + ot;
+ tcg_gen_mov_tl(decode->cc_src, s->T0); /* copy the input: T0 is overwritten below */
+
+ /* A zero input returns the operand size. */
+ tcg_gen_ctzi_tl(s->T0, s->T0, 8 << ot);
+}
+
static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
{
gen_illegal_opcode(s);
--
2.45.1
next prev parent reply other threads:[~2024-06-08 8:42 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
2024-06-08 8:40 ` [PATCH 01/25] target/i386: remove CPUX86State argument from generator functions Paolo Bonzini
2024-06-08 14:47 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 02/25] target/i386: rewrite flags writeback for ADCX/ADOX Paolo Bonzini
2024-06-08 18:05 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 03/25] target/i386: put BLS* input in T1, use generic flag writeback Paolo Bonzini
2024-06-08 18:07 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 04/25] target/i386: change X86_ENTRYr to use T0 Paolo Bonzini
2024-06-08 18:10 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 05/25] target/i386: change X86_ENTRYwr to use T0, use it for moves Paolo Bonzini
2024-06-08 18:13 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 06/25] target/i386: replace NoSeg special with NoLoadEA Paolo Bonzini
2024-06-08 18:16 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 07/25] target/i386: fix processing of intercept 0 (read CR0) Paolo Bonzini
2024-06-08 18:17 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 08/25] target/i386: convert MOV from/to CR and DR to new decoder Paolo Bonzini
2024-06-08 18:24 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 09/25] target/i386: fix bad sorting of entries in the 0F table Paolo Bonzini
2024-06-08 18:26 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder Paolo Bonzini
2024-06-08 18:42 ` Richard Henderson
2024-10-21 1:49 ` Guenter Roeck
2024-10-21 6:57 ` Paolo Bonzini
2024-10-21 13:54 ` Guenter Roeck
2024-06-08 8:40 ` [PATCH 11/25] target/i386: replace read_crN helper with read_cr8 Paolo Bonzini
2024-06-08 18:45 ` Richard Henderson
2024-06-10 17:14 ` Paolo Bonzini
2024-06-08 8:41 ` [PATCH 12/25] target/i386: split X86_CHECK_prot into PE and VM86 checks Paolo Bonzini
2024-06-08 18:47 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 13/25] target/i386: convert non-grouped, helper-based 2-byte opcodes Paolo Bonzini
2024-06-08 19:03 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 14/25] target/i386: convert bit test instructions to new decoder Paolo Bonzini
2024-06-08 19:37 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 15/25] target/i386: pull load/writeback out of gen_shiftd_rm_T1 Paolo Bonzini
2024-06-08 19:39 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 16/25] target/i386: adapt gen_shift_count for SHLD/SHRD Paolo Bonzini
2024-06-08 19:42 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 17/25] target/i386: convert SHLD/SHRD to new decoder Paolo Bonzini
2024-06-08 19:47 ` Richard Henderson
2024-06-08 8:41 ` Paolo Bonzini [this message]
2024-06-08 19:53 ` [PATCH 18/25] target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT " Richard Henderson
2024-06-08 8:41 ` [PATCH 19/25] target/i386: convert XADD " Paolo Bonzini
2024-06-08 20:00 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 20/25] target/i386: convert CMPXCHG " Paolo Bonzini
2024-06-08 20:04 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 21/25] target/i386: decode address before going back to translate.c Paolo Bonzini
2024-06-08 20:13 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 22/25] target/i386: list instructions still in translate.c Paolo Bonzini
2024-06-08 20:14 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 23/25] target/i386: assert that cc_op* and pc_save are preserved Paolo Bonzini
2024-06-08 20:14 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 24/25] target/i386: do not check PREFIX_LOCK in old-style decoder Paolo Bonzini
2024-06-08 20:15 ` Richard Henderson
2024-06-10 17:10 ` Paolo Bonzini
2024-06-08 8:41 ` [PATCH 25/25] target/i386: remove gen_ext_tl Paolo Bonzini
2024-06-08 20:17 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240608084113.2770363-19-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).