From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: richard.henderson@linaro.org, paul@nowt.org
Subject: [PATCH v2 12/37] target/i386: move scalar 0F 38 and 0F 3A instructions to new decoder
Date: Tue, 20 Sep 2022 19:24:42 +0200
Message-ID: <20220920172507.95568-13-pbonzini@redhat.com>
In-Reply-To: <20220920172507.95568-1-pbonzini@redhat.com>
Because the BMI1/BMI2 instructions moved here are the only VEX-encoded
instructions that QEMU supports, the new decoder can be entered on the
first byte of a valid VEX prefix, and VEX decoding only needs to be
done in decode-new.c.inc.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 59 +++++++
target/i386/tcg/emit.c.inc | 262 +++++++++++++++++++++++++++
target/i386/tcg/translate.c | 293 +------------------------------
3 files changed, 325 insertions(+), 289 deletions(-)
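A note for reviewers, not part of the patch itself: the VEX fields that
the new decoder consumes are the same ones the deleted translate.c hunk
below extracted by hand. Here is a minimal, standalone sketch of how
the two payload bytes of a 3-byte VEX prefix (C4 RXB.mmmmm W.vvvv.L.pp)
break down into fields; the struct and function names are invented for
illustration and do not match QEMU's DisasContext:

    #include <stdbool.h>
    #include <stdint.h>

    struct vex_fields {
        bool r, x, b;    /* register-extension bits; stored inverted */
        uint8_t mmmmm;   /* implied leading bytes: 1=0F, 2=0F38, 3=0F3A */
        bool w;          /* REX.W equivalent: 64-bit operand size */
        uint8_t vvvv;    /* extra source register; stored inverted */
        bool l;          /* vector length: 0 = 128-bit, 1 = 256-bit */
        uint8_t pp;      /* implied SIMD prefix: 0=none, 1=66, 2=F3, 3=F2 */
    };

    static struct vex_fields decode_vex3(uint8_t byte1, uint8_t byte2)
    {
        struct vex_fields f = {
            .r     = !(byte1 & 0x80),      /* bit 7 of byte 1, inverted */
            .x     = !(byte1 & 0x40),      /* bit 6, inverted */
            .b     = !(byte1 & 0x20),      /* bit 5, inverted */
            .mmmmm = byte1 & 0x1f,         /* bits 4:0 */
            .w     = (byte2 & 0x80) != 0,  /* bit 7 of byte 2 */
            .vvvv  = (~byte2 >> 3) & 0xf,  /* bits 6:3, inverted */
            .l     = (byte2 & 0x04) != 0,  /* bit 2 */
            .pp    = byte2 & 3,            /* bits 1:0 */
        };
        return f;
    }

For the instructions moved by this patch, mmmmm selects the 0F 38 or
0F 3A table, pp maps to the first four rows of opcodes_0F38_F0toFF (the
fifth row covers the legacy 66+F2 combination), and the vex13 marker
corresponds to SDM exception type 13, which among other things raises
#UD when VEX.L != 0 -- matching the s->vex_l != 0 checks in the deleted
code.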
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 4dc67e6d37..4344bcb40c 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -135,11 +135,69 @@ static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
return s->modrm;
}
+static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static const X86GenFunc group17_gen[8] = {
+ NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
+ };
+ int op = (get_modrm(s, env) >> 3) & 7;
+ entry->gen = group17_gen[op];
+}
+
static const X86OpEntry opcodes_0F38_00toEF[240] = {
};
/* five rows for no prefix, 66, F3, F2, 66+F2 */
static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
+ [0] = {
+ X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
+ X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
+ {},
+ X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
+ X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
+ },
+ [1] = {
+ X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
+ X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
+ {},
+ X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
+ X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
+ },
+ [2] = {
+ X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
+ {},
+ {},
+ {},
+ {},
+ },
+ [3] = {
+ X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
+ {},
+ {},
+ {},
+ {},
+ },
+ [5] = {
+ X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI2)),
+ {},
+ X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
+ X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
+ {},
+ },
+ [6] = {
+ {},
+ X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
+ X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
+ X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
+ {},
+ },
+ [7] = {
+ X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI2)),
+ X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI2)),
+ X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI2)),
+ {},
+ },
};
static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
@@ -161,6 +219,7 @@ static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
}
static const X86OpEntry opcodes_0F3A[256] = {
+ [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
};
static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 0cba106f74..862da3c84a 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -215,3 +215,265 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
g_assert_not_reached();
}
}
+
+static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
+{
+ TCGv carry_in = NULL;
+ TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
+ TCGv zero;
+
+ if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
+ /* Re-use the carry-out from a previous round. */
+ carry_in = carry_out;
+ cc_op = s->cc_op;
+ } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
+ /* Merge with the carry-out from the opposite instruction. */
+ cc_op = CC_OP_ADCOX;
+ }
+
+ /* If we don't have a carry-in, get it out of EFLAGS. */
+ if (!carry_in) {
+ if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
+ gen_compute_eflags(s);
+ }
+ carry_in = s->tmp0;
+ tcg_gen_extract_tl(carry_in, cpu_cc_src,
+ ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
+ }
+
+ switch (ot) {
+#ifdef TARGET_X86_64
+ case MO_32:
+ /* If TL is 64-bit, just do everything in 64-bit arithmetic. */
+ tcg_gen_ext32u_tl(s->T0, s->T0);  /* zero-extend the inputs, so that */
+ tcg_gen_ext32u_tl(s->T1, s->T1);  /* bit 32 of the sum is the carry-out */
+ tcg_gen_add_i64(s->T0, s->T0, s->T1);
+ tcg_gen_add_i64(s->T0, s->T0, carry_in);
+ tcg_gen_shri_i64(carry_out, s->T0, 32);
+ break;
+#endif
+ default:
+ zero = tcg_constant_tl(0);
+ tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero);
+ tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
+ break;
+ }
+ set_cc_op(s, cc_op);
+}
+
+static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
+}
+
+static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
+}
+
+static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_andc_tl(s->T0, s->T1, s->T0);
+ gen_op_update1_cc(s);
+ set_cc_op(s, CC_OP_LOGICB + ot);
+}
+
+static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv bound, zero;
+
+ /*
+ * Extract START, and shift the operand.
+ * Shifts larger than operand size get zeros.
+ */
+ tcg_gen_ext8u_tl(s->A0, s->T1);
+ tcg_gen_shr_tl(s->T0, s->T0, s->A0);
+
+ bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+ zero = tcg_constant_tl(0);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
+
+ /*
+ * Extract the LEN into a mask. Lengths larger than
+ * operand size get all ones.
+ */
+ tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
+
+ tcg_gen_movi_tl(s->T1, 1);
+ tcg_gen_shl_tl(s->T1, s->T1, s->A0);
+ tcg_gen_subi_tl(s->T1, s->T1, 1);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+
+ gen_op_update1_cc(s);
+ set_cc_op(s, CC_OP_LOGICB + ot);
+}
+
+static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);  /* CC_OP_BMILGB computes CF from CC_SRC */
+ tcg_gen_neg_tl(s->T1, s->T0);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);  /* CC_OP_BMILGB computes CF from CC_SRC */
+ tcg_gen_subi_tl(s->T1, s->T0, 1);
+ tcg_gen_xor_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_mov_tl(cpu_cc_src, s->T0);  /* CC_OP_BMILGB computes CF from CC_SRC */
+ tcg_gen_subi_tl(s->T1, s->T0, 1);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv bound;
+
+ tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
+ bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
+
+ /*
+ * Note that since we're using BMILG (in order to get O
+ * cleared) we need to store the inverse into C.
+ */
+ tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound);
+ tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1);
+
+ tcg_gen_movi_tl(s->A0, -1);
+ tcg_gen_shl_tl(s->A0, s->A0, s->T1);
+ tcg_gen_andc_tl(s->T0, s->T0, s->A0);
+
+ gen_op_update1_cc(s);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[2].ot;
+
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+ gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
+}
+
+static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* M operand type does not load/store */
+ if (decode->e.op0 == X86_TYPE_M) {
+ tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
+ } else {
+ tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
+ }
+}
+
+static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* low part of result in VEX.vvvv, high in MODRM */
+ switch (ot) {
+ default:
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+ tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+ tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+ s->tmp2_i32, s->tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
+ tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32);
+ break;
+#ifdef TARGET_X86_64
+ case MO_64:
+ tcg_gen_mulu2_i64(cpu_regs[s->vex_v], s->T0, s->T0, s->T1);
+ break;
+#endif
+ }
+}
+
+static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ if (ot < MO_64) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
+ gen_helper_pdep(s->T0, s->T0, s->T1);
+}
+
+static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ if (ot < MO_64) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
+ gen_helper_pext(s->T0, s->T0, s->T1);
+}
+
+static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int b = decode->immediate;
+
+ if (ot == MO_64) {
+ tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
+ } else {
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+ tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
+ tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
+ }
+}
+
+static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask;
+
+ mask = ot == MO_64 ? 63 : 31;
+ tcg_gen_andi_tl(s->T1, s->T1, mask);
+ if (ot != MO_64) {
+ tcg_gen_ext32s_tl(s->T0, s->T0);
+ }
+ tcg_gen_sar_tl(s->T0, s->T0, s->T1);
+}
+
+static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask;
+
+ mask = ot == MO_64 ? 63 : 31;
+ tcg_gen_andi_tl(s->T1, s->T1, mask);
+ tcg_gen_shl_tl(s->T0, s->T0, s->T1);
+}
+
+static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask;
+
+ mask = ot == MO_64 ? 63 : 31;
+ tcg_gen_andi_tl(s->T1, s->T1, mask);
+ if (ot != MO_64) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
+ tcg_gen_shr_tl(s->T0, s->T0, s->T1);
+}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index aeda520f35..cc2e6f0906 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4130,151 +4130,6 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
s->mem_index, ot | MO_BE);
}
break;
-
- case 0x0f2: /* andn Gy, By, Ey */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- gen_op_update1_cc(s);
- set_cc_op(s, CC_OP_LOGICB + ot);
- break;
-
- case 0x0f7: /* bextr Gy, Ey, By */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- {
- TCGv bound, zero;
-
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- /* Extract START, and shift the operand.
- Shifts larger than operand size get zeros. */
- tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
- tcg_gen_shr_tl(s->T0, s->T0, s->A0);
-
- bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
- zero = tcg_const_tl(0);
- tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
- s->T0, zero);
- tcg_temp_free(zero);
-
- /* Extract the LEN into a mask. Lengths larger than
- operand size get all ones. */
- tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
- s->A0, bound);
- tcg_temp_free(bound);
- tcg_gen_movi_tl(s->T1, 1);
- tcg_gen_shl_tl(s->T1, s->T1, s->A0);
- tcg_gen_subi_tl(s->T1, s->T1, 1);
- tcg_gen_and_tl(s->T0, s->T0, s->T1);
-
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- gen_op_update1_cc(s);
- set_cc_op(s, CC_OP_LOGICB + ot);
- }
- break;
-
- case 0x0f5: /* bzhi Gy, Ey, By */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
- {
- TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
- /* Note that since we're using BMILG (in order to get O
- cleared) we need to store the inverse into C. */
- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
- s->T1, bound);
- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
- bound, bound, s->T1);
- tcg_temp_free(bound);
- }
- tcg_gen_movi_tl(s->A0, -1);
- tcg_gen_shl_tl(s->A0, s->A0, s->T1);
- tcg_gen_andc_tl(s->T0, s->T0, s->A0);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- gen_op_update1_cc(s);
- set_cc_op(s, CC_OP_BMILGB + ot);
- break;
-
- case 0x3f6: /* mulx By, Gy, rdx, Ey */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- switch (ot) {
- default:
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
- tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
- s->tmp2_i32, s->tmp3_i32);
- tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
- tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
- break;
-#ifdef TARGET_X86_64
- case MO_64:
- tcg_gen_mulu2_i64(s->T0, s->T1,
- s->T0, cpu_regs[R_EDX]);
- tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
- tcg_gen_mov_i64(cpu_regs[reg], s->T1);
- break;
-#endif
- }
- break;
-
- case 0x3f5: /* pdep Gy, By, Ey */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- /* Note that by zero-extending the source operand, we
- automatically handle zero-extending the result. */
- if (ot == MO_64) {
- tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
- } else {
- tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
- }
- gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
- break;
-
- case 0x2f5: /* pext Gy, By, Ey */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- /* Note that by zero-extending the source operand, we
- automatically handle zero-extending the result. */
- if (ot == MO_64) {
- tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
- } else {
- tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
- }
- gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
- break;
-
case 0x1f6: /* adcx Gy, Ey */
case 0x2f6: /* adox Gy, Ey */
CHECK_NO_VEX(s);
@@ -4354,73 +4209,6 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
}
break;
- case 0x1f7: /* shlx Gy, Ey, By */
- case 0x2f7: /* sarx Gy, Ey, By */
- case 0x3f7: /* shrx Gy, Ey, By */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- if (ot == MO_64) {
- tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
- } else {
- tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
- }
- if (b == 0x1f7) {
- tcg_gen_shl_tl(s->T0, s->T0, s->T1);
- } else if (b == 0x2f7) {
- if (ot != MO_64) {
- tcg_gen_ext32s_tl(s->T0, s->T0);
- }
- tcg_gen_sar_tl(s->T0, s->T0, s->T1);
- } else {
- if (ot != MO_64) {
- tcg_gen_ext32u_tl(s->T0, s->T0);
- }
- tcg_gen_shr_tl(s->T0, s->T0, s->T1);
- }
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
-
- case 0x0f3:
- case 0x1f3:
- case 0x2f3:
- case 0x3f3: /* Group 17 */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- switch (reg & 7) {
- case 1: /* blsr By,Ey */
- tcg_gen_subi_tl(s->T1, s->T0, 1);
- tcg_gen_and_tl(s->T0, s->T0, s->T1);
- break;
- case 2: /* blsmsk By,Ey */
- tcg_gen_subi_tl(s->T1, s->T0, 1);
- tcg_gen_xor_tl(s->T0, s->T0, s->T1);
- break;
- case 3: /* blsi By, Ey */
- tcg_gen_neg_tl(s->T1, s->T0);
- tcg_gen_and_tl(s->T0, s->T0, s->T1);
- break;
- default:
- goto unknown_op;
- }
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
- set_cc_op(s, CC_OP_BMILGB + ot);
- break;
-
- default:
- goto unknown_op;
}
break;
@@ -4636,37 +4424,6 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
}
break;
- case 0x33a:
- /* Various integer extensions at 0f 3a f[0-f]. */
- b = modrm | (b1 << 8);
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
-
- switch (b) {
- case 0x3f0: /* rorx Gy,Ey, Ib */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
- || !(s->prefix & PREFIX_VEX)
- || s->vex_l != 0) {
- goto illegal_op;
- }
- ot = mo_64_32(s->dflag);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- b = x86_ldub_code(env, s);
- if (ot == MO_64) {
- tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
- } else {
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
- tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
- }
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
-
- default:
- goto unknown_op;
- }
- break;
-
default:
unknown_op:
gen_unknown_opcode(env, s);
@@ -4905,59 +4662,17 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
#endif
case 0xc5: /* 2-byte VEX */
case 0xc4: /* 3-byte VEX */
- use_new = false;
- /* VEX prefixes cannot be used except in 32-bit mode.
- Otherwise the instruction is LES or LDS. */
if (CODE32(s) && !VM86(s)) {
- static const int pp_prefix[4] = {
- 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
- };
- int vex3, vex2 = x86_ldub_code(env, s);
+ int vex2 = x86_ldub_code(env, s);
+ s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
/* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
otherwise the instruction is LES or LDS. */
- s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
break;
}
-
- /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
- if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
- | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
- goto illegal_op;
- }
-#ifdef TARGET_X86_64
- s->rex_r = (~vex2 >> 4) & 8;
-#endif
- if (b == 0xc5) {
- /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
- vex3 = vex2;
- b = x86_ldub_code(env, s) | 0x100;
- } else {
- /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
- vex3 = x86_ldub_code(env, s);
-#ifdef TARGET_X86_64
- s->rex_x = (~vex2 >> 3) & 8;
- s->rex_b = (~vex2 >> 2) & 8;
- s->rex_w = (vex3 >> 7) & 1;
-#endif
- switch (vex2 & 0x1f) {
- case 0x01: /* Implied 0f leading opcode bytes. */
- b = x86_ldub_code(env, s) | 0x100;
- break;
- case 0x02: /* Implied 0f 38 leading opcode bytes. */
- b = 0x138;
- break;
- case 0x03: /* Implied 0f 3a leading opcode bytes. */
- b = 0x13a;
- break;
- default: /* Reserved for future use. */
- goto unknown_op;
- }
- }
- s->vex_v = (~vex3 >> 3) & 0xf;
- s->vex_l = (vex3 >> 2) & 1;
- prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
+ disas_insn_new(s, cpu, b);
+ return s->pc;
}
break;
}
--
2.37.2