* [PATCH 01/25] target/i386: remove CPUX86State argument from generator functions
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 14:47 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 02/25] target/i386: rewrite flags writeback for ADCX/ADOX Paolo Bonzini
` (23 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
CPUX86State argument would only be used to fetch bytes, but that has to be
done before the generator function is called. So remove it, and all
temptation together with it.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 2 +-
target/i386/tcg/decode-new.c.inc | 4 +-
target/i386/tcg/emit.c.inc | 572 +++++++++++++++----------------
3 files changed, 289 insertions(+), 289 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 1f90cf96407..f704698575f 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -245,7 +245,7 @@ typedef struct X86DecodedInsn X86DecodedInsn;
typedef void (*X86DecodeFunc)(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b);
/* Code generation function. */
-typedef void (*X86GenFunc)(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+typedef void (*X86GenFunc)(DisasContext *s, X86DecodedInsn *decode);
struct X86OpEntry {
/* Based on the is_decode flags. */
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index c2d8da8d14e..e7d88020481 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -2590,7 +2590,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
}
if (s->prefix & PREFIX_LOCK) {
gen_load(s, &decode, 2, s->T1);
- decode.e.gen(s, env, &decode);
+ decode.e.gen(s, &decode);
} else {
if (decode.op[0].unit == X86_OP_MMX) {
compute_mmx_offset(&decode.op[0]);
@@ -2599,7 +2599,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
}
gen_load(s, &decode, 1, s->T0);
gen_load(s, &decode, 2, s->T1);
- decode.e.gen(s, env, &decode);
+ decode.e.gen(s, &decode);
gen_writeback(s, &decode, 0, s->T0);
}
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 4be3d9a6fba..df7597c7e2f 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -60,8 +60,8 @@ typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even,
TCGv_i32 odd);
-static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
-static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+static void gen_JMP_m(DisasContext *s, X86DecodedInsn *decode);
+static void gen_JMP(DisasContext *s, X86DecodedInsn *decode);
static inline TCGv_i32 tcg_constant8u_i32(uint8_t val)
{
@@ -446,7 +446,7 @@ static const SSEFunc_0_epp fns_3dnow[] = {
[0xbf] = gen_helper_pavgusb,
};
-static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_3dnow(DisasContext *s, X86DecodedInsn *decode)
{
uint8_t b = decode->immediate;
SSEFunc_0_epp fn = b < ARRAY_SIZE(fns_3dnow) ? fns_3dnow[b] : NULL;
@@ -479,7 +479,7 @@ static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
* f3 = v*ss Vss, Hss, Wps
* f2 = v*sd Vsd, Hsd, Wps
*/
-static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_fp_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_epp pd_xmm, SSEFunc_0_epp ps_xmm,
SSEFunc_0_epp pd_ymm, SSEFunc_0_epp ps_ymm,
SSEFunc_0_eppp sd, SSEFunc_0_eppp ss)
@@ -504,9 +504,9 @@ static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86Decode
}
}
#define UNARY_FP_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_unary_fp_sse(s, env, decode, \
+ gen_unary_fp_sse(s, decode, \
gen_helper_##lname##pd_xmm, \
gen_helper_##lname##ps_xmm, \
gen_helper_##lname##pd_ymm, \
@@ -522,7 +522,7 @@ UNARY_FP_SSE(VSQRT, sqrt)
* f3 = v*ss Vss, Hss, Wps
* f2 = v*sd Vsd, Hsd, Wps
*/
-static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_fp_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm,
SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm,
SSEFunc_0_eppp sd, SSEFunc_0_eppp ss)
@@ -543,9 +543,9 @@ static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn
}
#define FP_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_fp_sse(s, env, decode, \
+ gen_fp_sse(s, decode, \
gen_helper_##lname##pd_xmm, \
gen_helper_##lname##ps_xmm, \
gen_helper_##lname##pd_ymm, \
@@ -561,7 +561,7 @@ FP_SSE(VDIV, div)
FP_SSE(VMAX, max)
#define FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, even, odd) \
-static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname##Px(DisasContext *s, X86DecodedInsn *decode) \
{ \
SSEFunc_0_eppppii xmm = s->vex_w ? gen_helper_fma4pd_xmm : gen_helper_fma4ps_xmm; \
SSEFunc_0_eppppii ymm = s->vex_w ? gen_helper_fma4pd_ymm : gen_helper_fma4ps_ymm; \
@@ -574,7 +574,7 @@ static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *d
#define FMA_SSE(uname, ptr0, ptr1, ptr2, flags) \
FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, flags, flags) \
-static void gen_##uname##Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname##Sx(DisasContext *s, X86DecodedInsn *decode) \
{ \
SSEFunc_0_eppppi fn = s->vex_w ? gen_helper_fma4sd : gen_helper_fma4ss; \
\
@@ -607,10 +607,10 @@ FMA_SSE_PACKED(VFMSUBADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0, float_muladd_negate_c
FMA_SSE_PACKED(VFMSUBADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0, float_muladd_negate_c)
#define FP_UNPACK_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
/* PS maps to the DQ integer instruction, PD maps to QDQ. */ \
- gen_fp_sse(s, env, decode, \
+ gen_fp_sse(s, decode, \
gen_helper_##lname##qdq_xmm, \
gen_helper_##lname##dq_xmm, \
gen_helper_##lname##qdq_ymm, \
@@ -624,7 +624,7 @@ FP_UNPACK_SSE(VUNPCKHPx, punpckh)
* 00 = v*ps Vps, Wpd
* f3 = v*ss Vss, Wps
*/
-static inline void gen_unary_fp32_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_fp32_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_epp ps_xmm,
SSEFunc_0_epp ps_ymm,
SSEFunc_0_eppp ss)
@@ -649,9 +649,9 @@ illegal_op:
gen_illegal_opcode(s);
}
#define UNARY_FP32_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_unary_fp32_sse(s, env, decode, \
+ gen_unary_fp32_sse(s, decode, \
gen_helper_##lname##ps_xmm, \
gen_helper_##lname##ps_ymm, \
gen_helper_##lname##ss); \
@@ -663,7 +663,7 @@ UNARY_FP32_SSE(VRCP, rcp)
* 66 = v*pd Vpd, Hpd, Wpd
* f2 = v*ps Vps, Hps, Wps
*/
-static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_horizontal_fp_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm,
SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm)
{
@@ -674,9 +674,9 @@ static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86D
fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}
#define HORIZONTAL_FP_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_horizontal_fp_sse(s, env, decode, \
+ gen_horizontal_fp_sse(s, decode, \
gen_helper_##lname##pd_xmm, gen_helper_##lname##ps_xmm, \
gen_helper_##lname##pd_ymm, gen_helper_##lname##ps_ymm); \
}
@@ -684,7 +684,7 @@ HORIZONTAL_FP_SSE(VHADD, hadd)
HORIZONTAL_FP_SSE(VHSUB, hsub)
HORIZONTAL_FP_SSE(VADDSUB, addsub)
-static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_ternary_sse(DisasContext *s, X86DecodedInsn *decode,
int op3, SSEFunc_0_epppp xmm, SSEFunc_0_epppp ymm)
{
SSEFunc_0_epppp fn = s->vex_l ? ymm : xmm;
@@ -695,21 +695,21 @@ static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86Decoded
fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3);
}
#define TERNARY_SSE(uname, uvname, lname) \
-static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uvname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4, \
+ gen_ternary_sse(s, decode, (uint8_t)decode->immediate >> 4, \
gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \
} \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_ternary_sse(s, env, decode, 0, \
+ gen_ternary_sse(s, decode, 0, \
gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \
}
TERNARY_SSE(BLENDVPS, VBLENDVPS, blendvps)
TERNARY_SSE(BLENDVPD, VBLENDVPD, blendvpd)
TERNARY_SSE(PBLENDVB, VPBLENDVB, pblendvb)
-static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_binary_imm_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_epppi xmm, SSEFunc_0_epppi ymm)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -721,9 +721,9 @@ static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86Deco
}
#define BINARY_IMM_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_binary_imm_sse(s, env, decode, \
+ gen_binary_imm_sse(s, decode, \
gen_helper_##lname##_xmm, \
gen_helper_##lname##_ymm); \
}
@@ -739,7 +739,7 @@ BINARY_IMM_SSE(PCLMULQDQ, pclmulqdq)
#define UNARY_INT_GVEC(uname, func, ...) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
int vec_len = vector_len(s, decode); \
\
@@ -757,7 +757,7 @@ UNARY_INT_GVEC(VPBROADCASTQ, tcg_gen_gvec_dup_mem, MO_64)
#define BINARY_INT_GVEC(uname, func, ...) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
int vec_len = vector_len(s, decode); \
\
@@ -816,7 +816,7 @@ BINARY_INT_GVEC(PXOR, tcg_gen_gvec_xor, MO_64)
* These are really the same encoding, because 1) V is the same as P when VEX.V
* is not present 2) P and Q are the same as H and W apart from MM/XMM
*/
-static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_binary_int_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_eppp mmx, SSEFunc_0_eppp xmm, SSEFunc_0_eppp ymm)
{
assert(!!mmx == !!(decode->e.special == X86_SPECIAL_MMX));
@@ -837,9 +837,9 @@ static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86Deco
#define BINARY_INT_MMX(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_binary_int_sse(s, env, decode, \
+ gen_binary_int_sse(s, decode, \
gen_helper_##lname##_mmx, \
gen_helper_##lname##_xmm, \
gen_helper_##lname##_ymm); \
@@ -886,9 +886,9 @@ BINARY_INT_MMX(PMULHRSW, pmulhrsw)
/* Instructions with no MMX equivalent. */
#define BINARY_INT_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_binary_int_sse(s, env, decode, \
+ gen_binary_int_sse(s, decode, \
NULL, \
gen_helper_##lname##_xmm, \
gen_helper_##lname##_ymm); \
@@ -911,7 +911,7 @@ BINARY_INT_SSE(VAESENC, aesenc)
BINARY_INT_SSE(VAESENCLAST, aesenclast)
#define UNARY_CMP_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
if (!s->vex_l) { \
gen_helper_##lname##_xmm(tcg_env, OP_PTR1, OP_PTR2); \
@@ -924,7 +924,7 @@ UNARY_CMP_SSE(VPTEST, ptest)
UNARY_CMP_SSE(VTESTPS, vtestps)
UNARY_CMP_SSE(VTESTPD, vtestpd)
-static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_int_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_epp xmm, SSEFunc_0_epp ymm)
{
if (!s->vex_l) {
@@ -935,9 +935,9 @@ static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86Decod
}
#define UNARY_INT_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_unary_int_sse(s, env, decode, \
+ gen_unary_int_sse(s, decode, \
gen_helper_##lname##_xmm, \
gen_helper_##lname##_ymm); \
}
@@ -969,7 +969,7 @@ UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq)
UNARY_INT_SSE(VCVTPH2PS, cvtph2ps)
-static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_imm_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_ppi xmm, SSEFunc_0_ppi ymm)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -981,9 +981,9 @@ static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86Decod
}
#define UNARY_IMM_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_unary_imm_sse(s, env, decode, \
+ gen_unary_imm_sse(s, decode, \
gen_helper_##lname##_xmm, \
gen_helper_##lname##_ymm); \
}
@@ -996,7 +996,7 @@ UNARY_IMM_SSE(VPERMQ, vpermq)
UNARY_IMM_SSE(VPERMILPS_i, vpermilps_imm)
UNARY_IMM_SSE(VPERMILPD_i, vpermilpd_imm)
-static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_imm_fp_sse(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_eppi xmm, SSEFunc_0_eppi ymm)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -1008,9 +1008,9 @@ static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86De
}
#define UNARY_IMM_FP_SSE(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_unary_imm_fp_sse(s, env, decode, \
+ gen_unary_imm_fp_sse(s, decode, \
gen_helper_##lname##_xmm, \
gen_helper_##lname##_ymm); \
}
@@ -1018,7 +1018,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
UNARY_IMM_FP_SSE(VROUNDPS, roundps)
UNARY_IMM_FP_SSE(VROUNDPD, roundpd)
-static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_vexw_avx(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_eppp d_xmm, SSEFunc_0_eppp q_xmm,
SSEFunc_0_eppp d_ymm, SSEFunc_0_eppp q_ymm)
{
@@ -1030,9 +1030,9 @@ static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedIns
/* VEX.W affects whether to operate on 32- or 64-bit elements. */
#define VEXW_AVX(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_vexw_avx(s, env, decode, \
+ gen_vexw_avx(s, decode, \
gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \
gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \
}
@@ -1042,7 +1042,7 @@ VEXW_AVX(VPSRAV, vpsrav)
VEXW_AVX(VPMASKMOV, vpmaskmov)
/* Same as above, but with extra arguments to the helper. */
-static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_vsib_avx(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q_xmm,
SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q_ymm)
{
@@ -1066,29 +1066,29 @@ static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedIns
}
}
#define VSIB_AVX(uname, lname) \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \
{ \
- gen_vsib_avx(s, env, decode, \
+ gen_vsib_avx(s, decode, \
gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \
gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \
}
VSIB_AVX(VPGATHERD, vpgatherd)
VSIB_AVX(VPGATHERQ, vpgatherq)
-static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAA(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_helper_aaa(tcg_env);
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_AAD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAD(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_aad(s->T0, s->T0, s->T1);
prepare_update1_cc(decode, s, CC_OP_LOGICB);
}
-static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAM(DisasContext *s, X86DecodedInsn *decode)
{
if (decode->immediate == 0) {
gen_exception(s, EXCP00_DIVZ);
@@ -1098,14 +1098,14 @@ static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAS(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_helper_aas(tcg_env);
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADC(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
TCGv c_in = tcg_temp_new();
@@ -1123,7 +1123,7 @@ static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
/* ADCX/ADOX do not have memory operands and can use set_cc_op. */
-static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
+static void gen_ADCOX(DisasContext *s, MemOp ot, int cc_op)
{
int opposite_cc_op;
TCGv carry_in = NULL;
@@ -1170,12 +1170,12 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
}
}
-static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADCX(DisasContext *s, X86DecodedInsn *decode)
{
- gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
+ gen_ADCOX(s, decode->op[0].ot, CC_OP_ADCX);
}
-static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADD(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -1188,12 +1188,12 @@ static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_ADDB + ot);
}
-static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADOX(DisasContext *s, X86DecodedInsn *decode)
{
- gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
+ gen_ADCOX(s, decode->op[0].ot, CC_OP_ADOX);
}
-static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AND(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -1206,7 +1206,7 @@ static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
-static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ANDN(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1214,7 +1214,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
-static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ARPL(DisasContext *s, X86DecodedInsn *decode)
{
TCGv zf = tcg_temp_new();
TCGv flags = tcg_temp_new();
@@ -1235,7 +1235,7 @@ static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
decode->cc_op = CC_OP_EFLAGS;
}
-static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BEXTR(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
@@ -1264,7 +1264,7 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
/* BLSI do not have memory operands and can use set_cc_op. */
-static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSI(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1276,7 +1276,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
/* BLSMSK do not have memory operands and can use set_cc_op. */
-static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSMSK(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1288,7 +1288,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
}
/* BLSR do not have memory operands and can use set_cc_op. */
-static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSR(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1299,7 +1299,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
set_cc_op(s, CC_OP_BMILGB + ot);
}
-static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 op = tcg_temp_new_i32();
tcg_gen_trunc_tl_i32(op, s->T0);
@@ -1310,7 +1310,7 @@ static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
if (s->dflag == MO_64) {
@@ -1321,7 +1321,7 @@ static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ);
}
-static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
@@ -1341,24 +1341,24 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}
-static void gen_CALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALL(DisasContext *s, X86DecodedInsn *decode)
{
gen_push_v(s, eip_next_tl(s));
- gen_JMP(s, env, decode);
+ gen_JMP(s, decode);
}
-static void gen_CALL_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALL_m(DisasContext *s, X86DecodedInsn *decode)
{
gen_push_v(s, eip_next_tl(s));
- gen_JMP_m(s, env, decode);
+ gen_JMP_m(s, decode);
}
-static void gen_CALLF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALLF(DisasContext *s, X86DecodedInsn *decode)
{
gen_far_call(s);
}
-static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALLF_m(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1368,41 +1368,41 @@ static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
gen_far_call(s);
}
-static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CBW(DisasContext *s, X86DecodedInsn *decode)
{
MemOp src_ot = decode->op[0].ot - 1;
tcg_gen_ext_tl(s->T0, s->T0, src_ot | MO_SIGN);
}
-static void gen_CLC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLC(DisasContext *s, X86DecodedInsn *decode)
{
gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
}
-static void gen_CLD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLD(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, offsetof(CPUX86State, df));
}
-static void gen_CLI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLI(DisasContext *s, X86DecodedInsn *decode)
{
gen_reset_eflags(s, IF_MASK);
}
-static void gen_CMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMC(DisasContext *s, X86DecodedInsn *decode)
{
gen_compute_eflags(s);
tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
}
-static void gen_CMOVcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMOVcc(DisasContext *s, X86DecodedInsn *decode)
{
gen_cmovcc1(s, decode->b & 0xf, s->T0, s->T1);
}
-static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPccXADD(DisasContext *s, X86DecodedInsn *decode)
{
TCGLabel *label_top = gen_new_label();
TCGLabel *label_bottom = gen_new_label();
@@ -1505,7 +1505,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
decode->cc_op = CC_OP_SUBB + ot;
}
-static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPS(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -1515,7 +1515,7 @@ static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1523,7 +1523,7 @@ static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
}
-static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTPI2Px(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_enter_mmx(tcg_env);
if (s->prefix & PREFIX_DATA) {
@@ -1533,7 +1533,7 @@ static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
-static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTPx2PI(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_enter_mmx(tcg_env);
if (s->prefix & PREFIX_DATA) {
@@ -1543,7 +1543,7 @@ static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
-static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTTPx2PI(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_enter_mmx(tcg_env);
if (s->prefix & PREFIX_DATA) {
@@ -1553,28 +1553,28 @@ static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_CWD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CWD(DisasContext *s, X86DecodedInsn *decode)
{
int shift = 8 << decode->op[0].ot;
tcg_gen_sextract_tl(s->T0, s->T0, shift - 1, 1);
}
-static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DAA(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_helper_daa(tcg_env);
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_DAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DAS(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_helper_das(tcg_env);
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DEC(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -1588,7 +1588,7 @@ static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update_cc_incdec(decode, s, CC_OP_DECB + ot);
}
-static void gen_DIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DIV(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1611,17 +1611,17 @@ static void gen_DIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EMMS(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_emms(tcg_env);
}
-static void gen_ENTER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ENTER(DisasContext *s, X86DecodedInsn *decode)
{
gen_enter(s, decode->op[1].imm, decode->op[2].imm);
}
-static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EXTRQ_i(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);
@@ -1629,12 +1629,12 @@ static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
gen_helper_extrq_i(tcg_env, OP_PTR0, index, length);
}
-static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EXTRQ_r(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
}
-static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_HLT(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef CONFIG_SYSTEM_ONLY
gen_update_cc_op(s);
@@ -1644,7 +1644,7 @@ static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
#endif
}
-static void gen_IDIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IDIV(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1667,7 +1667,7 @@ static void gen_IDIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IMUL3(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv cc_src_rhs;
@@ -1730,7 +1730,7 @@ static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_MULB + ot);
}
-static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IMUL(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
TCGv cc_src_rhs;
@@ -1788,7 +1788,7 @@ static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_MULB + ot);
}
-static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IN(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv_i32 port = tcg_temp_new_i32();
@@ -1804,7 +1804,7 @@ static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_bpt_io(s, port, ot);
}
-static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INC(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -1818,7 +1818,7 @@ static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update_cc_incdec(decode, s, CC_OP_INCB + ot);
}
-static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INS(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
TCGv_i32 port = tcg_temp_new_i32();
@@ -1838,7 +1838,7 @@ static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INSERTQ_i(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);
@@ -1846,17 +1846,17 @@ static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
gen_helper_insertq_i(tcg_env, OP_PTR0, OP_PTR1, index, length);
}
-static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INSERTQ_r(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2);
}
-static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT(DisasContext *s, X86DecodedInsn *decode)
{
gen_interrupt(s, decode->immediate);
}
-static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT1(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_update_eip_next(s);
@@ -1864,19 +1864,19 @@ static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_NORETURN;
}
-static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT3(DisasContext *s, X86DecodedInsn *decode)
{
gen_interrupt(s, EXCP03_INT3);
}
-static void gen_INTO(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INTO(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_update_eip_cur(s);
gen_helper_into(tcg_env, cur_insn_len_i32(s));
}
-static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IRET(DisasContext *s, X86DecodedInsn *decode)
{
if (!PE(s) || VM86(s)) {
gen_helper_iret_real(tcg_env, tcg_constant_i32(s->dflag - 1));
@@ -1888,13 +1888,13 @@ static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_EOB_ONLY;
}
-static void gen_Jcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_Jcc(DisasContext *s, X86DecodedInsn *decode)
{
gen_bnd_jmp(s);
gen_jcc(s, decode->b & 0xf, decode->immediate);
}
-static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JCXZ(DisasContext *s, X86DecodedInsn *decode)
{
TCGLabel *taken = gen_new_label();
@@ -1903,25 +1903,25 @@ static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
}
-static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMP(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_jmp_rel(s, s->dflag, decode->immediate, 0);
}
-static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMP_m(DisasContext *s, X86DecodedInsn *decode)
{
gen_op_jmp_v(s, s->T0);
gen_bnd_jmp(s);
s->base.is_jmp = DISAS_JUMP;
}
-static void gen_JMPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMPF(DisasContext *s, X86DecodedInsn *decode)
{
gen_far_jmp(s);
}
-static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMPF_m(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1931,7 +1931,7 @@ static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
gen_far_jmp(s);
}
-static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LAHF(DisasContext *s, X86DecodedInsn *decode)
{
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
return gen_illegal_opcode(s);
@@ -1942,13 +1942,13 @@ static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8);
}
-static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1);
gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
}
-static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, int seg)
+static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg)
{
MemOp ot = decode->op[0].ot;
@@ -1960,37 +1960,37 @@ static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
gen_movl_seg(s, seg, s->T1);
}
-static void gen_LDS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LDS(DisasContext *s, X86DecodedInsn *decode)
{
- gen_lxx_seg(s, env, decode, R_DS);
+ gen_lxx_seg(s, decode, R_DS);
}
-static void gen_LEA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LEA(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_mov_tl(s->T0, s->A0);
}
-static void gen_LEAVE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LEAVE(DisasContext *s, X86DecodedInsn *decode)
{
gen_leave(s);
}
-static void gen_LES(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LES(DisasContext *s, X86DecodedInsn *decode)
{
- gen_lxx_seg(s, env, decode, R_ES);
+ gen_lxx_seg(s, decode, R_ES);
}
-static void gen_LFS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LFS(DisasContext *s, X86DecodedInsn *decode)
{
- gen_lxx_seg(s, env, decode, R_FS);
+ gen_lxx_seg(s, decode, R_FS);
}
-static void gen_LGS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LGS(DisasContext *s, X86DecodedInsn *decode)
{
- gen_lxx_seg(s, env, decode, R_GS);
+ gen_lxx_seg(s, decode, R_GS);
}
-static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LODS(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -2000,7 +2000,7 @@ static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOP(DisasContext *s, X86DecodedInsn *decode)
{
TCGLabel *taken = gen_new_label();
@@ -2010,7 +2010,7 @@ static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
}
-static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOPE(DisasContext *s, X86DecodedInsn *decode)
{
TCGLabel *taken = gen_new_label();
TCGLabel *not_taken = gen_new_label();
@@ -2022,7 +2022,7 @@ static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
}
-static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOPNE(DisasContext *s, X86DecodedInsn *decode)
{
TCGLabel *taken = gen_new_label();
TCGLabel *not_taken = gen_new_label();
@@ -2034,18 +2034,18 @@ static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
}
-static void gen_LSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
{
- gen_lxx_seg(s, env, decode, R_SS);
+ gen_lxx_seg(s, decode, R_SS);
}
-static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOV(DisasContext *s, X86DecodedInsn *decode)
{
/* nothing to do! */
}
#define gen_NOP gen_MOV
-static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MASKMOV(DisasContext *s, X86DecodedInsn *decode)
{
gen_lea_v_seg(s, cpu_regs[R_EDI], R_DS, s->override);
@@ -2056,7 +2056,7 @@ static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVBE(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2068,7 +2068,7 @@ static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVD_from(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -2086,7 +2086,7 @@ static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVD_to(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
int vec_len = vector_len(s, decode);
@@ -2108,12 +2108,12 @@ static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVDQ(DisasContext *s, X86DecodedInsn *decode)
{
gen_store_sse(s, decode, decode->op[2].offset);
}
-static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVMSK(DisasContext *s, X86DecodedInsn *decode)
{
typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn;
ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm;
@@ -2123,7 +2123,7 @@ static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
}
-static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVQ(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
int lo_ofs = vector_elem_offset(&decode->op[0], MO_64, 0);
@@ -2145,14 +2145,14 @@ static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVq_dq(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_enter_mmx(tcg_env);
/* Otherwise the same as any other movq. */
- return gen_MOVQ(s, env, decode);
+ return gen_MOVQ(s, decode);
}
-static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVS(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -2162,7 +2162,7 @@ static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MUL(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -2213,7 +2213,7 @@ static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
decode->cc_op = CC_OP_MULB + ot;
}
-static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MULX(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2239,7 +2239,7 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_NEG(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv oldv = tcg_temp_new();
@@ -2266,7 +2266,7 @@ static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
decode->cc_op = CC_OP_SUBB + ot;
}
-static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_NOT(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2279,7 +2279,7 @@ static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OR(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -2292,7 +2292,7 @@ static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
-static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OUT(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
TCGv_i32 port = tcg_temp_new_i32();
@@ -2309,7 +2309,7 @@ static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_bpt_io(s, port, ot);
}
-static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OUTS(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
TCGv_i32 port = tcg_temp_new_i32();
@@ -2328,7 +2328,7 @@ static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PALIGNR(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
if (!(s->prefix & PREFIX_DATA)) {
@@ -2340,7 +2340,7 @@ static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PANDN(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2350,7 +2350,7 @@ static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
decode->op[1].offset, vec_len, vec_len);
}
-static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PAUSE(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_update_eip_next(s);
@@ -2358,14 +2358,14 @@ static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_NORETURN;
}
-static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPESTRI(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPESTRM(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
@@ -2376,14 +2376,14 @@ static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPISTRI(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPISTRM(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
@@ -2394,17 +2394,17 @@ static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PDEP(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_pdep(s->T0, s->T0, s->T1);
}
-static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXT(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_pext(s->T0, s->T0, s->T1);
}
-static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
+static inline void gen_pextr(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
{
int vec_len = vector_len(s, decode);
int mask = (vec_len >> ot) - 1;
@@ -2430,23 +2430,23 @@ static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *
}
}
-static void gen_PEXTRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTRB(DisasContext *s, X86DecodedInsn *decode)
{
- gen_pextr(s, env, decode, MO_8);
+ gen_pextr(s, decode, MO_8);
}
-static void gen_PEXTRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTRW(DisasContext *s, X86DecodedInsn *decode)
{
- gen_pextr(s, env, decode, MO_16);
+ gen_pextr(s, decode, MO_16);
}
-static void gen_PEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTR(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- gen_pextr(s, env, decode, ot);
+ gen_pextr(s, decode, ot);
}
-static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
+static inline void gen_pinsr(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
{
int vec_len = vector_len(s, decode);
int mask = (vec_len >> ot) - 1;
@@ -2477,19 +2477,19 @@ static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *
}
}
-static void gen_PINSRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSRB(DisasContext *s, X86DecodedInsn *decode)
{
- gen_pinsr(s, env, decode, MO_8);
+ gen_pinsr(s, decode, MO_8);
}
-static void gen_PINSRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSRW(DisasContext *s, X86DecodedInsn *decode)
{
- gen_pinsr(s, env, decode, MO_16);
+ gen_pinsr(s, decode, MO_16);
}
-static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSR(DisasContext *s, X86DecodedInsn *decode)
{
- gen_pinsr(s, env, decode, decode->op[2].ot);
+ gen_pinsr(s, decode, decode->op[2].ot);
}
static void gen_pmovmskb_i64(TCGv_i64 d, TCGv_i64 s)
@@ -2529,7 +2529,7 @@ static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s)
tcg_gen_or_vec(vece, d, d, t);
}
-static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode)
{
static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2 g = {
@@ -2573,7 +2573,7 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
-static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POP(DisasContext *s, X86DecodedInsn *decode)
{
X86DecodedOp *op = &decode->op[0];
MemOp ot = gen_pop_T0(s);
@@ -2587,12 +2587,12 @@ static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_pop_update(s, ot);
}
-static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POPA(DisasContext *s, X86DecodedInsn *decode)
{
gen_popa(s);
}
-static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot;
int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
@@ -2614,13 +2614,13 @@ static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_EOB_NEXT;
}
-static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSHUFW(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
gen_helper_pshufw_mmx(OP_PTR0, OP_PTR1, imm);
}
-static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLW_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2633,7 +2633,7 @@ static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLW_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2646,7 +2646,7 @@ static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRAW_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2658,7 +2658,7 @@ static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
decode->immediate, vec_len, vec_len);
}
-static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLD_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2671,7 +2671,7 @@ static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLD_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2684,7 +2684,7 @@ static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRAD_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2696,7 +2696,7 @@ static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
decode->immediate, vec_len, vec_len);
}
-static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLQ_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2709,7 +2709,7 @@ static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_PSLLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLQ_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -2736,7 +2736,7 @@ static TCGv_ptr make_imm8u_xmm_vec(uint8_t imm, int vec_len)
return ptr;
}
-static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLDQ_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);
@@ -2748,7 +2748,7 @@ static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
-static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLDQ_i(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);
@@ -2760,17 +2760,17 @@ static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
-static void gen_PUSH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSH(DisasContext *s, X86DecodedInsn *decode)
{
gen_push_v(s, s->T1);
}
-static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSHA(DisasContext *s, X86DecodedInsn *decode)
{
gen_pusha(s);
}
-static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSHF(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
gen_helper_read_eflags(s->T0, tcg_env);
@@ -2967,7 +2967,7 @@ static void gen_rotc_mod(MemOp ot, TCGv count)
* length - count, because (length-1) - (count-1) can be computed with
* a XOR, and that is commutative unlike subtraction.
*/
-static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
{
bool have_1bit_cin, can_be_zero;
TCGv count;
@@ -3019,7 +3019,7 @@ static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
{
bool have_1bit_cin, can_be_zero;
TCGv count;
@@ -3072,7 +3072,7 @@ static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RET(DisasContext *s, X86DecodedInsn *decode)
{
int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
@@ -3083,7 +3083,7 @@ static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_JUMP;
}
-static void gen_RETF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RETF(DisasContext *s, X86DecodedInsn *decode)
{
int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
@@ -3154,7 +3154,7 @@ static void gen_rot_carry(X86DecodedInsn *decode, TCGv result,
}
}
-static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ROL(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
@@ -3182,7 +3182,7 @@ static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_rot_overflow(decode, s->T0, old, can_be_zero, count);
}
-static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ROR(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
@@ -3211,7 +3211,7 @@ static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_rot_overflow(decode, s->T0, old, can_be_zero, count);
}
-static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RORX(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int mask = ot == MO_64 ? 63 : 31;
@@ -3235,7 +3235,7 @@ static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SAHF(DisasContext *s, X86DecodedInsn *decode)
{
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
return gen_illegal_opcode(s);
@@ -3247,7 +3247,7 @@ static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
}
-static void gen_SALC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SALC(DisasContext *s, X86DecodedInsn *decode)
{
gen_compute_eflags_c(s, s->T0);
tcg_gen_neg_tl(s->T0, s->T0);
@@ -3283,7 +3283,7 @@ static void gen_shift_dynamic_flags(DisasContext *s, X86DecodedInsn *decode, TCG
old_cc_op, tcg_constant_i32(cc_op));
}
-static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SAR(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
@@ -3305,7 +3305,7 @@ static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SARX(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int mask;
@@ -3315,7 +3315,7 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_sar_tl(s->T0, s->T0, s->T1);
}
-static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SBB(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
TCGv c_in = tcg_temp_new();
@@ -3337,7 +3337,7 @@ static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update3_cc(decode, s, CC_OP_SBBB + ot, c_in);
}
-static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SCAS(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -3347,27 +3347,27 @@ static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_SETcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SETcc(DisasContext *s, X86DecodedInsn *decode)
{
gen_setcc1(s, decode->b & 0xf, s->T0);
}
-static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1NEXTE(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_SHA1MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1MSG1(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_sha1msg1(OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_SHA1MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1MSG2(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_sha1msg2(OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1RNDS4(DisasContext *s, X86DecodedInsn *decode)
{
switch(decode->immediate & 3) {
case 0:
@@ -3385,17 +3385,17 @@ static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_SHA256MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256MSG1(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_sha256msg1(OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_SHA256MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256MSG2(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_sha256msg2(OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256RNDS2(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 wk0 = tcg_temp_new_i32();
TCGv_i32 wk1 = tcg_temp_new_i32();
@@ -3406,7 +3406,7 @@ static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *d
gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1);
}
-static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHL(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
@@ -3428,7 +3428,7 @@ static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHLX(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int mask;
@@ -3438,7 +3438,7 @@ static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_shl_tl(s->T0, s->T0, s->T1);
}
-static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHR(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
@@ -3460,7 +3460,7 @@ static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHRX(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
int mask;
@@ -3470,37 +3470,37 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_shr_tl(s->T0, s->T0, s->T1);
}
-static void gen_STC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STC(DisasContext *s, X86DecodedInsn *decode)
{
gen_compute_eflags(s);
tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
}
-static void gen_STD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STD(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_st_i32(tcg_constant_i32(-1), tcg_env, offsetof(CPUX86State, df));
}
-static void gen_STI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STI(DisasContext *s, X86DecodedInsn *decode)
{
gen_set_eflags(s, IF_MASK);
s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
}
-static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VAESKEYGEN(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
assert(!s->vex_l);
gen_helper_aeskeygenassist_xmm(tcg_env, OP_PTR0, OP_PTR1, imm);
}
-static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STMXCSR(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_update_mxcsr(tcg_env);
tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
}
-static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STOS(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -3510,7 +3510,7 @@ static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SUB(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -3526,12 +3526,12 @@ static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_SUBB + ot);
}
-static void gen_UD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
{
gen_illegal_opcode(s);
}
-static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VAESIMC(DisasContext *s, X86DecodedInsn *decode)
{
assert(!s->vex_l);
gen_helper_aesimc_xmm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3586,7 +3586,7 @@ static const SSEFunc_0_eppp gen_helper_cmp_funcs[32][6] = {
};
#undef SSE_CMP
-static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCMP(DisasContext *s, X86DecodedInsn *decode)
{
int index = decode->immediate & (s->prefix & PREFIX_VEX ? 31 : 7);
int b =
@@ -3597,7 +3597,7 @@ static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_helper_cmp_funcs[index][b](tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCOMI(DisasContext *s, X86DecodedInsn *decode)
{
SSEFunc_0_epp fn;
fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss;
@@ -3605,7 +3605,7 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPD2PS(DisasContext *s, X86DecodedInsn *decode)
{
if (s->vex_l) {
gen_helper_cvtpd2ps_ymm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3614,7 +3614,7 @@ static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPS2PD(DisasContext *s, X86DecodedInsn *decode)
{
if (s->vex_l) {
gen_helper_cvtps2pd_ymm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3623,9 +3623,9 @@ static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPS2PH(DisasContext *s, X86DecodedInsn *decode)
{
- gen_unary_imm_fp_sse(s, env, decode,
+ gen_unary_imm_fp_sse(s, decode,
gen_helper_cvtps2ph_xmm,
gen_helper_cvtps2ph_ymm);
/*
@@ -3637,17 +3637,17 @@ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSD2SS(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_cvtsd2ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSS2SD(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_cvtss2sd(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSI2Sx(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
TCGv_i32 in;
@@ -3677,7 +3677,7 @@ static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_VCVTtSx2SI(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq,
SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq)
{
@@ -3715,21 +3715,21 @@ static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedI
#define gen_helper_cvttsd2sq NULL
#endif
-static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSx2SI(DisasContext *s, X86DecodedInsn *decode)
{
- gen_VCVTtSx2SI(s, env, decode,
+ gen_VCVTtSx2SI(s, decode,
gen_helper_cvtss2si, gen_helper_cvtss2sq,
gen_helper_cvtsd2si, gen_helper_cvtsd2sq);
}
-static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTTSx2SI(DisasContext *s, X86DecodedInsn *decode)
{
- gen_VCVTtSx2SI(s, env, decode,
+ gen_VCVTtSx2SI(s, decode,
gen_helper_cvttss2si, gen_helper_cvttss2sq,
gen_helper_cvttsd2si, gen_helper_cvttsd2sq);
}
-static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VEXTRACTx128(DisasContext *s, X86DecodedInsn *decode)
{
int mask = decode->immediate & 1;
int src_ofs = vector_elem_offset(&decode->op[1], MO_128, mask);
@@ -3741,12 +3741,12 @@ static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *
}
}
-static void gen_VEXTRACTPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VEXTRACTPS(DisasContext *s, X86DecodedInsn *decode)
{
- gen_pextr(s, env, decode, MO_32);
+ gen_pextr(s, decode, MO_32);
}
-static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode)
{
int val = decode->immediate;
int dest_word = (val >> 4) & 3;
@@ -3779,21 +3779,21 @@ static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
-static void gen_VINSERTPS_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTPS_r(DisasContext *s, X86DecodedInsn *decode)
{
int val = decode->immediate;
tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3));
- gen_vinsertps(s, env, decode);
+ gen_vinsertps(s, decode);
}
-static void gen_VINSERTPS_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTPS_m(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
- gen_vinsertps(s, env, decode);
+ gen_vinsertps(s, decode);
}
-static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTx128(DisasContext *s, X86DecodedInsn *decode)
{
int mask = decode->immediate & 1;
tcg_gen_gvec_mov(MO_64,
@@ -3804,7 +3804,7 @@ static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *d
decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask)), 16, 16);
}
-static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_maskmov(DisasContext *s, X86DecodedInsn *decode,
SSEFunc_0_eppt xmm, SSEFunc_0_eppt ymm)
{
if (!s->vex_l) {
@@ -3814,17 +3814,17 @@ static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn
}
}
-static void gen_VMASKMOVPD_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMASKMOVPD_st(DisasContext *s, X86DecodedInsn *decode)
{
- gen_maskmov(s, env, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm);
+ gen_maskmov(s, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm);
}
-static void gen_VMASKMOVPS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMASKMOVPS_st(DisasContext *s, X86DecodedInsn *decode)
{
- gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm);
+ gen_maskmov(s, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm);
}
-static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx_ld(DisasContext *s, X86DecodedInsn *decode)
{
gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
if (decode->op[0].offset != decode->op[1].offset) {
@@ -3833,12 +3833,12 @@ static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *de
}
}
-static void gen_VMOVHPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx_st(DisasContext *s, X86DecodedInsn *decode)
{
gen_stq_env_A0(s, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
}
-static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx(DisasContext *s, X86DecodedInsn *decode)
{
if (decode->op[0].offset != decode->op[2].offset) {
tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
@@ -3850,7 +3850,7 @@ static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
}
}
-static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHLPS(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
@@ -3860,7 +3860,7 @@ static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
-static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLHPS(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset);
tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
@@ -3875,7 +3875,7 @@ static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
* Use a gvec move to move everything above the bottom 64 bits.
*/
-static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLPx(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -3884,7 +3884,7 @@ static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
}
-static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLPx_ld(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -3893,13 +3893,13 @@ static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *de
tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0)));
}
-static void gen_VMOVLPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLPx_st(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_ld_i64(s->tmp1_i64, OP_PTR2, offsetof(ZMMReg, ZMM_Q(0)));
tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
}
-static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSD_ld(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i64 zero = tcg_constant_i64(0);
@@ -3908,7 +3908,7 @@ static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0)));
}
-static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSS(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -3917,7 +3917,7 @@ static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
}
-static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSS_ld(DisasContext *s, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);
@@ -3926,55 +3926,55 @@ static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
}
-static void gen_VMOVSS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSS_st(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
}
-static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPMASKMOV_st(DisasContext *s, X86DecodedInsn *decode)
{
if (s->vex_w) {
- gen_VMASKMOVPD_st(s, env, decode);
+ gen_VMASKMOVPD_st(s, decode);
} else {
- gen_VMASKMOVPS_st(s, env, decode);
+ gen_VMASKMOVPS_st(s, decode);
}
}
-static void gen_VPERMD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPERMD(DisasContext *s, X86DecodedInsn *decode)
{
assert(s->vex_l);
gen_helper_vpermd_ymm(OP_PTR0, OP_PTR1, OP_PTR2);
}
-static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPERM2x128(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
assert(s->vex_l);
gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm);
}
-static void gen_VPHMINPOSUW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPHMINPOSUW(DisasContext *s, X86DecodedInsn *decode)
{
assert(!s->vex_l);
gen_helper_phminposuw_xmm(tcg_env, OP_PTR0, OP_PTR2);
}
-static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VROUNDSD(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
assert(!s->vex_l);
gen_helper_roundsd_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
}
-static void gen_VROUNDSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VROUNDSS(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
assert(!s->vex_l);
gen_helper_roundss_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
}
-static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VSHUF(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant_i32(decode->immediate);
SSEFunc_0_pppi ps, pd, fn;
@@ -3984,7 +3984,7 @@ static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
fn(OP_PTR0, OP_PTR1, OP_PTR2, imm);
}
-static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VUCOMI(DisasContext *s, X86DecodedInsn *decode)
{
SSEFunc_0_epp fn;
fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss;
@@ -3992,7 +3992,7 @@ static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
assume_cc_op(s, CC_OP_EFLAGS);
}
-static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VZEROALL(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_ptr ptr = tcg_temp_new_ptr();
@@ -4001,7 +4001,7 @@ static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
tcg_constant_ptr(CPU_NB_REGS * sizeof(ZMMReg)));
}
-static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VZEROUPPER(DisasContext *s, X86DecodedInsn *decode)
{
int i;
@@ -4011,7 +4011,7 @@ static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *de
}
}
-static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_WAIT(DisasContext *s, X86DecodedInsn *decode)
{
if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) {
gen_NM_exception(s);
@@ -4022,7 +4022,7 @@ static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_XCHG(DisasContext *s, X86DecodedInsn *decode)
{
if (s->prefix & PREFIX_LOCK) {
tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1,
@@ -4036,7 +4036,7 @@ static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
-static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_XLAT(DisasContext *s, X86DecodedInsn *decode)
{
/* AL is already zero-extended into s->T0. */
tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0);
@@ -4044,7 +4044,7 @@ static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_op_ld_v(s, MO_8, s->T0, s->A0);
}
-static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_XOR(DisasContext *s, X86DecodedInsn *decode)
{
/* special case XOR reg, reg */
if (decode->op[1].unit == X86_OP_INT &&
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 02/25] target/i386: rewrite flags writeback for ADCX/ADOX
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
2024-06-08 8:40 ` [PATCH 01/25] target/i386: remove CPUX86State argument from generator functions Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:05 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 03/25] target/i386: put BLS* input in T1, use generic flag writeback Paolo Bonzini
` (22 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
Avoid using set_cc_op() in preparation for implementing APX; treat
CC_OP_EFLAGS similar to the case where we have the "opposite" cc_op
(CC_OP_ADOX for ADCX and CC_OP_ADCX for ADOX), except the resulting
cc_op is not CC_OP_ADCOX. This is written easily as two "if"s, whose
conditions are both false for CC_OP_EFLAGS, both true for CC_OP_ADCOX,
and one each true for CC_OP_ADCX/ADOX.
The new logic also makes it easy to drop usage of tmp0.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.h | 9 +++---
target/i386/tcg/emit.c.inc | 61 ++++++++++++++++++++++----------------
2 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8fe28b67e0f..ee873a0ed84 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1260,6 +1260,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
/* Use a clearer name for this. */
#define CPU_INTERRUPT_INIT CPU_INTERRUPT_RESET
+#define CC_OP_HAS_EFLAGS(op) ((op) >= CC_OP_EFLAGS && (op) <= CC_OP_ADCOX)
+
/* Instead of computing the condition codes after each x86 instruction,
* QEMU just stores one operand (called CC_SRC), the result
* (called CC_DST) and the type of operation (called CC_OP). When the
@@ -1270,6 +1272,9 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
typedef enum {
CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */
+ CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */
+ CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */
+ CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
CC_OP_MULW,
@@ -1326,10 +1331,6 @@ typedef enum {
CC_OP_BMILGL,
CC_OP_BMILGQ,
- CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */
- CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */
- CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
-
CC_OP_CLR, /* Z set, all other flags clear. */
CC_OP_POPCNT, /* Z via CC_SRC, all other flags clear. */
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index df7597c7e2f..2041ea9d04a 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1122,24 +1122,41 @@ static void gen_ADC(DisasContext *s, X86DecodedInsn *decode)
prepare_update3_cc(decode, s, CC_OP_ADCB + ot, c_in);
}
-/* ADCX/ADOX do not have memory operands and can use set_cc_op. */
-static void gen_ADCOX(DisasContext *s, MemOp ot, int cc_op)
+static void gen_ADCOX(DisasContext *s, X86DecodedInsn *decode, int cc_op)
{
- int opposite_cc_op;
+ MemOp ot = decode->op[0].ot;
TCGv carry_in = NULL;
- TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
+ TCGv *carry_out = (cc_op == CC_OP_ADCX ? &decode->cc_dst : &decode->cc_src2);
TCGv zero;
- if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
- /* Re-use the carry-out from a previous round. */
- carry_in = carry_out;
- } else {
- /* We don't have a carry-in, get it out of EFLAGS. */
- if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
- gen_compute_eflags(s);
+ decode->cc_op = cc_op;
+ *carry_out = tcg_temp_new();
+ if (CC_OP_HAS_EFLAGS(s->cc_op)) {
+ decode->cc_src = cpu_cc_src;
+
+ /* Re-use the carry-out from a previous round? */
+ if (s->cc_op == cc_op || s->cc_op == CC_OP_ADCOX) {
+ carry_in = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
}
- carry_in = s->tmp0;
- tcg_gen_extract_tl(carry_in, cpu_cc_src,
+
+ /* Preserve the opposite carry from previous rounds? */
+ if (s->cc_op != cc_op && s->cc_op != CC_OP_EFLAGS) {
+ decode->cc_op = CC_OP_ADCOX;
+ if (carry_out == &decode->cc_dst) {
+ decode->cc_src2 = cpu_cc_src2;
+ } else {
+ decode->cc_dst = cpu_cc_dst;
+ }
+ }
+ } else {
+ decode->cc_src = tcg_temp_new();
+ gen_mov_eflags(s, decode->cc_src);
+ }
+
+ if (!carry_in) {
+ /* Get carry_in out of EFLAGS. */
+ carry_in = tcg_temp_new();
+ tcg_gen_extract_tl(carry_in, decode->cc_src,
ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
}
@@ -1151,28 +1168,20 @@ static void gen_ADCOX(DisasContext *s, MemOp ot, int cc_op)
tcg_gen_ext32u_tl(s->T1, s->T1);
tcg_gen_add_i64(s->T0, s->T0, s->T1);
tcg_gen_add_i64(s->T0, s->T0, carry_in);
- tcg_gen_shri_i64(carry_out, s->T0, 32);
+ tcg_gen_shri_i64(*carry_out, s->T0, 32);
break;
#endif
default:
zero = tcg_constant_tl(0);
- tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero);
- tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
+ tcg_gen_add2_tl(s->T0, *carry_out, s->T0, zero, carry_in, zero);
+ tcg_gen_add2_tl(s->T0, *carry_out, s->T0, *carry_out, s->T1, zero);
break;
}
-
- opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
- if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
- /* Merge with the carry-out from the opposite instruction. */
- set_cc_op(s, CC_OP_ADCOX);
- } else {
- set_cc_op(s, cc_op);
- }
}
static void gen_ADCX(DisasContext *s, X86DecodedInsn *decode)
{
- gen_ADCOX(s, decode->op[0].ot, CC_OP_ADCX);
+ gen_ADCOX(s, decode, CC_OP_ADCX);
}
static void gen_ADD(DisasContext *s, X86DecodedInsn *decode)
@@ -1190,7 +1199,7 @@ static void gen_ADD(DisasContext *s, X86DecodedInsn *decode)
static void gen_ADOX(DisasContext *s, X86DecodedInsn *decode)
{
- gen_ADCOX(s, decode->op[0].ot, CC_OP_ADOX);
+ gen_ADCOX(s, decode, CC_OP_ADOX);
}
static void gen_AND(DisasContext *s, X86DecodedInsn *decode)
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 02/25] target/i386: rewrite flags writeback for ADCX/ADOX
2024-06-08 8:40 ` [PATCH 02/25] target/i386: rewrite flags writeback for ADCX/ADOX Paolo Bonzini
@ 2024-06-08 18:05 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 18:05 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:40, Paolo Bonzini wrote:
> Avoid using set_cc_op() in preparation for implementing APX; treat
> CC_OP_EFLAGS similar to the case where we have the "opposite" cc_op
> (CC_OP_ADOX for ADCX and CC_OP_ADCX for ADOX), except the resulting
> cc_op is not CC_OP_ADCOX. This is written easily as two "if"s, whose
> conditions are both false for CC_OP_EFLAGS, both true for CC_OP_ADCOX,
> and one each true for CC_OP_ADCX/ADOX.
>
> The new logic also makes it easy to drop usage of tmp0.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target/i386/cpu.h | 9 +++---
> target/i386/tcg/emit.c.inc | 61 ++++++++++++++++++++++----------------
> 2 files changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 8fe28b67e0f..ee873a0ed84 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1260,6 +1260,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
> /* Use a clearer name for this. */
> #define CPU_INTERRUPT_INIT CPU_INTERRUPT_RESET
>
> +#define CC_OP_HAS_EFLAGS(op) ((op) >= CC_OP_EFLAGS && (op) <= CC_OP_ADCOX)
> +
> /* Instead of computing the condition codes after each x86 instruction,
> * QEMU just stores one operand (called CC_SRC), the result
> * (called CC_DST) and the type of operation (called CC_OP). When the
> @@ -1270,6 +1272,9 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
> typedef enum {
> CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
> CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */
> + CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */
> + CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */
> + CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
Comment for ADOX is (and was) wrong -- SRC2 = O.
That made review a bit confusing... :-)
Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 03/25] target/i386: put BLS* input in T1, use generic flag writeback
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
2024-06-08 8:40 ` [PATCH 01/25] target/i386: remove CPUX86State argument from generator functions Paolo Bonzini
2024-06-08 8:40 ` [PATCH 02/25] target/i386: rewrite flags writeback for ADCX/ADOX Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:07 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 04/25] target/i386: change X86_ENTRYr to use T0 Paolo Bonzini
` (21 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
This makes for easier cpu_cc_* setup, and not using set_cc_op()
should come in handy if QEMU ever implements APX.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 4 ++--
target/i386/tcg/emit.c.inc | 24 +++++++++---------------
2 files changed, 11 insertions(+), 17 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index e7d88020481..380fb793531 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -633,7 +633,7 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
{},
},
[3] = {
- X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
+ X86_OP_GROUP3(group17, B,y, None,None, E,y, vex13 cpuid(BMI1)),
{},
{},
{},
@@ -2604,7 +2604,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
}
/*
- * Write back flags after last memory access. Some newer ALU instructions, as
+ * Write back flags after last memory access. Some older ALU instructions, as
* well as SSE instructions, write flags in the gen_* function, but that can
* cause incorrect tracking of CC_OP for instructions that write to both memory
* and flags.
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 2041ea9d04a..a25b3dfc6b5 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1272,40 +1272,34 @@ static void gen_BEXTR(DisasContext *s, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
-/* BLSI do not have memory operands and can use set_cc_op. */
static void gen_BLSI(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- tcg_gen_neg_tl(s->T1, s->T0);
+ /* input in T1, which is ready for prepare_update2_cc */
+ tcg_gen_neg_tl(s->T0, s->T1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- set_cc_op(s, CC_OP_BMILGB + ot);
+ prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}
-/* BLSMSK do not have memory operands and can use set_cc_op. */
static void gen_BLSMSK(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- tcg_gen_subi_tl(s->T1, s->T0, 1);
+ /* input in T1, which is ready for prepare_update2_cc */
+ tcg_gen_subi_tl(s->T0, s->T1, 1);
tcg_gen_xor_tl(s->T0, s->T0, s->T1);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- set_cc_op(s, CC_OP_BMILGB + ot);
+ prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}
-/* BLSR do not have memory operands and can use set_cc_op. */
static void gen_BLSR(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- tcg_gen_subi_tl(s->T1, s->T0, 1);
+ /* input in T1, which is ready for prepare_update2_cc */
+ tcg_gen_subi_tl(s->T0, s->T1, 1);
tcg_gen_and_tl(s->T0, s->T0, s->T1);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- set_cc_op(s, CC_OP_BMILGB + ot);
+ prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}
static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 04/25] target/i386: change X86_ENTRYr to use T0
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (2 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 03/25] target/i386: put BLS* input in T1, use generic flag writeback Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:10 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 05/25] target/i386: change X86_ENTRYwr to use T0, use it for moves Paolo Bonzini
` (20 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
I am not sure why I made it use T1. It is a bit more symmetric with
respect to X86_ENTRYwr (which uses T0 for the "w"ritten operand
and T1 for the "r"ead operand), but it is also less flexible because it
does not let you apply zextT0/sextT0.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 6 +++---
target/i386/tcg/emit.c.inc | 34 ++++++++++++++++----------------
2 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 380fb793531..f9d3e2577b2 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -186,7 +186,7 @@
#define X86_OP_ENTRYw(op, op0, s0, ...) \
X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRYr(op, op0, s0, ...) \
- X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
+ X86_OP_ENTRY3(op, None, None, op0, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY1(op, op0, s0, ...) \
X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY0(op, ...) \
@@ -1335,9 +1335,9 @@ static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry
/* 0xff */
[0x08] = X86_OP_ENTRY1(INC, E,v, lock),
[0x09] = X86_OP_ENTRY1(DEC, E,v, lock),
- [0x0a] = X86_OP_ENTRY3(CALL_m, None, None, E,f64, None, None, zextT0),
+ [0x0a] = X86_OP_ENTRYr(CALL_m, E,f64, zextT0),
[0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
- [0x0c] = X86_OP_ENTRY3(JMP_m, None, None, E,f64, None, None, zextT0),
+ [0x0c] = X86_OP_ENTRYr(JMP_m, E,f64, zextT0),
[0x0d] = X86_OP_ENTRYr(JMPF_m, M,p),
[0x0e] = X86_OP_ENTRYr(PUSH, E,f64),
};
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index a25b3dfc6b5..797e6e81406 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1363,7 +1363,7 @@ static void gen_CALLF(DisasContext *s, X86DecodedInsn *decode)
static void gen_CALLF_m(DisasContext *s, X86DecodedInsn *decode)
{
- MemOp ot = decode->op[2].ot;
+ MemOp ot = decode->op[1].ot;
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_add_A0_im(s, 1 << ot);
@@ -1593,22 +1593,22 @@ static void gen_DEC(DisasContext *s, X86DecodedInsn *decode)
static void gen_DIV(DisasContext *s, X86DecodedInsn *decode)
{
- MemOp ot = decode->op[2].ot;
+ MemOp ot = decode->op[1].ot;
switch(ot) {
case MO_8:
- gen_helper_divb_AL(tcg_env, s->T1);
+ gen_helper_divb_AL(tcg_env, s->T0);
break;
case MO_16:
- gen_helper_divw_AX(tcg_env, s->T1);
+ gen_helper_divw_AX(tcg_env, s->T0);
break;
default:
case MO_32:
- gen_helper_divl_EAX(tcg_env, s->T1);
+ gen_helper_divl_EAX(tcg_env, s->T0);
break;
#ifdef TARGET_X86_64
case MO_64:
- gen_helper_divq_EAX(tcg_env, s->T1);
+ gen_helper_divq_EAX(tcg_env, s->T0);
break;
#endif
}
@@ -1649,22 +1649,22 @@ static void gen_HLT(DisasContext *s, X86DecodedInsn *decode)
static void gen_IDIV(DisasContext *s, X86DecodedInsn *decode)
{
- MemOp ot = decode->op[2].ot;
+ MemOp ot = decode->op[1].ot;
switch(ot) {
case MO_8:
- gen_helper_idivb_AL(tcg_env, s->T1);
+ gen_helper_idivb_AL(tcg_env, s->T0);
break;
case MO_16:
- gen_helper_idivw_AX(tcg_env, s->T1);
+ gen_helper_idivw_AX(tcg_env, s->T0);
break;
default:
case MO_32:
- gen_helper_idivl_EAX(tcg_env, s->T1);
+ gen_helper_idivl_EAX(tcg_env, s->T0);
break;
#ifdef TARGET_X86_64
case MO_64:
- gen_helper_idivq_EAX(tcg_env, s->T1);
+ gen_helper_idivq_EAX(tcg_env, s->T0);
break;
#endif
}
@@ -1926,7 +1926,7 @@ static void gen_JMPF(DisasContext *s, X86DecodedInsn *decode)
static void gen_JMPF_m(DisasContext *s, X86DecodedInsn *decode)
{
- MemOp ot = decode->op[2].ot;
+ MemOp ot = decode->op[1].ot;
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_add_A0_im(s, 1 << ot);
@@ -1947,7 +1947,7 @@ static void gen_LAHF(DisasContext *s, X86DecodedInsn *decode)
static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode)
{
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1);
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
}
@@ -1995,7 +1995,7 @@ static void gen_LGS(DisasContext *s, X86DecodedInsn *decode)
static void gen_LODS(DisasContext *s, X86DecodedInsn *decode)
{
- MemOp ot = decode->op[2].ot;
+ MemOp ot = decode->op[1].ot;
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
gen_repz(s, ot, gen_lods);
} else {
@@ -2765,7 +2765,7 @@ static void gen_PSLLDQ_i(DisasContext *s, X86DecodedInsn *decode)
static void gen_PUSH(DisasContext *s, X86DecodedInsn *decode)
{
- gen_push_v(s, s->T1);
+ gen_push_v(s, s->T0);
}
static void gen_PUSHA(DisasContext *s, X86DecodedInsn *decode)
@@ -3077,7 +3077,7 @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
static void gen_RET(DisasContext *s, X86DecodedInsn *decode)
{
- int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+ int16_t adjust = decode->e.op1 == X86_TYPE_I ? decode->immediate : 0;
MemOp ot = gen_pop_T0(s);
gen_stack_update(s, adjust + (1 << ot));
@@ -3088,7 +3088,7 @@ static void gen_RET(DisasContext *s, X86DecodedInsn *decode)
static void gen_RETF(DisasContext *s, X86DecodedInsn *decode)
{
- int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+ int16_t adjust = decode->e.op1 == X86_TYPE_I ? decode->immediate : 0;
if (!PE(s) || VM86(s)) {
gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], 0);
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 04/25] target/i386: change X86_ENTRYr to use T0
2024-06-08 8:40 ` [PATCH 04/25] target/i386: change X86_ENTRYr to use T0 Paolo Bonzini
@ 2024-06-08 18:10 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 18:10 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:40, Paolo Bonzini wrote:
> I am not sure why I made it use T1. It is a bit more symmetric with
> respect to X86_ENTRYwr (which uses T0 for the "w"ritten operand
> and T1 for the "r"ead operand), but it is also less flexible because it
> does not let you apply zextT0/sextT0.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/decode-new.c.inc | 6 +++---
> target/i386/tcg/emit.c.inc | 34 ++++++++++++++++----------------
> 2 files changed, 20 insertions(+), 20 deletions(-)
Acked-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 05/25] target/i386: change X86_ENTRYwr to use T0, use it for moves
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (3 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 04/25] target/i386: change X86_ENTRYr to use T0 Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:13 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 06/25] target/i386: replace NoSeg special with NoLoadEA Paolo Bonzini
` (19 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
Just like X86_ENTRYr, X86_ENTRYwr is easily changed to use only T0.
In this case, the motivation is to use it for the MOV instruction
family. The case when you need to preserve the input value is the
odd one, as it is used basically only for BLS* instructions.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 48 ++++++++++++++++----------------
target/i386/tcg/emit.c.inc | 2 +-
2 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index f9d3e2577b2..d41002e2f5c 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -180,7 +180,7 @@
#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...) \
X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...) \
- X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__)
+ X86_OP_ENTRY3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \
X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...) \
@@ -612,15 +612,15 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = {
/* five rows for no prefix, 66, F3, F2, 66+F2 */
static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
[0] = {
- X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
- X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
+ X86_OP_ENTRYwr(MOVBE, G,y, M,y, cpuid(MOVBE)),
+ X86_OP_ENTRYwr(MOVBE, G,w, M,w, cpuid(MOVBE)),
{},
X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
},
[1] = {
- X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
- X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
+ X86_OP_ENTRYwr(MOVBE, M,y, G,y, cpuid(MOVBE)),
+ X86_OP_ENTRYwr(MOVBE, M,w, G,w, cpuid(MOVBE)),
{},
X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
@@ -1586,18 +1586,18 @@ static const X86OpEntry opcodes_root[256] = {
[0x7E] = X86_OP_ENTRYr(Jcc, J,b),
[0x7F] = X86_OP_ENTRYr(Jcc, J,b),
- [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None),
- [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None),
- [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None),
- [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None),
- /* Missing in Table A-2: memory destination is always 16-bit. */
- [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None, op0_Mw),
- [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg),
- [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,w, None, None),
+ [0x88] = X86_OP_ENTRYwr(MOV, E,b, G,b),
+ [0x89] = X86_OP_ENTRYwr(MOV, E,v, G,v),
+ [0x8A] = X86_OP_ENTRYwr(MOV, G,b, E,b),
+ [0x8B] = X86_OP_ENTRYwr(MOV, G,v, E,v),
+ /* Missing in Table A-2: memory destination is always 16-bit. */
+ [0x8C] = X86_OP_ENTRYwr(MOV, E,v, S,w, op0_Mw),
+ [0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, noseg),
+ [0x8E] = X86_OP_ENTRYwr(MOV, S,w, E,w),
[0x8F] = X86_OP_GROUPw(group1A, E,v),
[0x98] = X86_OP_ENTRY1(CBW, 0,v), /* rAX */
- [0x99] = X86_OP_ENTRY3(CWD, 2,v, 0,v, None, None), /* rDX, rAX */
+ [0x99] = X86_OP_ENTRYwr(CWD, 2,v, 0,v), /* rDX, rAX */
[0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
[0x9B] = X86_OP_ENTRY0(WAIT),
[0x9C] = X86_OP_ENTRY0(PUSHF, chk(vm86_iopl) svm(PUSHF)),
@@ -1607,22 +1607,22 @@ static const X86OpEntry opcodes_root[256] = {
[0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b), /* AL, Ib */
[0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z), /* rAX, Iz */
- [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None),
- [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None),
+ [0xAA] = X86_OP_ENTRYwr(STOS, Y,b, 0,b),
+ [0xAB] = X86_OP_ENTRYwr(STOS, Y,v, 0,v),
/* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS. */
[0xAC] = X86_OP_ENTRYr(LODS, X,b),
[0xAD] = X86_OP_ENTRYr(LODS, X,v),
[0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
[0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
- [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
- [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
- [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
- [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
- [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
- [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
- [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
- [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xB8] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+ [0xB9] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+ [0xBA] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+ [0xBB] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+ [0xBC] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+ [0xBD] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+ [0xBE] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+ [0xBF] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
[0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
[0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 797e6e81406..78d89db57cd 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1796,7 +1796,7 @@ static void gen_IN(DisasContext *s, X86DecodedInsn *decode)
MemOp ot = decode->op[0].ot;
TCGv_i32 port = tcg_temp_new_i32();
- tcg_gen_trunc_tl_i32(port, s->T1);
+ tcg_gen_trunc_tl_i32(port, s->T0);
tcg_gen_ext16u_i32(port, port);
if (!gen_check_io(s, ot, port, SVM_IOIO_TYPE_MASK)) {
return;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 05/25] target/i386: change X86_ENTRYwr to use T0, use it for moves
2024-06-08 8:40 ` [PATCH 05/25] target/i386: change X86_ENTRYwr to use T0, use it for moves Paolo Bonzini
@ 2024-06-08 18:13 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 18:13 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:40, Paolo Bonzini wrote:
> Just like X86_ENTRYr, X86_ENTRYwr is easily changed to use only T0.
> In this case, the motivation is to use it for the MOV instruction
> family. The case when you need to preserve the input value is the
> odd one, as it is used basically only for BLS* instructions.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/decode-new.c.inc | 48 ++++++++++++++++----------------
> target/i386/tcg/emit.c.inc | 2 +-
> 2 files changed, 25 insertions(+), 25 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 06/25] target/i386: replace NoSeg special with NoLoadEA
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (4 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 05/25] target/i386: change X86_ENTRYwr to use T0, use it for moves Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:16 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 07/25] target/i386: fix processing of intercept 0 (read CR0) Paolo Bonzini
` (18 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
This is a bit more generic, as it can be applied to MPX as well.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 5 +++--
target/i386/tcg/decode-new.c.inc | 12 ++++--------
target/i386/tcg/emit.c.inc | 3 ++-
3 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index f704698575f..46a96b220d0 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -170,8 +170,9 @@ typedef enum X86InsnSpecial {
/* Always locked if it has a memory operand (XCHG) */
X86_SPECIAL_Locked,
- /* Do not apply segment base to effective address */
- X86_SPECIAL_NoSeg,
+ /* Do not load effective address in s->A0 */
+ X86_SPECIAL_NoLoadEA,
+
/*
* Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits
* (and writeback zero-extends it to 64 bits if applicable). PREFIX_DATA
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index d41002e2f5c..4f5fcdb88dd 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -193,7 +193,7 @@
X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
-#define noseg .special = X86_SPECIAL_NoSeg,
+#define nolea .special = X86_SPECIAL_NoLoadEA,
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
@@ -1592,7 +1592,7 @@ static const X86OpEntry opcodes_root[256] = {
[0x8B] = X86_OP_ENTRYwr(MOV, G,v, E,v),
/* Missing in Table A-2: memory destination is always 16-bit. */
[0x8C] = X86_OP_ENTRYwr(MOV, E,v, S,w, op0_Mw),
- [0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, noseg),
+ [0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, nolea),
[0x8E] = X86_OP_ENTRYwr(MOV, S,w, E,w),
[0x8F] = X86_OP_GROUPw(group1A, E,v),
@@ -2524,11 +2524,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
assert(decode.op[1].unit == X86_OP_INT);
break;
- case X86_SPECIAL_NoSeg:
- decode.mem.def_seg = -1;
- s->override = -1;
- break;
-
case X86_SPECIAL_Op0_Mw:
assert(decode.op[0].unit == X86_OP_INT);
if (decode.op[0].has_ea) {
@@ -2585,7 +2580,8 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
gen_helper_enter_mmx(tcg_env);
}
- if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
+ if (decode.e.special != X86_SPECIAL_NoLoadEA &&
+ (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) {
gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
}
if (s->prefix & PREFIX_LOCK) {
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 78d89db57cd..e6521632edd 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1970,7 +1970,8 @@ static void gen_LDS(DisasContext *s, X86DecodedInsn *decode)
static void gen_LEA(DisasContext *s, X86DecodedInsn *decode)
{
- tcg_gen_mov_tl(s->T0, s->A0);
+ TCGv ea = gen_lea_modrm_1(s, decode->mem, false);
+ gen_lea_v_seg_dest(s, s->aflag, s->T0, ea, -1, -1);
}
static void gen_LEAVE(DisasContext *s, X86DecodedInsn *decode)
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 07/25] target/i386: fix processing of intercept 0 (read CR0)
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (5 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 06/25] target/i386: replace NoSeg special with NoLoadEA Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:17 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 08/25] target/i386: convert MOV from/to CR and DR to new decoder Paolo Bonzini
` (17 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 1 +
target/i386/tcg/decode-new.c.inc | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 46a96b220d0..8465717ea21 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -272,6 +272,7 @@ struct X86OpEntry {
unsigned valid_prefix:16;
unsigned check:16;
unsigned intercept:8;
+ bool has_intercept:1;
bool is_decode:1;
};
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 4f5fcdb88dd..cd925fe3589 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -221,7 +221,7 @@
#define vex13 .vex_class = 13,
#define chk(a) .check = X86_CHECK_##a,
-#define svm(a) .intercept = SVM_EXIT_##a,
+#define svm(a) .intercept = SVM_EXIT_##a, .has_intercept = true,
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
@@ -2559,7 +2559,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
goto gp_fault;
}
}
- if (decode.e.intercept && unlikely(GUEST(s))) {
+ if (decode.e.has_intercept && unlikely(GUEST(s))) {
gen_helper_svm_check_intercept(tcg_env,
tcg_constant_i32(decode.e.intercept));
}
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 08/25] target/i386: convert MOV from/to CR and DR to new decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (6 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 07/25] target/i386: fix processing of intercept 0 (read CR0) Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:24 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 09/25] target/i386: fix bad sorting of entries in the 0F table Paolo Bonzini
` (16 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
Complete implementation of C and D operand types, then the operations
are just MOVs.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 79 --------------------------------
target/i386/tcg/decode-new.c.inc | 53 +++++++++++++++++++--
target/i386/tcg/emit.c.inc | 20 +++++++-
3 files changed, 68 insertions(+), 84 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index fcba9c155f9..4958f4c45d5 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -247,9 +247,6 @@ STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
-STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
-STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
-STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
@@ -4192,82 +4189,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
gen_nop_modrm(env, s, modrm);
break;
- case 0x120: /* mov reg, crN */
- case 0x122: /* mov crN, reg */
- if (!check_cpl0(s)) {
- break;
- }
- modrm = x86_ldub_code(env, s);
- /*
- * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
- * AMD documentation (24594.pdf) and testing of Intel 386 and 486
- * processors all show that the mod bits are assumed to be 1's,
- * regardless of actual values.
- */
- rm = (modrm & 7) | REX_B(s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- switch (reg) {
- case 0:
- if ((prefixes & PREFIX_LOCK) &&
- (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
- reg = 8;
- }
- break;
- case 2:
- case 3:
- case 4:
- case 8:
- break;
- default:
- goto unknown_op;
- }
- ot = (CODE64(s) ? MO_64 : MO_32);
-
- translator_io_start(&s->base);
- if (b & 2) {
- gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
- gen_op_mov_v_reg(s, ot, s->T0, rm);
- gen_helper_write_crN(tcg_env, tcg_constant_i32(reg), s->T0);
- s->base.is_jmp = DISAS_EOB_NEXT;
- } else {
- gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
- gen_helper_read_crN(s->T0, tcg_env, tcg_constant_i32(reg));
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- }
- break;
-
- case 0x121: /* mov reg, drN */
- case 0x123: /* mov drN, reg */
- if (check_cpl0(s)) {
- modrm = x86_ldub_code(env, s);
- /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
- * AMD documentation (24594.pdf) and testing of
- * intel 386 and 486 processors all show that the mod bits
- * are assumed to be 1's, regardless of actual values.
- */
- rm = (modrm & 7) | REX_B(s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- if (CODE64(s))
- ot = MO_64;
- else
- ot = MO_32;
- if (reg >= 8) {
- goto illegal_op;
- }
- if (b & 2) {
- gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
- gen_op_mov_v_reg(s, ot, s->T0, rm);
- tcg_gen_movi_i32(s->tmp2_i32, reg);
- gen_helper_set_dr(tcg_env, s->tmp2_i32, s->T0);
- s->base.is_jmp = DISAS_EOB_NEXT;
- } else {
- gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
- tcg_gen_movi_i32(s->tmp2_i32, reg);
- gen_helper_get_dr(s->T0, tcg_env, s->tmp2_i32);
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- }
- }
- break;
case 0x106: /* clts */
if (check_cpl0(s)) {
gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index cd925fe3589..4c567911f41 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -151,6 +151,8 @@
X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_GROUPw(op, op0, s0, ...) \
X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
+#define X86_OP_GROUPwr(op, op0, s0, op1, s1, ...) \
+ X86_OP_GROUP3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP0(op, ...) \
X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
@@ -985,6 +987,24 @@ static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
*entry = *decode_by_prefix(s, opcodes_0FE6);
}
+/* These are a bit weird, so group all the pre-decode tweaks here. */
+static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ /* No 16-bit mode. */
+ s->dflag = MO_32;
+
+ /*
+ * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
+ * AMD documentation (24594.pdf) and testing of Intel 386 and 486
+ * processors all show that the mod bits are assumed to be 1's,
+ * regardless of actual values.
+ */
+ get_modrm(s, env);
+ s->modrm |= 0xC0;
+
+ entry->gen = gen_MOV;
+}
+
static const X86OpEntry opcodes_0F[256] = {
[0x0E] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */
/*
@@ -1004,6 +1024,11 @@ static const X86OpEntry opcodes_0F[256] = {
/* Incorrectly listed as Mq,Vq in the manual */
[0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex5 p_00_66),
+ [0x20] = X86_OP_GROUPwr(MOV_CR_DR, R,y, C,y, chk(cpl0) svm(READ_CR0)),
+ [0x21] = X86_OP_GROUPwr(MOV_CR_DR, R,y, D,y, chk(cpl0) svm(READ_DR0)),
+ [0x22] = X86_OP_GROUPwr(MOV_CR_DR, C,y, R,y, zextT0 chk(cpl0) svm(WRITE_CR0)),
+ [0x23] = X86_OP_GROUPwr(MOV_CR_DR, D,y, R,y, zextT0 chk(cpl0) svm(WRITE_DR0)),
+
[0x40] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
[0x41] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
[0x42] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
@@ -1802,11 +1827,34 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
case X86_TYPE_C: /* REG in the modrm byte selects a control register */
op->unit = X86_OP_CR;
- goto get_reg;
+ op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
+ if (op->n == 0 && (s->prefix & PREFIX_LOCK) &&
+ (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
+ op->n = 8;
+ s->prefix &= ~PREFIX_LOCK;
+ }
+ if (op->n != 0 && op->n != 2 && op->n != 3 && op->n != 4 && op->n != 8) {
+ return false;
+ }
+ if (decode->e.intercept) {
+ decode->e.intercept += op->n;
+ }
+ break;
case X86_TYPE_D: /* REG in the modrm byte selects a debug register */
op->unit = X86_OP_DR;
- goto get_reg;
+ op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
+ if (op->n >= 8) {
+ /*
+ * illegal opcode. The DR4 and DR5 case is checked in the generated
+ * code instead, to save on hflags bits.
+ */
+ return false;
+ }
+ if (decode->e.intercept) {
+ decode->e.intercept += op->n;
+ }
+ break;
case X86_TYPE_G: /* REG in the modrm byte selects a GPR */
op->unit = X86_OP_INT;
@@ -2431,7 +2479,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
case 0x00 ... 0x03: /* mostly privileged instructions */
case 0x05 ... 0x09:
case 0x1a ... 0x1b: /* MPX */
- case 0x20 ... 0x23: /* mov from/to CR and DR */
case 0x30 ... 0x35: /* more privileged instructions */
case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */
case 0xaa ... 0xae: /* RSM, SHRD, grp15 */
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e6521632edd..bcb6bccbd75 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -242,12 +242,19 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
tcg_gen_ld32u_tl(v, tcg_env,
offsetof(CPUX86State,segs[op->n].selector));
break;
+#ifndef CONFIG_USER_ONLY
case X86_OP_CR:
- tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n]));
+ if (op->n == 8) {
+ gen_helper_read_crN(v, tcg_env, tcg_constant_i32(op->n));
+ } else {
+ tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n]));
+ }
break;
case X86_OP_DR:
- tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, dr[op->n]));
+ /* CR4.DE tested in the helper. */
+ gen_helper_get_dr(v, tcg_env, tcg_constant_i32(op->n));
break;
+#endif
case X86_OP_INT:
if (op->has_ea) {
if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
@@ -343,8 +350,17 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
16, 16, 0);
}
break;
+#ifndef CONFIG_USER_ONLY
case X86_OP_CR:
+ gen_helper_write_crN(tcg_env, tcg_constant_i32(op->n), v);
+ s->base.is_jmp = DISAS_EOB_NEXT;
+ break;
case X86_OP_DR:
+ /* CR4.DE tested in the helper. */
+ gen_helper_set_dr(tcg_env, tcg_constant_i32(op->n), v);
+ s->base.is_jmp = DISAS_EOB_NEXT;
+ break;
+#endif
default:
g_assert_not_reached();
}
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 09/25] target/i386: fix bad sorting of entries in the 0F table
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (7 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 08/25] target/i386: convert MOV from/to CR and DR to new decoder Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:26 ` Richard Henderson
2024-06-08 8:40 ` [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder Paolo Bonzini
` (15 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
Aesthetic change only.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 93 ++++++++++++++++----------------
1 file changed, 46 insertions(+), 47 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 4c567911f41..4e745f10dd8 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1006,14 +1006,6 @@ static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entr
}
static const X86OpEntry opcodes_0F[256] = {
- [0x0E] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */
- /*
- * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
- * more like an Ib operand. Dispatch to the right helper in a single gen_*
- * function.
- */
- [0x0F] = X86_OP_ENTRY3(3dnow, P,q, Q,q, I,b, cpuid(3DNOW)),
-
[0x10] = X86_OP_GROUP0(0F10),
[0x11] = X86_OP_GROUP0(0F11),
[0x12] = X86_OP_GROUP0(0F12),
@@ -1086,8 +1078,54 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
[0xa1] = X86_OP_ENTRYw(POP, FS, w),
+ [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
+ [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None),
+ [0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None),
+ [0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */
+ [0xb7] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, zextT0), /* MOVZX */
+
+ [0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
+ [0xc3] = X86_OP_ENTRY3(MOV, EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
+ [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
+ [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
+ [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
+
+ [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2),
+ [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xd3] = X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xd4] = X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xd5] = X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xd6] = X86_OP_GROUP0(0FD6),
+ [0xd7] = X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx2_256 p_00_66),
+
+ [0xe0] = X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xe1] = X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
+ [0xe2] = X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
+ [0xe3] = X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xe4] = X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xe5] = X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xe6] = X86_OP_GROUP0(0FE6),
+ [0xe7] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
+
+ [0xf0] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
+ [0xf1] = X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
+ [0xf2] = X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
+ [0xf3] = X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
+ [0xf4] = X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xf5] = X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
+ [0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
+
[0x0b] = X86_OP_ENTRY0(UD), /* UD2 */
[0x0d] = X86_OP_ENTRY1(NOP, M,v), /* 3DNow! prefetch */
+ [0x0e] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */
+ /*
+ * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
+ * more like an Ib operand. Dispatch to the right helper in a single gen_*
+ * function.
+ */
+ [0x0f] = X86_OP_ENTRY3(3dnow, P,q, Q,q, I,b, cpuid(3DNOW)),
[0x18] = X86_OP_ENTRY1(NOP, nop,v), /* prefetch/reserved NOP */
[0x19] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */
@@ -1169,23 +1207,11 @@ static const X86OpEntry opcodes_0F[256] = {
*/
[0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0),
- [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
- [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None),
- [0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None),
- [0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */
- [0xb7] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, zextT0), /* MOVZX */
-
/* decoded as modrm, which is visible as a difference between page fault and #UD */
[0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */
[0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
[0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
- [0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
- [0xc3] = X86_OP_ENTRY3(MOV, EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
- [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
- [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
- [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
-
[0xc8] = X86_OP_ENTRY1(BSWAP, LoBits,y),
[0xc9] = X86_OP_ENTRY1(BSWAP, LoBits,y),
[0xca] = X86_OP_ENTRY1(BSWAP, LoBits,y),
@@ -1195,33 +1221,6 @@ static const X86OpEntry opcodes_0F[256] = {
[0xce] = X86_OP_ENTRY1(BSWAP, LoBits,y),
[0xcf] = X86_OP_ENTRY1(BSWAP, LoBits,y),
- [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2),
- [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xd3] = X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xd4] = X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xd5] = X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xd6] = X86_OP_GROUP0(0FD6),
- [0xd7] = X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx2_256 p_00_66),
-
- [0xe0] = X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xe1] = X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
- [0xe2] = X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
- [0xe3] = X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xe4] = X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xe5] = X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xe6] = X86_OP_GROUP0(0FE6),
- [0xe7] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
-
- [0xf0] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
- [0xf1] = X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
- [0xf2] = X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
- [0xf3] = X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66),
- [0xf4] = X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xf5] = X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
- [0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
-
/* Incorrectly missing from 2-17 */
[0xd8] = X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
[0xd9] = X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (8 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 09/25] target/i386: fix bad sorting of entries in the 0F table Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:42 ` Richard Henderson
2024-10-21 1:49 ` Guenter Roeck
2024-06-08 8:40 ` [PATCH 11/25] target/i386: replace read_crN helper with read_cr8 Paolo Bonzini
` (14 subsequent siblings)
24 siblings, 2 replies; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish
the job.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 7 ++
target/i386/tcg/translate.c | 188 -------------------------------
target/i386/tcg/decode-new.c.inc | 48 +++++++-
target/i386/tcg/emit.c.inc | 80 +++++++++++++
4 files changed, 129 insertions(+), 194 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 8465717ea21..5577f7509aa 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -108,10 +108,15 @@ typedef enum X86CPUIDFeature {
X86_FEAT_AVX2,
X86_FEAT_BMI1,
X86_FEAT_BMI2,
+ X86_FEAT_CLFLUSH,
+ X86_FEAT_CLFLUSHOPT,
+ X86_FEAT_CLWB,
X86_FEAT_CMOV,
X86_FEAT_CMPCCXADD,
X86_FEAT_F16C,
X86_FEAT_FMA,
+ X86_FEAT_FSGSBASE,
+ X86_FEAT_FXSR,
X86_FEAT_MOVBE,
X86_FEAT_PCLMULQDQ,
X86_FEAT_SHA_NI,
@@ -122,6 +127,8 @@ typedef enum X86CPUIDFeature {
X86_FEAT_SSE41,
X86_FEAT_SSE42,
X86_FEAT_SSE4A,
+ X86_FEAT_XSAVE,
+ X86_FEAT_XSAVEOPT,
} X86CPUIDFeature;
/* Execution flags */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4958f4c45d5..ebae745ecba 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4197,194 +4197,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
s->base.is_jmp = DISAS_EOB_NEXT;
}
break;
- /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
- case 0x1ae:
- modrm = x86_ldub_code(env, s);
- switch (modrm) {
- CASE_MODRM_MEM_OP(0): /* fxsave */
- if (!(s->cpuid_features & CPUID_FXSR)
- || (prefixes & PREFIX_LOCK)) {
- goto illegal_op;
- }
- if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
- gen_exception(s, EXCP07_PREX);
- break;
- }
- gen_lea_modrm(env, s, modrm);
- gen_helper_fxsave(tcg_env, s->A0);
- break;
-
- CASE_MODRM_MEM_OP(1): /* fxrstor */
- if (!(s->cpuid_features & CPUID_FXSR)
- || (prefixes & PREFIX_LOCK)) {
- goto illegal_op;
- }
- if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
- gen_exception(s, EXCP07_PREX);
- break;
- }
- gen_lea_modrm(env, s, modrm);
- gen_helper_fxrstor(tcg_env, s->A0);
- break;
-
- CASE_MODRM_MEM_OP(2): /* ldmxcsr */
- if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
- goto illegal_op;
- }
- if (s->flags & HF_TS_MASK) {
- gen_exception(s, EXCP07_PREX);
- break;
- }
- gen_lea_modrm(env, s, modrm);
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
- gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
- break;
-
- CASE_MODRM_MEM_OP(3): /* stmxcsr */
- if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
- goto illegal_op;
- }
- if (s->flags & HF_TS_MASK) {
- gen_exception(s, EXCP07_PREX);
- break;
- }
- gen_helper_update_mxcsr(tcg_env);
- gen_lea_modrm(env, s, modrm);
- tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
- gen_op_st_v(s, MO_32, s->T0, s->A0);
- break;
-
- CASE_MODRM_MEM_OP(4): /* xsave */
- if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
- || (prefixes & (PREFIX_LOCK | PREFIX_DATA
- | PREFIX_REPZ | PREFIX_REPNZ))) {
- goto illegal_op;
- }
- gen_lea_modrm(env, s, modrm);
- tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
- cpu_regs[R_EDX]);
- gen_helper_xsave(tcg_env, s->A0, s->tmp1_i64);
- break;
-
- CASE_MODRM_MEM_OP(5): /* xrstor */
- if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
- || (prefixes & (PREFIX_LOCK | PREFIX_DATA
- | PREFIX_REPZ | PREFIX_REPNZ))) {
- goto illegal_op;
- }
- gen_lea_modrm(env, s, modrm);
- tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
- cpu_regs[R_EDX]);
- gen_helper_xrstor(tcg_env, s->A0, s->tmp1_i64);
- /* XRSTOR is how MPX is enabled, which changes how
- we translate. Thus we need to end the TB. */
- s->base.is_jmp = DISAS_EOB_NEXT;
- break;
-
- CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
- if (prefixes & PREFIX_LOCK) {
- goto illegal_op;
- }
- if (prefixes & PREFIX_DATA) {
- /* clwb */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
- goto illegal_op;
- }
- gen_nop_modrm(env, s, modrm);
- } else {
- /* xsaveopt */
- if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
- || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
- || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
- goto illegal_op;
- }
- gen_lea_modrm(env, s, modrm);
- tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
- cpu_regs[R_EDX]);
- gen_helper_xsaveopt(tcg_env, s->A0, s->tmp1_i64);
- }
- break;
-
- CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
- if (prefixes & PREFIX_LOCK) {
- goto illegal_op;
- }
- if (prefixes & PREFIX_DATA) {
- /* clflushopt */
- if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
- goto illegal_op;
- }
- } else {
- /* clflush */
- if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
- || !(s->cpuid_features & CPUID_CLFLUSH)) {
- goto illegal_op;
- }
- }
- gen_nop_modrm(env, s, modrm);
- break;
-
- case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
- case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
- case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
- case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
- if (CODE64(s)
- && (prefixes & PREFIX_REPZ)
- && !(prefixes & PREFIX_LOCK)
- && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
- TCGv base, treg, src, dst;
-
- /* Preserve hflags bits by testing CR4 at runtime. */
- tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
- gen_helper_cr4_testbit(tcg_env, s->tmp2_i32);
-
- base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
- treg = cpu_regs[(modrm & 7) | REX_B(s)];
-
- if (modrm & 0x10) {
- /* wr*base */
- dst = base, src = treg;
- } else {
- /* rd*base */
- dst = treg, src = base;
- }
-
- if (s->dflag == MO_32) {
- tcg_gen_ext32u_tl(dst, src);
- } else {
- tcg_gen_mov_tl(dst, src);
- }
- break;
- }
- goto unknown_op;
-
- case 0xf8 ... 0xff: /* sfence */
- if (!(s->cpuid_features & CPUID_SSE)
- || (prefixes & PREFIX_LOCK)) {
- goto illegal_op;
- }
- tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
- break;
- case 0xe8 ... 0xef: /* lfence */
- if (!(s->cpuid_features & CPUID_SSE)
- || (prefixes & PREFIX_LOCK)) {
- goto illegal_op;
- }
- tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
- break;
- case 0xf0 ... 0xf7: /* mfence */
- if (!(s->cpuid_features & CPUID_SSE2)
- || (prefixes & PREFIX_LOCK)) {
- goto illegal_op;
- }
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
- break;
-
- default:
- goto unknown_op;
- }
- break;
-
case 0x1aa: /* rsm */
gen_svm_check_intercept(s, SVM_EXIT_RSM);
if (!(s->flags & HF_SMM_MASK))
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 4e745f10dd8..1c6fa39c3eb 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -269,20 +269,41 @@ static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEnt
static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
- /* only includes ldmxcsr and stmxcsr, because they have AVX variants. */
static const X86OpEntry group15_reg[8] = {
+ [0] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3),
+ [1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3),
+ [2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
+ [3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
+ [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00),
+ [6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00),
+ [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00),
};
static const X86OpEntry group15_mem[8] = {
- [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128)),
- [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128)),
+ [0] = X86_OP_ENTRYw(FXSAVE, M,y, cpuid(FXSR) p_00),
+ [1] = X86_OP_ENTRYr(FXRSTOR, M,y, cpuid(FXSR) p_00),
+ [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128) p_00),
+ [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128) p_00),
+ [4] = X86_OP_ENTRYw(XSAVE, M,y, cpuid(XSAVE) p_00),
+ [5] = X86_OP_ENTRYr(XRSTOR, M,y, cpuid(XSAVE) p_00),
+ [6] = X86_OP_ENTRYw(XSAVEOPT, M,b, cpuid(XSAVEOPT) p_00),
+ [7] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLFLUSH) p_00),
+ };
+
+ static const X86OpEntry group15_mem_66[8] = {
+ [6] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLWB)),
+ [7] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLFLUSHOPT)),
};
uint8_t modrm = get_modrm(s, env);
+ int op = (modrm >> 3) & 7;
+
if ((modrm >> 6) == 3) {
- *entry = group15_reg[(modrm >> 3) & 7];
+ *entry = group15_reg[op];
+ } else if (s->prefix & PREFIX_DATA) {
+ *entry = group15_mem_66[op];
} else {
- *entry = group15_mem[(modrm >> 3) & 7];
+ *entry = group15_mem[op];
}
}
@@ -2094,6 +2115,10 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return true;
case X86_FEAT_CMOV:
return (s->cpuid_features & CPUID_CMOV);
+ case X86_FEAT_CLFLUSH:
+ return (s->cpuid_features & CPUID_CLFLUSH);
+ case X86_FEAT_FXSR:
+ return (s->cpuid_features & CPUID_FXSR);
case X86_FEAT_F16C:
return (s->cpuid_ext_features & CPUID_EXT_F16C);
case X86_FEAT_FMA:
@@ -2127,6 +2152,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
case X86_FEAT_AVX:
return (s->cpuid_ext_features & CPUID_EXT_AVX);
+ case X86_FEAT_XSAVE:
+ return (s->cpuid_ext_features & CPUID_EXT_XSAVE);
case X86_FEAT_3DNOW:
return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
@@ -2141,11 +2168,20 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
case X86_FEAT_AVX2:
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
+ case X86_FEAT_CLFLUSHOPT:
+ return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT);
+ case X86_FEAT_CLWB:
+ return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB);
+ case X86_FEAT_FSGSBASE:
+ return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE);
case X86_FEAT_SHA_NI:
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
case X86_FEAT_CMPCCXADD:
return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
+
+ case X86_FEAT_XSAVEOPT:
+ return (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT);
}
g_assert_not_reached();
}
@@ -2480,7 +2516,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
case 0x1a ... 0x1b: /* MPX */
case 0x30 ... 0x35: /* more privileged instructions */
case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */
- case 0xaa ... 0xae: /* RSM, SHRD, grp15 */
+ case 0xaa ... 0xad: /* RSM, SHRD */
case 0xb0 ... 0xb1: /* cmpxchg */
case 0xb3: /* btr */
case 0xb8: /* integer ops */
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index bcb6bccbd75..5ca3764e006 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1653,6 +1653,22 @@ static void gen_EXTRQ_r(DisasContext *s, X86DecodedInsn *decode)
gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
}
+static void gen_FXRSTOR(DisasContext *s, X86DecodedInsn *decode)
+{
+ if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+ gen_NM_exception(s);
+ }
+ gen_helper_fxrstor(tcg_env, s->A0);
+}
+
+static void gen_FXSAVE(DisasContext *s, X86DecodedInsn *decode)
+{
+ if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+ gen_NM_exception(s);
+ }
+ gen_helper_fxsave(tcg_env, s->A0);
+}
+
static void gen_HLT(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef CONFIG_SYSTEM_ONLY
@@ -2000,6 +2016,11 @@ static void gen_LES(DisasContext *s, X86DecodedInsn *decode)
gen_lxx_seg(s, decode, R_ES);
}
+static void gen_LFENCE(DisasContext *s, X86DecodedInsn *decode)
+{
+ tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
+}
+
static void gen_LFS(DisasContext *s, X86DecodedInsn *decode)
{
gen_lxx_seg(s, decode, R_FS);
@@ -2059,6 +2080,11 @@ static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
gen_lxx_seg(s, decode, R_SS);
}
+static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
+{
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+}
+
static void gen_MOV(DisasContext *s, X86DecodedInsn *decode)
{
/* nothing to do! */
@@ -3092,6 +3118,15 @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
}
}
+static void gen_RDxxBASE(DisasContext *s, X86DecodedInsn *decode)
+{
+ TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS];
+
+ /* Preserve hflags bits by testing CR4 at runtime. */
+ gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK));
+ tcg_gen_mov_tl(s->T0, base);
+}
+
static void gen_RET(DisasContext *s, X86DecodedInsn *decode)
{
int16_t adjust = decode->e.op1 == X86_TYPE_I ? decode->immediate : 0;
@@ -3372,6 +3407,11 @@ static void gen_SETcc(DisasContext *s, X86DecodedInsn *decode)
gen_setcc1(s, decode->b & 0xf, s->T0);
}
+static void gen_SFENCE(DisasContext *s, X86DecodedInsn *decode)
+{
+ tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
+}
+
static void gen_SHA1NEXTE(DisasContext *s, X86DecodedInsn *decode)
{
gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
@@ -4042,6 +4082,15 @@ static void gen_WAIT(DisasContext *s, X86DecodedInsn *decode)
}
}
+static void gen_WRxxBASE(DisasContext *s, X86DecodedInsn *decode)
+{
+ TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS];
+
+ /* Preserve hflags bits by testing CR4 at runtime. */
+ gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK));
+ tcg_gen_mov_tl(base, s->T0);
+}
+
static void gen_XCHG(DisasContext *s, X86DecodedInsn *decode)
{
if (s->prefix & PREFIX_LOCK) {
@@ -4084,3 +4133,34 @@ static void gen_XOR(DisasContext *s, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
}
+
+static void gen_XRSTOR(DisasContext *s, X86DecodedInsn *decode)
+{
+ TCGv_i64 features = tcg_temp_new_i64();
+
+ tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+ gen_helper_xrstor(tcg_env, s->A0, features);
+ if (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_MPX) {
+ /*
+ * XRSTOR is how MPX is enabled, which changes how
+ * we translate. Thus we need to end the TB.
+ */
+ s->base.is_jmp = DISAS_EOB_NEXT;
+ }
+}
+
+static void gen_XSAVE(DisasContext *s, X86DecodedInsn *decode)
+{
+ TCGv_i64 features = tcg_temp_new_i64();
+
+ tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+ gen_helper_xsave(tcg_env, s->A0, features);
+}
+
+static void gen_XSAVEOPT(DisasContext *s, X86DecodedInsn *decode)
+{
+ TCGv_i64 features = tcg_temp_new_i64();
+
+ tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+ gen_helper_xsave(tcg_env, s->A0, features);
+}
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder
2024-06-08 8:40 ` [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder Paolo Bonzini
@ 2024-06-08 18:42 ` Richard Henderson
2024-10-21 1:49 ` Guenter Roeck
1 sibling, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 18:42 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:40, Paolo Bonzini wrote:
> +static void gen_FXRSTOR(DisasContext *s, X86DecodedInsn *decode)
> +{
> + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
> + gen_NM_exception(s);
> + }
> + gen_helper_fxrstor(tcg_env, s->A0);
> +}
> +
> +static void gen_FXSAVE(DisasContext *s, X86DecodedInsn *decode)
> +{
> + if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
> + gen_NM_exception(s);
> + }
> + gen_helper_fxsave(tcg_env, s->A0);
> +}
You want else's here, to not emit fxsave/restore after raising NM.
Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder
2024-06-08 8:40 ` [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder Paolo Bonzini
2024-06-08 18:42 ` Richard Henderson
@ 2024-10-21 1:49 ` Guenter Roeck
2024-10-21 6:57 ` Paolo Bonzini
1 sibling, 1 reply; 56+ messages in thread
From: Guenter Roeck @ 2024-10-21 1:49 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
Hi,
On Sat, Jun 08, 2024 at 10:40:58AM +0200, Paolo Bonzini wrote:
> This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish
> the job.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
While testing qemu v9.1, I noticed the following crash when testing qemu-system-i386
with pentium3 CPU.
Oops: invalid opcode: 0000 [#1] PREEMPT SMP
CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc4 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
EIP: raid6_sse12_gen_syndrome+0xfa/0x10c
Code: 83 e8 01 73 bf 8b 4d ec 0f e7 53 f8 0f e7 1b 0f e7 61 f8 0f e7 31 8b 45 e8 83 c6 10 83 c3 10 83 c1 10 39 c6 0f 82 6a ff ff ff <0f> 9
EAX: 00001000 EBX: c1367008 ECX: c1368008 EDX: c119deb0
ESI: 00001000 EDI: 00000ff8 EBP: c119de84 ESP: c119de68
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
CR0: 80050033 CR2: ffd39000 CR3: 06144000 CR4: 000006d0
Call Trace:
? show_regs+0x4d/0x54
? die+0x2f/0x88
? do_trap+0xc6/0xc8
? do_error_trap+0x6c/0x100
? raid6_sse12_gen_syndrome+0xfa/0x10c
? exc_overflow+0x50/0x50
? exc_invalid_op+0x5b/0x70
? raid6_sse12_gen_syndrome+0xfa/0x10c
? handle_exception+0x14b/0x14b
? exc_overflow+0x50/0x50
? raid6_sse12_gen_syndrome+0xfa/0x10c
? exc_overflow+0x50/0x50
? raid6_sse12_gen_syndrome+0xfa/0x10c
? raid6_sse11_gen_syndrome+0xfc/0xfc
raid6_select_algo+0x144/0x420
? libcrc32c_mod_init+0x24/0x24
do_one_initcall+0x63/0x284
? rdinit_setup+0x40/0x40
? parse_args+0x14b/0x3f4
kernel_init_freeable+0x238/0x44c
? rdinit_setup+0x40/0x40
? rest_init+0x164/0x164
kernel_init+0x15/0x1dc
? schedule_tail+0x50/0x64
ret_from_fork+0x38/0x44
? rest_init+0x164/0x164
ret_from_fork_asm+0x12/0x18
entry_INT80_32+0x108/0x108
Modules linked in:
---[ end trace 0000000000000000 ]---
Bisect points to this patch. Bisect log as well as the decoded stacktrace
is attached below.
The problem is still seen in qemu mainline (v9.1.0-997-g72b0b80714).
Reverting the patch is not straightforward and results in a number of conflicts,
so I was not able to test qemu with the patch reverted.
Guenter
---
Bisect log:
# bad: [fd1952d814da738ed107e05583b3e02ac11e88ff] Update version for v9.1.0 release
# good: [c25df57ae8f9fe1c72eee2dab37d76d904ac382e] Update version for 9.0.0 release
git bisect start 'v9.1.0' 'v9.0.0'
# bad: [2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219] hw/acpi/ich9: Remove dead code related to 'acpi_memory_hotplug'
git bisect bad 2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219
# good: [544595e73007c824b7435b52519cc578586783a6] tests/plugin/inline: add test for conditional callback
git bisect good 544595e73007c824b7435b52519cc578586783a6
# good: [039003995047b2f7911142c7c5cfb845fda044fd] hw/riscv/boot.c: Support 64-bit address for initrd
git bisect good 039003995047b2f7911142c7c5cfb845fda044fd
# good: [a73f7a00eea15c75fe9cfbeeaff5228f5ee24b61] tests/qtest: Add numa test for loongarch system
git bisect good a73f7a00eea15c75fe9cfbeeaff5228f5ee24b61
# bad: [11ffaf8c73aae1a70f4640ada14a437a78d06efb] target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT to new decoder
git bisect bad 11ffaf8c73aae1a70f4640ada14a437a78d06efb
# good: [fc00123f3abeb027cd51eb58ea8845377794b3bc] python: mkvenv: remove ensure command
git bisect good fc00123f3abeb027cd51eb58ea8845377794b3bc
# good: [593aab332f048347bd19893071caf44e1fb742ff] Merge tag 'pull-hex-20240608' of https://github.com/quic/qemu into staging
git bisect good 593aab332f048347bd19893071caf44e1fb742ff
# good: [c2b6b6a65a227d2bb45e1b2694cf064b881543e4] target/i386: change X86_ENTRYr to use T0
git bisect good c2b6b6a65a227d2bb45e1b2694cf064b881543e4
# good: [10340080cd501b1aba23c3e502e2e0aa7c825fbf] target/i386: fix bad sorting of entries in the 0F table
git bisect good 10340080cd501b1aba23c3e502e2e0aa7c825fbf
# bad: [ae541c0eb47f2fbcfd975c8e2fcb0e3a2613dc1c] target/i386: convert non-grouped, helper-based 2-byte opcodes
git bisect bad ae541c0eb47f2fbcfd975c8e2fcb0e3a2613dc1c
# bad: [556c4c5cc44c3454f78d796b6050c6d574a35dd2] target/i386: split X86_CHECK_prot into PE and VM86 checks
git bisect bad 556c4c5cc44c3454f78d796b6050c6d574a35dd2
# bad: [ea89aa895e98fd8a1b9ebf7e3dc8bfcd863b9466] target/i386: finish converting 0F AE to the new decoder
git bisect bad ea89aa895e98fd8a1b9ebf7e3dc8bfcd863b9466
# first bad commit: [ea89aa895e98fd8a1b9ebf7e3dc8bfcd863b9466] target/i386: finish converting 0F AE to the new decoder
---
Decoded stacktrace:
CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc4 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
EIP: raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
Code: 83 e8 01 73 bf 8b 4d ec 0f e7 53 f8 0f e7 1b 0f e7 61 f8 0f e7 31 8b 45 e8 83 c6 10 83 c3 10 83 c1 10 39 c6 0f 82 6a ff ff ff <0f> 9
All code
========
0: 83 e8 01 sub $0x1,%eax
3: 73 bf jae 0xffffffffffffffc4
5: 8b 4d ec mov -0x14(%rbp),%ecx
8: 0f e7 53 f8 movntq %mm2,-0x8(%rbx)
c: 0f e7 1b movntq %mm3,(%rbx)
f: 0f e7 61 f8 movntq %mm4,-0x8(%rcx)
13: 0f e7 31 movntq %mm6,(%rcx)
16: 8b 45 e8 mov -0x18(%rbp),%eax
19: 83 c6 10 add $0x10,%esi
1c: 83 c3 10 add $0x10,%ebx
1f: 83 c1 10 add $0x10,%ecx
22: 39 c6 cmp %eax,%esi
24: 0f 82 6a ff ff ff jb 0xffffffffffffff94
2a:* 0f 09 wbinvd <-- trapping instruction
Code starting with the faulting instruction
===========================================
0: 0f 09 wbinvd
EAX: 00001000 EBX: c1367008 ECX: c1368008 EDX: c119deb0
ESI: 00001000 EDI: 00000ff8 EBP: c119de84 ESP: c119de68
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
CR0: 80050033 CR2: ffd39000 CR3: 06144000 CR4: 000006d0
Call Trace:
? show_regs (arch/x86/kernel/dumpstack.c:479)
? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447)
? do_trap (arch/x86/kernel/traps.c:156 arch/x86/kernel/traps.c:197)
? do_error_trap (arch/x86/include/asm/traps.h:58 arch/x86/kernel/traps.c:218)
? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
? exc_overflow (arch/x86/kernel/traps.c:301)
? exc_invalid_op (arch/x86/kernel/traps.c:316)
? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
? handle_exception (arch/x86/entry/entry_32.S:1055)
? exc_overflow (arch/x86/kernel/traps.c:301)
? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
? exc_overflow (arch/x86/kernel/traps.c:301)
? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
? raid6_sse11_gen_syndrome (lib/raid6/sse1.c:100)
raid6_select_algo (lib/raid6/algos.c:179 (discriminator 2) lib/raid6/algos.c:273 (discriminator 2))
? libcrc32c_mod_init (lib/raid6/algos.c:243)
do_one_initcall (init/main.c:1269)
? rdinit_setup (init/main.c:1317)
? parse_args (kernel/params.c:153 kernel/params.c:186)
kernel_init_freeable (init/main.c:1330 (discriminator 1) init/main.c:1347 (discriminator 1) init/main.c:1366 (discriminator 1) init/main.c:1580 (discrim
? rdinit_setup (init/main.c:1317)
? rest_init (init/main.c:1461)
kernel_init (init/main.c:1471)
? schedule_tail (kernel/sched/core.c:5266)
ret_from_fork (arch/x86/kernel/process.c:153)
? rest_init (init/main.c:1461)
ret_from_fork_asm (arch/x86/entry/entry_32.S:737)
entry_INT80_32 (arch/x86/entry/entry_32.S:945)
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder
2024-10-21 1:49 ` Guenter Roeck
@ 2024-10-21 6:57 ` Paolo Bonzini
2024-10-21 13:54 ` Guenter Roeck
0 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-10-21 6:57 UTC (permalink / raw)
To: Guenter Roeck; +Cc: qemu-devel
On 10/21/24 03:49, Guenter Roeck wrote:
> Hi,
>
> On Sat, Jun 08, 2024 at 10:40:58AM +0200, Paolo Bonzini wrote:
>> This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish
>> the job.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>
> While testing qemu v9.1, I noticed the following crash when testing qemu-system-i386
> with pentium3 CPU.
Is this enough to fix it?
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index ee2a508ae9a..cda32ee6784 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -345,9 +345,9 @@ static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
[1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3),
[2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
[3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
- [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00),
+ [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE) p_00),
[6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00),
- [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00),
+ [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE) p_00),
};
static const X86OpEntry group15_mem[8] = {
> 22: 39 c6 cmp %eax,%esi
> 24: 0f 82 6a ff ff ff jb 0xffffffffffffff94
> 2a:* 0f 09 wbinvd <-- trapping instruction
This is a bit weird, as wbinvd is not affected by this patch. However,
a checkout of Linux has
asm volatile("sfence" : :: "memory");
kernel_fpu_end();
}
at the end of lib/raid6/sse1.c and it would indeed be affected by this
patch. SSE2 was not present in Pentium III, but SSE was.
Paolo
> Code starting with the faulting instruction
> ===========================================
> 0: 0f 09 wbinvd
> EAX: 00001000 EBX: c1367008 ECX: c1368008 EDX: c119deb0
> ESI: 00001000 EDI: 00000ff8 EBP: c119de84 ESP: c119de68
> DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
> CR0: 80050033 CR2: ffd39000 CR3: 06144000 CR4: 000006d0
> Call Trace:
> ? show_regs (arch/x86/kernel/dumpstack.c:479)
> ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447)
> ? do_trap (arch/x86/kernel/traps.c:156 arch/x86/kernel/traps.c:197)
> ? do_error_trap (arch/x86/include/asm/traps.h:58 arch/x86/kernel/traps.c:218)
> ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
> ? exc_overflow (arch/x86/kernel/traps.c:301)
> ? exc_invalid_op (arch/x86/kernel/traps.c:316)
> ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
> ? handle_exception (arch/x86/entry/entry_32.S:1055)
> ? exc_overflow (arch/x86/kernel/traps.c:301)
> ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
> ? exc_overflow (arch/x86/kernel/traps.c:301)
> ? raid6_sse12_gen_syndrome (lib/raid6/sse1.c:147)
> ? raid6_sse11_gen_syndrome (lib/raid6/sse1.c:100)
> raid6_select_algo (lib/raid6/algos.c:179 (discriminator 2) lib/raid6/algos.c:273 (discriminator 2))
> ? libcrc32c_mod_init (lib/raid6/algos.c:243)
> do_one_initcall (init/main.c:1269)
> ? rdinit_setup (init/main.c:1317)
> ? parse_args (kernel/params.c:153 kernel/params.c:186)
> kernel_init_freeable (init/main.c:1330 (discriminator 1) init/main.c:1347 (discriminator 1) init/main.c:1366 (discriminator 1) init/main.c:1580 (discrim
> ? rdinit_setup (init/main.c:1317)
> ? rest_init (init/main.c:1461)
> kernel_init (init/main.c:1471)
> ? schedule_tail (kernel/sched/core.c:5266)
> ret_from_fork (arch/x86/kernel/process.c:153)
> ? rest_init (init/main.c:1461)
> ret_from_fork_asm (arch/x86/entry/entry_32.S:737)
> entry_INT80_32 (arch/x86/entry/entry_32.S:945)
>
>
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder
2024-10-21 6:57 ` Paolo Bonzini
@ 2024-10-21 13:54 ` Guenter Roeck
0 siblings, 0 replies; 56+ messages in thread
From: Guenter Roeck @ 2024-10-21 13:54 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/20/24 23:57, Paolo Bonzini wrote:
> On 10/21/24 03:49, Guenter Roeck wrote:
>> Hi,
>>
>> On Sat, Jun 08, 2024 at 10:40:58AM +0200, Paolo Bonzini wrote:
>>> This is already partly implemented due to VLDMXCSR and VSTMXCSR; finish
>>> the job.
>>>
>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>
>> While testing qemu v9.1, I noticed the following crash when testing qemu-system-i386
>> with pentium3 CPU.
>
> Is this enough to fix it?
>
Yes.
> diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
> index ee2a508ae9a..cda32ee6784 100644
> --- a/target/i386/tcg/decode-new.c.inc
> +++ b/target/i386/tcg/decode-new.c.inc
> @@ -345,9 +345,9 @@ static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
> [1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3),
> [2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
> [3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
> - [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00),
> + [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE) p_00),
> [6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00),
> - [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00),
> + [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE) p_00),
> };
>
> static const X86OpEntry group15_mem[8] = {
>
>> 22: 39 c6 cmp %eax,%esi
>> 24: 0f 82 6a ff ff ff jb 0xffffffffffffff94
>> 2a:* 0f 09 wbinvd <-- trapping instruction
>
> This is a bit weird, as wbinvd is not affected by this patch. However,
> a checkout of Linux has
>
> asm volatile("sfence" : :: "memory");
> kernel_fpu_end();
> }
>
> at the end of lib/raid6/sse1.c and it would indeed be affected by this
> patch. SSE2 was not present in Pentium III, but SSE was.
>
No idea how the 0x0f 0x09 ends up in the log. I wondered about that as well.
Thanks,
Guenter
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 11/25] target/i386: replace read_crN helper with read_cr8
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (9 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 10/25] target/i386: finish converting 0F AE to the new decoder Paolo Bonzini
@ 2024-06-08 8:40 ` Paolo Bonzini
2024-06-08 18:45 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 12/25] target/i386: split X86_CHECK_prot into PE and VM86 checks Paolo Bonzini
` (13 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:40 UTC (permalink / raw)
To: qemu-devel
All other control registers are stored plainly in CPUX86State.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/helper.h | 2 +-
target/i386/tcg/sysemu/misc_helper.c | 20 +++++---------------
target/i386/tcg/emit.c.inc | 2 +-
3 files changed, 7 insertions(+), 17 deletions(-)
diff --git a/target/i386/helper.h b/target/i386/helper.h
index 2f46cffabd8..eeb8df56eaa 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -95,7 +95,7 @@ DEF_HELPER_FLAGS_2(monitor, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int)
DEF_HELPER_1(rdmsr, void, env)
DEF_HELPER_1(wrmsr, void, env)
-DEF_HELPER_FLAGS_2(read_crN, TCG_CALL_NO_RWG, tl, env, int)
+DEF_HELPER_FLAGS_1(read_cr8, TCG_CALL_NO_RWG, tl, env)
DEF_HELPER_FLAGS_3(write_crN, TCG_CALL_NO_RWG, void, env, int, tl)
#endif /* !CONFIG_USER_ONLY */
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index 7fa0c5a06de..094aa56a20d 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -63,23 +63,13 @@ target_ulong helper_inl(CPUX86State *env, uint32_t port)
cpu_get_mem_attrs(env), NULL);
}
-target_ulong helper_read_crN(CPUX86State *env, int reg)
+target_ulong helper_read_cr8(CPUX86State *env)
{
- target_ulong val;
-
- switch (reg) {
- default:
- val = env->cr[reg];
- break;
- case 8:
- if (!(env->hflags2 & HF2_VINTR_MASK)) {
- val = cpu_get_apic_tpr(env_archcpu(env)->apic_state);
- } else {
- val = env->int_ctl & V_TPR_MASK;
- }
- break;
+ if (!(env->hflags2 & HF2_VINTR_MASK)) {
+ return cpu_get_apic_tpr(env_archcpu(env)->apic_state);
+ } else {
+ return env->int_ctl & V_TPR_MASK;
}
- return val;
}
void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 5ca3764e006..709ef7b0cb2 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -245,7 +245,7 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
#ifndef CONFIG_USER_ONLY
case X86_OP_CR:
if (op->n == 8) {
- gen_helper_read_crN(v, tcg_env, tcg_constant_i32(op->n));
+ gen_helper_read_cr8(v, tcg_env);
} else {
tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n]));
}
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 12/25] target/i386: split X86_CHECK_prot into PE and VM86 checks
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (10 preceding siblings ...)
2024-06-08 8:40 ` [PATCH 11/25] target/i386: replace read_crN helper with read_cr8 Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 18:47 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 13/25] target/i386: convert non-grouped, helper-based 2-byte opcodes Paolo Bonzini
` (12 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
SYSENTER is allowed in VM86 mode, but not in real mode. Split the check
so that PE and !VM86 are covered by separate bits.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 8 ++++++--
target/i386/tcg/decode-new.c.inc | 9 +++++++--
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 5577f7509aa..b46a9a0ccb3 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -149,8 +149,8 @@ typedef enum X86InsnCheck {
X86_CHECK_i64 = 1,
X86_CHECK_o64 = 2,
- /* Fault outside protected mode */
- X86_CHECK_prot = 4,
+ /* Fault in vm86 mode */
+ X86_CHECK_no_vm86 = 4,
/* Privileged instruction checks */
X86_CHECK_cpl0 = 8,
@@ -166,6 +166,10 @@ typedef enum X86InsnCheck {
/* Fault if VEX.W=0 */
X86_CHECK_W1 = 256,
+
+ /* Fault outside protected mode, possibly including vm86 mode */
+ X86_CHECK_prot_or_vm86 = 512,
+ X86_CHECK_prot = X86_CHECK_prot_or_vm86 | X86_CHECK_no_vm86,
} X86InsnCheck;
typedef enum X86InsnSpecial {
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 1c6fa39c3eb..f02f7c62647 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -2558,8 +2558,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
goto illegal_op;
}
}
- if (decode.e.check & X86_CHECK_prot) {
- if (!PE(s) || VM86(s)) {
+ if (decode.e.check & X86_CHECK_prot_or_vm86) {
+ if (!PE(s)) {
+ goto illegal_op;
+ }
+ }
+ if (decode.e.check & X86_CHECK_no_vm86) {
+ if (VM86(s)) {
goto illegal_op;
}
}
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 13/25] target/i386: convert non-grouped, helper-based 2-byte opcodes
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (11 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 12/25] target/i386: split X86_CHECK_prot into PE and VM86 checks Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 19:03 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 14/25] target/i386: convert bit test instructions to new decoder Paolo Bonzini
` (11 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
These have very simple generators and no need for complex group
decoding. Apart from LAR/LSL which are simplified to use
gen_op_deposit_reg_v and movcond, the code is generally lifted
from translate.c into the generators.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 7 ++
target/i386/tcg/seg_helper.c | 16 ++--
target/i386/tcg/translate.c | 148 ------------------------------
target/i386/tcg/decode-new.c.inc | 48 +++++++---
target/i386/tcg/emit.c.inc | 151 +++++++++++++++++++++++++++++++
5 files changed, 202 insertions(+), 168 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index b46a9a0ccb3..c9f958bb0e5 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -170,6 +170,13 @@ typedef enum X86InsnCheck {
/* Fault outside protected mode, possibly including vm86 mode */
X86_CHECK_prot_or_vm86 = 512,
X86_CHECK_prot = X86_CHECK_prot_or_vm86 | X86_CHECK_no_vm86,
+
+ /* Fault outside SMM */
+ X86_CHECK_smm = 1024,
+
+ /* Vendor-specific checks for Intel/AMD differences */
+ X86_CHECK_i64_amd = 2048,
+ X86_CHECK_o64_intel = 4096,
} X86InsnCheck;
typedef enum X86InsnSpecial {
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 715db1f2326..aee3d19f29b 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -2265,11 +2265,11 @@ void helper_sysexit(CPUX86State *env, int dflag)
target_ulong helper_lsl(CPUX86State *env, target_ulong selector1)
{
unsigned int limit;
- uint32_t e1, e2, eflags, selector;
+ uint32_t e1, e2, selector;
int rpl, dpl, cpl, type;
selector = selector1 & 0xffff;
- eflags = cpu_cc_compute_all(env);
+ assert(CC_OP == CC_OP_EFLAGS);
if ((selector & 0xfffc) == 0) {
goto fail;
}
@@ -2301,22 +2301,22 @@ target_ulong helper_lsl(CPUX86State *env, target_ulong selector1)
}
if (dpl < cpl || dpl < rpl) {
fail:
- CC_SRC = eflags & ~CC_Z;
+ CC_SRC &= ~CC_Z;
return 0;
}
}
limit = get_seg_limit(e1, e2);
- CC_SRC = eflags | CC_Z;
+ CC_SRC |= CC_Z;
return limit;
}
target_ulong helper_lar(CPUX86State *env, target_ulong selector1)
{
- uint32_t e1, e2, eflags, selector;
+ uint32_t e1, e2, selector;
int rpl, dpl, cpl, type;
selector = selector1 & 0xffff;
- eflags = cpu_cc_compute_all(env);
+ assert(CC_OP == CC_OP_EFLAGS);
if ((selector & 0xfffc) == 0) {
goto fail;
}
@@ -2351,11 +2351,11 @@ target_ulong helper_lar(CPUX86State *env, target_ulong selector1)
}
if (dpl < cpl || dpl < rpl) {
fail:
- CC_SRC = eflags & ~CC_Z;
+ CC_SRC &= ~CC_Z;
return 0;
}
}
- CC_SRC = eflags | CC_Z;
+ CC_SRC |= CC_Z;
return e2 & 0x00f0ff00;
}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index ebae745ecba..4b2f7488022 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -246,7 +246,6 @@ STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
-STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
@@ -254,7 +253,6 @@ STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
-STUB_HELPER(wrmsr, TCGv_env env)
#endif
static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num);
@@ -3470,97 +3468,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
gen_op_mov_reg_v(s, ot, reg, s->T0);
break;
- case 0x130: /* wrmsr */
- case 0x132: /* rdmsr */
- if (check_cpl0(s)) {
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- if (b & 2) {
- gen_helper_rdmsr(tcg_env);
- } else {
- gen_helper_wrmsr(tcg_env);
- s->base.is_jmp = DISAS_EOB_NEXT;
- }
- }
- break;
- case 0x131: /* rdtsc */
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- translator_io_start(&s->base);
- gen_helper_rdtsc(tcg_env);
- break;
- case 0x133: /* rdpmc */
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_rdpmc(tcg_env);
- s->base.is_jmp = DISAS_NORETURN;
- break;
- case 0x134: /* sysenter */
- /* For AMD SYSENTER is not valid in long mode */
- if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
- goto illegal_op;
- }
- if (!PE(s)) {
- gen_exception_gpf(s);
- } else {
- gen_helper_sysenter(tcg_env);
- s->base.is_jmp = DISAS_EOB_ONLY;
- }
- break;
- case 0x135: /* sysexit */
- /* For AMD SYSEXIT is not valid in long mode */
- if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
- goto illegal_op;
- }
- if (!PE(s) || CPL(s) != 0) {
- gen_exception_gpf(s);
- } else {
- gen_helper_sysexit(tcg_env, tcg_constant_i32(dflag - 1));
- s->base.is_jmp = DISAS_EOB_ONLY;
- }
- break;
- case 0x105: /* syscall */
- /* For Intel SYSCALL is only valid in long mode */
- if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
- goto illegal_op;
- }
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_syscall(tcg_env, cur_insn_len_i32(s));
- /* condition codes are modified only in long mode */
- if (LMA(s)) {
- assume_cc_op(s, CC_OP_EFLAGS);
- }
- /* TF handling for the syscall insn is different. The TF bit is checked
- after the syscall insn completes. This allows #DB to not be
- generated after one has entered CPL0 if TF is set in FMASK. */
- s->base.is_jmp = DISAS_EOB_RECHECK_TF;
- break;
- case 0x107: /* sysret */
- /* For Intel SYSRET is only valid in long mode */
- if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
- goto illegal_op;
- }
- if (!PE(s) || CPL(s) != 0) {
- gen_exception_gpf(s);
- } else {
- gen_helper_sysret(tcg_env, tcg_constant_i32(dflag - 1));
- /* condition codes are modified only in long mode */
- if (LMA(s)) {
- assume_cc_op(s, CC_OP_EFLAGS);
- }
- /* TF handling for the sysret insn is different. The TF bit is
- checked after the sysret insn completes. This allows #DB to be
- generated "as if" the syscall insn in userspace has just
- completed. */
- s->base.is_jmp = DISAS_EOB_RECHECK_TF;
- }
- break;
- case 0x1a2: /* cpuid */
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_cpuid(tcg_env);
- break;
case 0x100:
modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
@@ -3964,39 +3871,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
break;
- case 0x108: /* invd */
- case 0x109: /* wbinvd; wbnoinvd with REPZ prefix */
- if (check_cpl0(s)) {
- gen_svm_check_intercept(s, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD);
- /* nothing to do */
- }
- break;
- case 0x102: /* lar */
- case 0x103: /* lsl */
- {
- TCGLabel *label1;
- TCGv t0;
- if (!PE(s) || VM86(s))
- goto illegal_op;
- ot = dflag != MO_16 ? MO_32 : MO_16;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- gen_ld_modrm(env, s, modrm, MO_16);
- t0 = tcg_temp_new();
- gen_update_cc_op(s);
- if (b == 0x102) {
- gen_helper_lar(t0, tcg_env, s->T0);
- } else {
- gen_helper_lsl(t0, tcg_env, s->T0);
- }
- tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
- label1 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
- gen_op_mov_reg_v(s, ot, reg, t0);
- gen_set_label(label1);
- set_cc_op(s, CC_OP_EFLAGS);
- }
- break;
case 0x11a:
modrm = x86_ldub_code(env, s);
if (s->flags & HF_MPX_EN_MASK) {
@@ -4188,28 +4062,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
gen_nop_modrm(env, s, modrm);
break;
-
- case 0x106: /* clts */
- if (check_cpl0(s)) {
- gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
- gen_helper_clts(tcg_env);
- /* abort block because static cpu state changed */
- s->base.is_jmp = DISAS_EOB_NEXT;
- }
- break;
- case 0x1aa: /* rsm */
- gen_svm_check_intercept(s, SVM_EXIT_RSM);
- if (!(s->flags & HF_SMM_MASK))
- goto illegal_op;
-#ifdef CONFIG_USER_ONLY
- /* we should not be in SMM mode */
- g_assert_not_reached();
-#else
- gen_helper_rsm(tcg_env);
- assume_cc_op(s, CC_OP_EFLAGS);
-#endif /* CONFIG_USER_ONLY */
- s->base.is_jmp = DISAS_EOB_ONLY;
- break;
case 0x1b8: /* SSE4.2 popcnt */
if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
PREFIX_REPZ)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index f02f7c62647..1db9d1e2bc3 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -223,6 +223,8 @@
#define vex13 .vex_class = 13,
#define chk(a) .check = X86_CHECK_##a,
+#define chk2(a, b) .check = X86_CHECK_##a | X86_CHECK_##b,
+#define chk3(a, b, c) .check = X86_CHECK_##a | X86_CHECK_##b | X86_CHECK_##c,
#define svm(a) .intercept = SVM_EXIT_##a, .has_intercept = true,
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
@@ -1027,6 +1029,12 @@ static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entr
}
static const X86OpEntry opcodes_0F[256] = {
+ [0x02] = X86_OP_ENTRYwr(LAR, G,v, E,w, chk(prot)),
+ [0x03] = X86_OP_ENTRYwr(LSL, G,v, E,w, chk(prot)),
+ [0x05] = X86_OP_ENTRY0(SYSCALL, chk(o64_intel)),
+ [0x06] = X86_OP_ENTRY0(CLTS, chk(cpl0) svm(WRITE_CR0)),
+ [0x07] = X86_OP_ENTRY0(SYSRET, chk3(o64_intel, prot, cpl0)),
+
[0x10] = X86_OP_GROUP0(0F10),
[0x11] = X86_OP_GROUP0(0F11),
[0x12] = X86_OP_GROUP0(0F12),
@@ -1042,6 +1050,13 @@ static const X86OpEntry opcodes_0F[256] = {
[0x22] = X86_OP_GROUPwr(MOV_CR_DR, C,y, R,y, zextT0 chk(cpl0) svm(WRITE_CR0)),
[0x23] = X86_OP_GROUPwr(MOV_CR_DR, D,y, R,y, zextT0 chk(cpl0) svm(WRITE_DR0)),
+ [0x30] = X86_OP_ENTRY0(WRMSR, chk(cpl0)),
+ [0x31] = X86_OP_ENTRY0(RDTSC),
+ [0x32] = X86_OP_ENTRY0(RDMSR, chk(cpl0)),
+ [0x33] = X86_OP_ENTRY0(RDPMC),
+ [0x34] = X86_OP_ENTRY0(SYSENTER, chk2(i64_amd, prot_or_vm86)),
+ [0x35] = X86_OP_ENTRY0(SYSEXIT, chk3(i64_amd, prot, cpl0)),
+
[0x40] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
[0x41] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
[0x42] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
@@ -1098,6 +1113,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
[0xa1] = X86_OP_ENTRYw(POP, FS, w),
+ [0xa2] = X86_OP_ENTRY0(CPUID),
[0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
[0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None),
@@ -1138,6 +1154,8 @@ static const X86OpEntry opcodes_0F[256] = {
[0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
[0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
+ [0x08] = X86_OP_ENTRY0(NOP, svm(INVD)),
+ [0x09] = X86_OP_ENTRY0(NOP, svm(WBINVD)),
[0x0b] = X86_OP_ENTRY0(UD), /* UD2 */
[0x0d] = X86_OP_ENTRY1(NOP, M,v), /* 3DNow! prefetch */
[0x0e] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */
@@ -1221,6 +1239,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa8] = X86_OP_ENTRYr(PUSH, GS, w),
[0xa9] = X86_OP_ENTRYw(POP, GS, w),
+ [0xaa] = X86_OP_ENTRY0(RSM, chk(smm) svm(RSM)),
[0xae] = X86_OP_GROUP0(group15),
/*
* It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
@@ -2511,12 +2530,10 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
if (b == 0x0f) {
b = x86_ldub_code(env, s);
switch (b) {
- case 0x00 ... 0x03: /* mostly privileged instructions */
- case 0x05 ... 0x09:
+ case 0x00 ... 0x01: /* mostly privileged instructions */
case 0x1a ... 0x1b: /* MPX */
- case 0x30 ... 0x35: /* more privileged instructions */
- case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */
- case 0xaa ... 0xad: /* RSM, SHRD */
+ case 0xa3 ... 0xa5: /* BT, SHLD */
+ case 0xab ... 0xad: /* BTS, SHRD */
case 0xb0 ... 0xb1: /* cmpxchg */
case 0xb3: /* btr */
case 0xb8: /* integer ops */
@@ -2548,13 +2565,18 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
/* Checks that result in #UD come first. */
if (decode.e.check) {
- if (decode.e.check & X86_CHECK_i64) {
- if (CODE64(s)) {
+ if (CODE64(s)) {
+ if (decode.e.check & X86_CHECK_i64) {
goto illegal_op;
}
- }
- if (decode.e.check & X86_CHECK_o64) {
- if (!CODE64(s)) {
+ if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
+ goto illegal_op;
+ }
+ } else {
+ if (decode.e.check & X86_CHECK_o64) {
+ goto illegal_op;
+ }
+ if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
goto illegal_op;
}
}
@@ -2638,8 +2660,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
* exceptions if there is no memory operand). Exceptions are
* vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
*
- * RSM and XSETBV will be handled in the gen_* functions
- * instead of using chk().
+ * XSETBV will check for CPL0 in the gen_* function instead of using chk().
*/
if (decode.e.check & X86_CHECK_cpl0) {
if (CPL(s) != 0) {
@@ -2651,6 +2672,9 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
tcg_constant_i32(decode.e.intercept));
}
if (decode.e.check) {
+ if ((decode.e.check & X86_CHECK_smm) && !(s->flags & HF_SMM_MASK)) {
+ goto illegal_op;
+ }
if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
if (IOPL(s) < 3) {
goto gp_fault;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 709ef7b0cb2..f788e1fa4c8 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1410,6 +1410,13 @@ static void gen_CLI(DisasContext *s, X86DecodedInsn *decode)
gen_reset_eflags(s, IF_MASK);
}
+static void gen_CLTS(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_helper_clts(tcg_env);
+ /* abort block because static cpu state changed */
+ s->base.is_jmp = DISAS_EOB_NEXT;
+}
+
static void gen_CMC(DisasContext *s, X86DecodedInsn *decode)
{
gen_compute_eflags(s);
@@ -1534,6 +1541,13 @@ static void gen_CMPS(DisasContext *s, X86DecodedInsn *decode)
}
}
+static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_cpuid(tcg_env);
+}
+
static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1977,6 +1991,23 @@ static void gen_LAHF(DisasContext *s, X86DecodedInsn *decode)
tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8);
}
+static void gen_LAR(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv result = tcg_temp_new();
+ TCGv dest;
+
+ gen_compute_eflags(s);
+ gen_update_cc_op(s);
+ gen_helper_lar(result, tcg_env, s->T0);
+
+ /* Perform writeback here to skip it if ZF=0. */
+ decode->op[0].unit = X86_OP_SKIP;
+ dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, result, result);
+ tcg_gen_movcond_tl(TCG_COND_TSTNE, dest, cpu_cc_src, tcg_constant_tl(CC_Z),
+ result, dest);
+}
+
static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
@@ -2075,6 +2106,23 @@ static void gen_LOOPNE(DisasContext *s, X86DecodedInsn *decode)
gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
}
+static void gen_LSL(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv result = tcg_temp_new();
+ TCGv dest;
+
+ gen_compute_eflags(s);
+ gen_update_cc_op(s);
+ gen_helper_lsl(result, tcg_env, s->T0);
+
+ /* Perform writeback here to skip it if ZF=0. */
+ decode->op[0].unit = X86_OP_SKIP;
+ dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, result, result);
+ tcg_gen_movcond_tl(TCG_COND_TSTNE, dest, cpu_cc_src, tcg_constant_tl(CC_Z),
+ result, dest);
+}
+
static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
{
gen_lxx_seg(s, decode, R_SS);
@@ -3118,6 +3166,41 @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
}
}
+#ifdef CONFIG_USER_ONLY
+static void gen_unreachable(DisasContext *s, X86DecodedInsn *decode)
+{
+ g_assert_not_reached();
+}
+#endif
+
+#ifndef CONFIG_USER_ONLY
+static void gen_RDMSR(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_rdmsr(tcg_env);
+}
+#else
+#define gen_RDMSR gen_unreachable
+#endif
+
+static void gen_RDPMC(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ translator_io_start(&s->base);
+ gen_helper_rdpmc(tcg_env);
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_RDTSC(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ translator_io_start(&s->base);
+ gen_helper_rdtsc(tcg_env);
+}
+
static void gen_RDxxBASE(DisasContext *s, X86DecodedInsn *decode)
{
TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS];
@@ -3290,6 +3373,17 @@ static void gen_RORX(DisasContext *s, X86DecodedInsn *decode)
}
}
+#ifndef CONFIG_USER_ONLY
+static void gen_RSM(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_helper_rsm(tcg_env);
+ assume_cc_op(s, CC_OP_EFLAGS);
+ s->base.is_jmp = DISAS_EOB_ONLY;
+}
+#else
+#define gen_RSM gen_UD
+#endif
+
static void gen_SAHF(DisasContext *s, X86DecodedInsn *decode)
{
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
@@ -3586,6 +3680,51 @@ static void gen_SUB(DisasContext *s, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_SUBB + ot);
}
+static void gen_SYSCALL(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_syscall(tcg_env, cur_insn_len_i32(s));
+ if (LMA(s)) {
+ assume_cc_op(s, CC_OP_EFLAGS);
+ }
+
+ /*
+ * TF handling for the syscall insn is different. The TF bit is checked
+ * after the syscall insn completes. This allows #DB to not be
+ * generated after one has entered CPL0 if TF is set in FMASK.
+ */
+ s->base.is_jmp = DISAS_EOB_RECHECK_TF;
+}
+
+static void gen_SYSENTER(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_helper_sysenter(tcg_env);
+ s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+static void gen_SYSEXIT(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_helper_sysexit(tcg_env, tcg_constant_i32(s->dflag - 1));
+ s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+static void gen_SYSRET(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_helper_sysret(tcg_env, tcg_constant_i32(s->dflag - 1));
+ if (LMA(s)) {
+ assume_cc_op(s, CC_OP_EFLAGS);
+ }
+
+ /*
+ * TF handling for the sysret insn is different. The TF bit is checked
+ * after the sysret insn completes. This allows #DB to be
+ * generated "as if" the syscall insn in userspace has just
+ * completed.
+ */
+ s->base.is_jmp = DISAS_EOB_RECHECK_TF;
+}
+
static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
{
gen_illegal_opcode(s);
@@ -4082,6 +4221,18 @@ static void gen_WAIT(DisasContext *s, X86DecodedInsn *decode)
}
}
+#ifndef CONFIG_USER_ONLY
+static void gen_WRMSR(DisasContext *s, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_wrmsr(tcg_env);
+ s->base.is_jmp = DISAS_EOB_NEXT;
+}
+#else
+#define gen_WRMSR gen_unreachable
+#endif
+
static void gen_WRxxBASE(DisasContext *s, X86DecodedInsn *decode)
{
TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS];
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 13/25] target/i386: convert non-grouped, helper-based 2-byte opcodes
2024-06-08 8:41 ` [PATCH 13/25] target/i386: convert non-grouped, helper-based 2-byte opcodes Paolo Bonzini
@ 2024-06-08 19:03 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 19:03 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:41, Paolo Bonzini wrote:
> These have very simple generators and no need for complex group
> decoding. Apart from LAR/LSL which are simplified to use
> gen_op_deposit_reg_v and movcond, the code is generally lifted
> from translate.c into the generators.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/decode-new.h | 7 ++
> target/i386/tcg/seg_helper.c | 16 ++--
> target/i386/tcg/translate.c | 148 ------------------------------
> target/i386/tcg/decode-new.c.inc | 48 +++++++---
> target/i386/tcg/emit.c.inc | 151 +++++++++++++++++++++++++++++++
> 5 files changed, 202 insertions(+), 168 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 14/25] target/i386: convert bit test instructions to new decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (12 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 13/25] target/i386: convert non-grouped, helper-based 2-byte opcodes Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 19:37 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 15/25] target/i386: pull load/writeback out of gen_shiftd_rm_T1 Paolo Bonzini
` (10 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 3 +
target/i386/tcg/translate.c | 147 +------------------------------
target/i386/tcg/decode-new.c.inc | 45 +++++++---
target/i386/tcg/emit.c.inc | 130 ++++++++++++++++++++++++++-
4 files changed, 166 insertions(+), 159 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index c9f958bb0e5..f781bb5bbec 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -188,6 +188,9 @@ typedef enum X86InsnSpecial {
/* Always locked if it has a memory operand (XCHG) */
X86_SPECIAL_Locked,
+ /* Like HasLock, but also operand 2 provides bit displacement into memory. */
+ X86_SPECIAL_BitTest,
+
/* Do not load effective address in s->A0 */
X86_SPECIAL_NoLoadEA,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4b2f7488022..c3843092350 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -729,11 +729,6 @@ static void gen_extu(MemOp ot, TCGv reg)
gen_ext_tl(reg, reg, ot, false);
}
-static void gen_exts(MemOp ot, TCGv reg)
-{
- gen_ext_tl(reg, reg, ot, true);
-}
-
static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1)
{
TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false);
@@ -3078,7 +3073,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
MemOp dflag = s->dflag;
int shift;
MemOp ot;
- int modrm, reg, rm, mod, op, opreg, val;
+ int modrm, reg, rm, mod, op, opreg;
/* now check op code */
switch (b) {
@@ -3278,146 +3273,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
}
break;
-
- /************************/
- /* bit operations */
- case 0x1ba: /* bt/bts/btr/btc Gv, im */
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- op = (modrm >> 3) & 7;
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
- if (mod != 3) {
- s->rip_offset = 1;
- gen_lea_modrm(env, s, modrm);
- if (!(s->prefix & PREFIX_LOCK)) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- }
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, rm);
- }
- /* load shift */
- val = x86_ldub_code(env, s);
- tcg_gen_movi_tl(s->T1, val);
- if (op < 4)
- goto unknown_op;
- op -= 4;
- goto bt_op;
- case 0x1a3: /* bt Gv, Ev */
- op = 0;
- goto do_btx;
- case 0x1ab: /* bts */
- op = 1;
- goto do_btx;
- case 0x1b3: /* btr */
- op = 2;
- goto do_btx;
- case 0x1bb: /* btc */
- op = 3;
- do_btx:
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
- gen_op_mov_v_reg(s, MO_32, s->T1, reg);
- if (mod != 3) {
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
- /* specific case: we need to add a displacement */
- gen_exts(ot, s->T1);
- tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
- tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
- tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0);
- gen_lea_v_seg(s, s->A0, a.def_seg, s->override);
- if (!(s->prefix & PREFIX_LOCK)) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- }
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, rm);
- }
- bt_op:
- tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
- tcg_gen_movi_tl(s->tmp0, 1);
- tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
- if (s->prefix & PREFIX_LOCK) {
- switch (op) {
- case 0: /* bt */
- /* Needs no atomic ops; we suppressed the normal
- memory load for LOCK above so do it now. */
- gen_op_ld_v(s, ot, s->T0, s->A0);
- break;
- case 1: /* bts */
- tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
- s->mem_index, ot | MO_LE);
- break;
- case 2: /* btr */
- tcg_gen_not_tl(s->tmp0, s->tmp0);
- tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
- s->mem_index, ot | MO_LE);
- break;
- default:
- case 3: /* btc */
- tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
- s->mem_index, ot | MO_LE);
- break;
- }
- tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
- } else {
- tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
- switch (op) {
- case 0: /* bt */
- /* Data already loaded; nothing to do. */
- break;
- case 1: /* bts */
- tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
- break;
- case 2: /* btr */
- tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
- break;
- default:
- case 3: /* btc */
- tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
- break;
- }
- if (op != 0) {
- if (mod != 3) {
- gen_op_st_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- }
- }
- }
-
- /* Delay all CC updates until after the store above. Note that
- C is the result of the test, Z is unchanged, and the others
- are all undefined. */
- switch (s->cc_op) {
- case CC_OP_MULB ... CC_OP_MULQ:
- case CC_OP_ADDB ... CC_OP_ADDQ:
- case CC_OP_ADCB ... CC_OP_ADCQ:
- case CC_OP_SUBB ... CC_OP_SUBQ:
- case CC_OP_SBBB ... CC_OP_SBBQ:
- case CC_OP_LOGICB ... CC_OP_LOGICQ:
- case CC_OP_INCB ... CC_OP_INCQ:
- case CC_OP_DECB ... CC_OP_DECQ:
- case CC_OP_SHLB ... CC_OP_SHLQ:
- case CC_OP_SARB ... CC_OP_SARQ:
- case CC_OP_BMILGB ... CC_OP_BMILGQ:
- /* Z was going to be computed from the non-zero status of CC_DST.
- We can get that same Z value (and the new C value) by leaving
- CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
- same width. */
- tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
- set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
- break;
- default:
- /* Otherwise, generate EFLAGS and replace the C bit. */
- gen_compute_eflags(s);
- tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
- ctz32(CC_C), 1);
- break;
- }
- break;
case 0x1bc: /* bsf / tzcnt */
case 0x1bd: /* bsr / lzcnt */
ot = dflag;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 1db9d1e2bc3..af142d6911a 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -205,6 +205,7 @@
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,
#define op0_Mw .special = X86_SPECIAL_Op0_Mw,
+#define btEvGv .special = X86_SPECIAL_BitTest,
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
@@ -269,6 +270,24 @@ static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEnt
}
}
+static void decode_group8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static const X86GenFunc group8_gen[8] = {
+ NULL, NULL, NULL, NULL,
+ gen_BT, gen_BTS, gen_BTR, gen_BTC,
+ };
+ int op = (get_modrm(s, env) >> 3) & 7;
+ entry->gen = group8_gen[op];
+ if (op == 4) {
+ /* prevent writeback and LOCK for BT */
+ entry->op1 = entry->op0;
+ entry->op0 = X86_TYPE_None;
+ entry->s0 = X86_SIZE_None;
+ } else {
+ entry->special = X86_SPECIAL_HasLock;
+ }
+}
+
static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry group15_reg[8] = {
@@ -1114,8 +1133,10 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
[0xa1] = X86_OP_ENTRYw(POP, FS, w),
[0xa2] = X86_OP_ENTRY0(CPUID),
+ [0xa3] = X86_OP_ENTRYrr(BT, E,v, G,v, btEvGv),
[0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
+ [0xb3] = X86_OP_ENTRY2(BTR, E,v, G,v, btEvGv),
[0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None),
[0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None),
[0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */
@@ -1240,6 +1261,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa8] = X86_OP_ENTRYr(PUSH, GS, w),
[0xa9] = X86_OP_ENTRYw(POP, GS, w),
[0xaa] = X86_OP_ENTRY0(RSM, chk(smm) svm(RSM)),
+ [0xab] = X86_OP_ENTRY2(BTS, E,v, G,v, btEvGv),
[0xae] = X86_OP_GROUP0(group15),
/*
* It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
@@ -1249,6 +1271,8 @@ static const X86OpEntry opcodes_0F[256] = {
/* decoded as modrm, which is visible as a difference between page fault and #UD */
[0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */
+ [0xba] = X86_OP_GROUP2(group8, E,v, I,b),
+ [0xbb] = X86_OP_ENTRY2(BTC, E,v, G,v, btEvGv),
[0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
[0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
@@ -2359,6 +2383,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
CPUX86State *env = cpu_env(cpu);
X86DecodedInsn decode;
X86DecodeFunc decode_func = decode_root;
+ bool accept_lock = false;
uint8_t cc_live, b;
s->pc = s->base.pc_next;
@@ -2532,12 +2557,11 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
switch (b) {
case 0x00 ... 0x01: /* mostly privileged instructions */
case 0x1a ... 0x1b: /* MPX */
- case 0xa3 ... 0xa5: /* BT, SHLD */
- case 0xab ... 0xad: /* BTS, SHRD */
+ case 0xa4 ... 0xa5: /* SHLD */
+ case 0xac ... 0xad: /* SHRD */
case 0xb0 ... 0xb1: /* cmpxchg */
- case 0xb3: /* btr */
- case 0xb8: /* integer ops */
- case 0xba ... 0xbd: /* integer ops */
+ case 0xb8: /* POPCNT */
+ case 0xbc ... 0xbd: /* LZCNT/TZCNT */
case 0xc0 ... 0xc1: /* xadd */
case 0xc7: /* grp9 */
disas_insn_old(s, cpu, b + 0x100);
@@ -2600,9 +2624,10 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
if (decode.op[0].has_ea) {
s->prefix |= PREFIX_LOCK;
}
- decode.e.special = X86_SPECIAL_HasLock;
/* fallthrough */
case X86_SPECIAL_HasLock:
+ case X86_SPECIAL_BitTest:
+ accept_lock = decode.op[0].has_ea;
break;
case X86_SPECIAL_Op0_Rd:
@@ -2644,10 +2669,8 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
break;
}
- if (s->prefix & PREFIX_LOCK) {
- if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) {
- goto illegal_op;
- }
+ if ((s->prefix & PREFIX_LOCK) && !accept_lock) {
+ goto illegal_op;
}
if (!validate_vex(s, &decode)) {
@@ -2693,7 +2716,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
if (decode.e.special != X86_SPECIAL_NoLoadEA &&
(decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) {
- gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
+ gen_load_ea(s, &decode);
}
if (s->prefix & PREFIX_LOCK) {
gen_load(s, &decode, 2, s->T1);
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index f788e1fa4c8..41398e5130c 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -73,9 +73,25 @@ static void gen_NM_exception(DisasContext *s)
gen_exception(s, EXCP07_PREX);
}
-static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib)
+static void gen_load_ea(DisasContext *s, X86DecodedInsn *decode)
{
- TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib);
+ AddressParts *mem = &decode->mem;
+ TCGv ea;
+
+ ea = gen_lea_modrm_1(s, *mem, decode->e.vex_class == 12);
+ if (decode->e.special == X86_SPECIAL_BitTest) {
+ int oplen = 3 + decode->op[0].ot;
+ int poslen = 8 << decode->op[2].ot;
+ TCGv ofs = tcg_temp_new();
+
+ /* Extract memory displacement from T1. */
+ assert (decode->op[2].unit == X86_OP_INT);
+ tcg_gen_sextract_tl(ofs, s->T1, oplen, poslen - oplen);
+
+ tcg_gen_add_tl(s->A0, ea, ofs);
+ ea = s->A0;
+ }
+
gen_lea_v_seg(s, ea, mem->def_seg, s->override);
}
@@ -1340,6 +1356,116 @@ static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ);
}
+static TCGv gen_bt_mask(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ TCGv mask = tcg_temp_new();
+
+ tcg_gen_andi_tl(s->T1, s->T1, (8 << ot) - 1);
+ tcg_gen_shl_tl(mask, tcg_constant_tl(1), s->T1);
+ return mask;
+}
+
+/* Expects truncated bit index in s->T1, 1 << s->T1 in MASK. */
+static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src, TCGv mask)
+{
+ /*
+ * C is the result of the test, Z is unchanged, and the others
+ * are all undefined.
+ */
+ switch (s->cc_op) {
+ case CC_OP_MULB ... CC_OP_MULQ:
+ case CC_OP_ADDB ... CC_OP_ADDQ:
+ case CC_OP_ADCB ... CC_OP_ADCQ:
+ case CC_OP_SUBB ... CC_OP_SUBQ:
+ case CC_OP_SBBB ... CC_OP_SBBQ:
+ case CC_OP_LOGICB ... CC_OP_LOGICQ:
+ case CC_OP_INCB ... CC_OP_INCQ:
+ case CC_OP_DECB ... CC_OP_DECQ:
+ case CC_OP_SHLB ... CC_OP_SHLQ:
+ case CC_OP_SARB ... CC_OP_SARQ:
+ case CC_OP_BMILGB ... CC_OP_BMILGQ:
+ /*
+ * Z was going to be computed from the non-zero status of CC_DST.
+ * We can get that same Z value (and the new C value) by leaving
+ * CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
+ * same width.
+ */
+ decode->cc_src = tcg_temp_new();
+ decode->cc_dst = cpu_cc_dst;
+ decode->cc_op = ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB;
+ tcg_gen_shr_tl(decode->cc_src, src, s->T1);
+ break;
+
+ default:
+ /* Otherwise, generate EFLAGS and replace the C bit. */
+ decode->cc_src = tcg_temp_new();
+ decode->cc_dst = tcg_temp_new();
+ decode->cc_op = CC_OP_ADCX;
+
+ gen_mov_eflags(s, decode->cc_src);
+ tcg_gen_setcond_tl(TCG_COND_TSTNE, decode->cc_dst, src, mask);
+ break;
+ }
+}
+
+static void gen_BT(DisasContext *s, X86DecodedInsn *decode)
+{
+ TCGv mask = gen_bt_mask(s, decode);
+
+ gen_bt_flags(s, decode, s->T0, mask);
+}
+
+static void gen_BTC(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv old = tcg_temp_new();
+ TCGv mask = gen_bt_mask(s, decode);
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_fetch_xor_tl(old, s->A0, mask, s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_mov_tl(old, s->T0);
+ tcg_gen_xor_tl(s->T0, s->T0, mask);
+ }
+
+ gen_bt_flags(s, decode, old, mask);
+}
+
+static void gen_BTR(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv old = tcg_temp_new();
+ TCGv mask = gen_bt_mask(s, decode);
+
+ if (s->prefix & PREFIX_LOCK) {
+ TCGv maskc = tcg_temp_new();
+ tcg_gen_not_tl(maskc, mask);
+ tcg_gen_atomic_fetch_and_tl(old, s->A0, maskc, s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_mov_tl(old, s->T0);
+ tcg_gen_andc_tl(s->T0, s->T0, mask);
+ }
+
+ gen_bt_flags(s, decode, old, mask);
+}
+
+static void gen_BTS(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv old = tcg_temp_new();
+ TCGv mask = gen_bt_mask(s, decode);
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_fetch_or_tl(old, s->A0, mask, s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_mov_tl(old, s->T0);
+ tcg_gen_or_tl(s->T0, s->T0, mask);
+ }
+
+ gen_bt_flags(s, decode, old, mask);
+}
+
static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 14/25] target/i386: convert bit test instructions to new decoder
2024-06-08 8:41 ` [PATCH 14/25] target/i386: convert bit test instructions to new decoder Paolo Bonzini
@ 2024-06-08 19:37 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 19:37 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:41, Paolo Bonzini wrote:
> - if (mod != 3) {
> - AddressParts a = gen_lea_modrm_0(env, s, modrm);
> - /* specific case: we need to add a displacement */
> - gen_exts(ot, s->T1);
> - tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
> - tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
> - tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0);
This is not the same as
> + if (decode->e.special == X86_SPECIAL_BitTest) {
> + int oplen = 3 + decode->op[0].ot;
> + int poslen = 8 << decode->op[2].ot;
> + TCGv ofs = tcg_temp_new();
> +
> + /* Extract memory displacement from T1. */
> + assert (decode->op[2].unit == X86_OP_INT);
> + tcg_gen_sextract_tl(ofs, s->T1, oplen, poslen - oplen);
> +
> + tcg_gen_add_tl(s->A0, ea, ofs);
... this.
Combining the exts + sari into an sextract is fine, but this has lost the shli.
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 15/25] target/i386: pull load/writeback out of gen_shiftd_rm_T1
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (13 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 14/25] target/i386: convert bit test instructions to new decoder Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 19:39 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 16/25] target/i386: adapt gen_shift_count for SHLD/SHRD Paolo Bonzini
` (9 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Use gen_ld_modrm/gen_st_modrm, moving them and gen_shift_flags to the
caller. This way, gen_shiftd_rm_T1 becomes something that the new
decoder can call.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 55 ++++++++++---------------------------
1 file changed, 14 insertions(+), 41 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index c3843092350..416db2f3b0e 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -535,15 +535,6 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}
-static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
-{
- if (d == OR_TMP0) {
- gen_op_st_v(s, idx, s->T0, s->A0);
- } else {
- gen_op_mov_reg_v(s, idx, d, s->T0);
- }
-}
-
static void gen_update_eip_next(DisasContext *s)
{
assert(s->pc_save != -1);
@@ -1481,19 +1472,12 @@ static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
}
/* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
+static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
bool is_right, TCGv count_in)
{
target_ulong mask = (ot == MO_64 ? 63 : 31);
TCGv count;
- /* load */
- if (op1 == OR_TMP0) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, op1);
- }
-
count = tcg_temp_new();
tcg_gen_andi_tl(count, count_in, mask);
@@ -1558,10 +1542,7 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
break;
}
- /* store */
- gen_op_st_rm_T0_A0(s, ot, op1);
-
- gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
+ return count;
}
#define X86_MAX_INSN_LENGTH 15
@@ -3071,9 +3052,9 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
CPUX86State *env = cpu_env(cpu);
int prefixes = s->prefix;
MemOp dflag = s->dflag;
- int shift;
+ TCGv shift;
MemOp ot;
- int modrm, reg, rm, mod, op, opreg;
+ int modrm, reg, rm, mod, op;
/* now check op code */
switch (b) {
@@ -3239,39 +3220,31 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
/* shifts */
case 0x1a4: /* shld imm */
op = 0;
- shift = 1;
+ shift = NULL;
goto do_shiftd;
case 0x1a5: /* shld cl */
op = 0;
- shift = 0;
+ shift = cpu_regs[R_ECX];
goto do_shiftd;
case 0x1ac: /* shrd imm */
op = 1;
- shift = 1;
+ shift = NULL;
goto do_shiftd;
case 0x1ad: /* shrd cl */
op = 1;
- shift = 0;
+ shift = cpu_regs[R_ECX];
do_shiftd:
ot = dflag;
modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
reg = ((modrm >> 3) & 7) | REX_R(s);
- if (mod != 3) {
- gen_lea_modrm(env, s, modrm);
- opreg = OR_TMP0;
- } else {
- opreg = rm;
+ gen_ld_modrm(env, s, modrm, ot);
+ if (!shift) {
+ shift = tcg_constant_tl(x86_ldub_code(env, s));
}
gen_op_mov_v_reg(s, ot, s->T1, reg);
-
- if (shift) {
- TCGv imm = tcg_constant_tl(x86_ldub_code(env, s));
- gen_shiftd_rm_T1(s, ot, opreg, op, imm);
- } else {
- gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
- }
+ shift = gen_shiftd_rm_T1(s, ot, op, shift);
+ gen_st_modrm(env, s, modrm, ot);
+ gen_shift_flags(s, ot, s->T0, s->tmp0, shift, op);
break;
case 0x1bc: /* bsf / tzcnt */
case 0x1bd: /* bsr / lzcnt */
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 16/25] target/i386: adapt gen_shift_count for SHLD/SHRD
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (14 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 15/25] target/i386: pull load/writeback out of gen_shiftd_rm_T1 Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 19:42 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 17/25] target/i386: convert SHLD/SHRD to new decoder Paolo Bonzini
` (8 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
SHLD/SHRD can have 3 register operands - s->T0, s->T1 and either
1 or CL - and therefore decode->op[2] is taken by the low part
of the register being shifted. Pass X86_OP_* to gen_shift_count
from its current callers and hardcode cpu_regs[R_ECX] as the
shift count.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/emit.c.inc | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 41398e5130c..2e73b41cd3e 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -2998,16 +2998,16 @@ static void gen_PUSHF(DisasContext *s, X86DecodedInsn *decode)
}
static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
- bool *can_be_zero, TCGv *count)
+ bool *can_be_zero, TCGv *count, int unit)
{
MemOp ot = decode->op[0].ot;
int mask = (ot <= MO_32 ? 0x1f : 0x3f);
*can_be_zero = false;
- switch (decode->op[2].unit) {
+ switch (unit) {
case X86_OP_INT:
*count = tcg_temp_new();
- tcg_gen_andi_tl(*count, s->T1, mask);
+ tcg_gen_andi_tl(*count, cpu_regs[R_ECX], mask);
*can_be_zero = true;
break;
@@ -3192,7 +3192,7 @@ static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
bool have_1bit_cin, can_be_zero;
TCGv count;
TCGLabel *zero_label = NULL;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
TCGv low, high, low_count;
if (!count) {
@@ -3244,7 +3244,7 @@ static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
bool have_1bit_cin, can_be_zero;
TCGv count;
TCGLabel *zero_label = NULL;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
TCGv low, high, high_count;
if (!count) {
@@ -3422,7 +3422,7 @@ static void gen_ROL(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
TCGv_i32 temp32, count32;
TCGv old = tcg_temp_new();
@@ -3450,7 +3450,7 @@ static void gen_ROR(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
TCGv_i32 temp32, count32;
TCGv old = tcg_temp_new();
@@ -3562,7 +3562,7 @@ static void gen_SAR(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
if (!count) {
return;
@@ -3690,7 +3690,7 @@ static void gen_SHL(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
if (!count) {
return;
@@ -3722,7 +3722,7 @@ static void gen_SHR(DisasContext *s, X86DecodedInsn *decode)
{
bool can_be_zero;
TCGv count;
- MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
if (!count) {
return;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 16/25] target/i386: adapt gen_shift_count for SHLD/SHRD
2024-06-08 8:41 ` [PATCH 16/25] target/i386: adapt gen_shift_count for SHLD/SHRD Paolo Bonzini
@ 2024-06-08 19:42 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 19:42 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:41, Paolo Bonzini wrote:
> SHLD/SHRD can have 3 register operands - s->T0, s->T1 and either
> 1 or CL - and therefore decode->op[2] is taken by the low part
> of the register being shifted. Pass X86_OP_* to gen_shift_count
> from its current callers and hardcode cpu_regs[R_ECX] as the
> shift count.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/emit.c.inc | 20 ++++++++++----------
> 1 file changed, 10 insertions(+), 10 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 17/25] target/i386: convert SHLD/SHRD to new decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (15 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 16/25] target/i386: adapt gen_shift_count for SHLD/SHRD Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 19:47 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 18/25] target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT " Paolo Bonzini
` (7 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Use the same flag generation code as SHL and SHR, but use
the existing gen_shiftd_rm_T1 function to compute the result
as well as CC_SRC.
Decoding-wise, SHLD/SHRD by immediate count as a 4 operand
instruction because s->T0 and s->T1 actually occupy three op
slots. The infrastructure used by opcodes in the 0F 3A table
works fine.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 83 +-------------------------------
target/i386/tcg/decode-new.c.inc | 6 ++-
target/i386/tcg/emit.c.inc | 42 ++++++++++++++++
3 files changed, 48 insertions(+), 83 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 416db2f3b0e..1e9036eb6e3 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1429,57 +1429,11 @@ static bool check_cpl0(DisasContext *s)
return false;
}
-static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
- TCGv shm1, TCGv count, bool is_right)
-{
- TCGv_i32 z32, s32, oldop;
- TCGv z_tl;
-
- /* Store the results into the CC variables. If we know that the
- variable must be dead, store unconditionally. Otherwise we'll
- need to not disrupt the current contents. */
- z_tl = tcg_constant_tl(0);
- if (cc_op_live[s->cc_op] & USES_CC_DST) {
- tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
- result, cpu_cc_dst);
- } else {
- tcg_gen_mov_tl(cpu_cc_dst, result);
- }
- if (cc_op_live[s->cc_op] & USES_CC_SRC) {
- tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
- shm1, cpu_cc_src);
- } else {
- tcg_gen_mov_tl(cpu_cc_src, shm1);
- }
-
- /* Get the two potential CC_OP values into temporaries. */
- tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
- if (s->cc_op == CC_OP_DYNAMIC) {
- oldop = cpu_cc_op;
- } else {
- tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
- oldop = s->tmp3_i32;
- }
-
- /* Conditionally store the CC_OP value. */
- z32 = tcg_constant_i32(0);
- s32 = tcg_temp_new_i32();
- tcg_gen_trunc_tl_i32(s32, count);
- tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
-
- /* The CC_OP value is no longer predictable. */
- set_cc_op(s, CC_OP_DYNAMIC);
-}
-
/* XXX: add faster immediate case */
-static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
- bool is_right, TCGv count_in)
+static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
+ bool is_right, TCGv count)
{
target_ulong mask = (ot == MO_64 ? 63 : 31);
- TCGv count;
-
- count = tcg_temp_new();
- tcg_gen_andi_tl(count, count_in, mask);
switch (ot) {
case MO_16:
@@ -1541,8 +1495,6 @@ static TCGv gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
tcg_gen_or_tl(s->T0, s->T0, s->T1);
break;
}
-
- return count;
}
#define X86_MAX_INSN_LENGTH 15
@@ -3052,7 +3004,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
CPUX86State *env = cpu_env(cpu);
int prefixes = s->prefix;
MemOp dflag = s->dflag;
- TCGv shift;
MemOp ot;
int modrm, reg, rm, mod, op;
@@ -3216,36 +3167,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
break;
- /**************************/
- /* shifts */
- case 0x1a4: /* shld imm */
- op = 0;
- shift = NULL;
- goto do_shiftd;
- case 0x1a5: /* shld cl */
- op = 0;
- shift = cpu_regs[R_ECX];
- goto do_shiftd;
- case 0x1ac: /* shrd imm */
- op = 1;
- shift = NULL;
- goto do_shiftd;
- case 0x1ad: /* shrd cl */
- op = 1;
- shift = cpu_regs[R_ECX];
- do_shiftd:
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- gen_ld_modrm(env, s, modrm, ot);
- if (!shift) {
- shift = tcg_constant_tl(x86_ldub_code(env, s));
- }
- gen_op_mov_v_reg(s, ot, s->T1, reg);
- shift = gen_shiftd_rm_T1(s, ot, op, shift);
- gen_st_modrm(env, s, modrm, ot);
- gen_shift_flags(s, ot, s->T0, s->tmp0, shift, op);
- break;
case 0x1bc: /* bsf / tzcnt */
case 0x1bd: /* bsr / lzcnt */
ot = dflag;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index af142d6911a..bd9e7cd4df9 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1134,6 +1134,8 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa1] = X86_OP_ENTRYw(POP, FS, w),
[0xa2] = X86_OP_ENTRY0(CPUID),
[0xa3] = X86_OP_ENTRYrr(BT, E,v, G,v, btEvGv),
+ [0xa4] = X86_OP_ENTRY4(SHLD, E,v, 2op,v, G,v),
+ [0xa5] = X86_OP_ENTRY3(SHLD, E,v, 2op,v, G,v),
[0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
[0xb3] = X86_OP_ENTRY2(BTR, E,v, G,v, btEvGv),
@@ -1262,6 +1264,8 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa9] = X86_OP_ENTRYw(POP, GS, w),
[0xaa] = X86_OP_ENTRY0(RSM, chk(smm) svm(RSM)),
[0xab] = X86_OP_ENTRY2(BTS, E,v, G,v, btEvGv),
+ [0xac] = X86_OP_ENTRY4(SHRD, E,v, 2op,v, G,v),
+ [0xad] = X86_OP_ENTRY3(SHRD, E,v, 2op,v, G,v),
[0xae] = X86_OP_GROUP0(group15),
/*
* It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
@@ -2557,8 +2561,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
switch (b) {
case 0x00 ... 0x01: /* mostly privileged instructions */
case 0x1a ... 0x1b: /* MPX */
- case 0xa4 ... 0xa5: /* SHLD */
- case 0xac ... 0xad: /* SHRD */
case 0xb0 ... 0xb1: /* cmpxchg */
case 0xb8: /* POPCNT */
case 0xbc ... 0xbd: /* LZCNT/TZCNT */
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 2e73b41cd3e..aabc86669c2 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -3708,6 +3708,27 @@ static void gen_SHL(DisasContext *s, X86DecodedInsn *decode)
}
}
+static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode)
+{
+ bool can_be_zero;
+ TCGv count;
+ int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+ if (!count) {
+ return;
+ }
+
+ decode->cc_dst = s->T0;
+ decode->cc_src = s->tmp0;
+ gen_shiftd_rm_T1(s, ot, false, count);
+ if (can_be_zero) {
+ gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot);
+ } else {
+ decode->cc_op = CC_OP_SHLB + ot;
+ }
+}
+
static void gen_SHLX(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -3740,6 +3761,27 @@ static void gen_SHR(DisasContext *s, X86DecodedInsn *decode)
}
}
+static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode)
+{
+ bool can_be_zero;
+ TCGv count;
+ int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+ if (!count) {
+ return;
+ }
+
+ decode->cc_dst = s->T0;
+ decode->cc_src = s->tmp0;
+ gen_shiftd_rm_T1(s, ot, true, count);
+ if (can_be_zero) {
+ gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+ } else {
+ decode->cc_op = CC_OP_SARB + ot;
+ }
+}
+
static void gen_SHRX(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 17/25] target/i386: convert SHLD/SHRD to new decoder
2024-06-08 8:41 ` [PATCH 17/25] target/i386: convert SHLD/SHRD to new decoder Paolo Bonzini
@ 2024-06-08 19:47 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 19:47 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:41, Paolo Bonzini wrote:
> Use the same flag generation code as SHL and SHR, but use
> the existing gen_shiftd_rm_T1 function to compute the result
> as well as CC_SRC.
>
> Decoding-wise, SHLD/SHRD by immediate count as a 4 operand
> instruction because s->T0 and s->T1 actually occupy three op
> slots. The infrastructure used by opcodes in the 0F 3A table
> works fine.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/translate.c | 83 +-------------------------------
> target/i386/tcg/decode-new.c.inc | 6 ++-
> target/i386/tcg/emit.c.inc | 42 ++++++++++++++++
> 3 files changed, 48 insertions(+), 83 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 18/25] target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT to new decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (16 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 17/25] target/i386: convert SHLD/SHRD to new decoder Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 19:53 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 19/25] target/i386: convert XADD " Paolo Bonzini
` (6 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 1 +
target/i386/tcg/translate.c | 74 ----------------------------
target/i386/tcg/decode-new.c.inc | 51 +++++++++++++++++++-
target/i386/tcg/emit.c.inc | 82 ++++++++++++++++++++++++++++++++
4 files changed, 132 insertions(+), 76 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index f781bb5bbec..13be23145a8 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -119,6 +119,7 @@ typedef enum X86CPUIDFeature {
X86_FEAT_FXSR,
X86_FEAT_MOVBE,
X86_FEAT_PCLMULQDQ,
+ X86_FEAT_POPCNT,
X86_FEAT_SHA_NI,
X86_FEAT_SSE,
X86_FEAT_SSE2,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1e9036eb6e3..a9cf1332b43 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -818,11 +818,6 @@ static void gen_movs(DisasContext *s, MemOp ot)
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_op_update1_cc(DisasContext *s)
-{
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-}
-
static void gen_op_update2_cc(DisasContext *s)
{
tcg_gen_mov_tl(cpu_cc_src, s->T1);
@@ -3167,56 +3162,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
break;
- case 0x1bc: /* bsf / tzcnt */
- case 0x1bd: /* bsr / lzcnt */
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- gen_ld_modrm(env, s, modrm, ot);
- gen_extu(ot, s->T0);
-
- /* Note that lzcnt and tzcnt are in different extensions. */
- if ((prefixes & PREFIX_REPZ)
- && (b & 1
- ? s->cpuid_ext3_features & CPUID_EXT3_ABM
- : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
- int size = 8 << ot;
- /* For lzcnt/tzcnt, C bit is defined related to the input. */
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- if (b & 1) {
- /* For lzcnt, reduce the target_ulong result by the
- number of zeros that we expect to find at the top. */
- tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
- tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
- } else {
- /* For tzcnt, a zero input must return the operand size. */
- tcg_gen_ctzi_tl(s->T0, s->T0, size);
- }
- /* For lzcnt/tzcnt, Z bit is defined related to the result. */
- gen_op_update1_cc(s);
- set_cc_op(s, CC_OP_BMILGB + ot);
- } else {
- /* For bsr/bsf, only the Z bit is defined and it is related
- to the input and not the result. */
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- set_cc_op(s, CC_OP_LOGICB + ot);
-
- /* ??? The manual says that the output is undefined when the
- input is zero, but real hardware leaves it unchanged, and
- real programs appear to depend on that. Accomplish this
- by passing the output as the value to return upon zero. */
- if (b & 1) {
- /* For bsr, return the bit index of the first 1 bit,
- not the count of leading zeros. */
- tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
- tcg_gen_clz_tl(s->T0, s->T0, s->T1);
- tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
- } else {
- tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
- }
- }
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
case 0x100:
modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
@@ -3811,25 +3756,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
gen_nop_modrm(env, s, modrm);
break;
- case 0x1b8: /* SSE4.2 popcnt */
- if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
- PREFIX_REPZ)
- goto illegal_op;
- if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
- goto illegal_op;
-
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
-
- ot = dflag;
- gen_ld_modrm(env, s, modrm, ot);
- gen_extu(ot, s->T0);
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- tcg_gen_ctpop_tl(s->T0, s->T0);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
-
- set_cc_op(s, CC_OP_POPCNT);
- break;
default:
g_assert_not_reached();
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index bd9e7cd4df9..64ec731bf4a 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -469,6 +469,50 @@ static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
*entry = *decode_by_prefix(s, opcodes_0F7F);
}
+static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static const X86OpEntry popcnt =
+ X86_OP_ENTRYwr(POPCNT, G,v, E,v, cpuid(POPCNT) zextT0);
+
+ if (s->prefix & PREFIX_REPZ) {
+ *entry = popcnt;
+ } else {
+ memset(entry, 0, sizeof(*entry));
+ }
+}
+
+static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ /* For BSF, pass 2op as the third operand so that we can use zextT0 */
+ static const X86OpEntry opcodes_0FBC[4] = {
+ X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0),
+ X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0), /* 0x66 */
+ X86_OP_ENTRYwr(TZCNT, G,v, E,v, zextT0), /* 0xf3 */
+ X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0), /* 0xf2 */
+ };
+ if (!(s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
+ *entry = opcodes_0FBC[0];
+ } else {
+ *entry = *decode_by_prefix(s, opcodes_0FBC);
+ }
+}
+
+static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ /* For BSR, pass 2op as the third operand so that we can use zextT0 */
+ static const X86OpEntry opcodes_0FBD[4] = {
+ X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0),
+ X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0), /* 0x66 */
+ X86_OP_ENTRYwr(LZCNT, G,v, E,v, zextT0), /* 0xf3 */
+ X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0), /* 0xf2 */
+ };
+ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
+ *entry = opcodes_0FBD[0];
+ } else {
+ *entry = *decode_by_prefix(s, opcodes_0FBD);
+ }
+}
+
static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry movq[4] = {
@@ -1273,10 +1317,13 @@ static const X86OpEntry opcodes_0F[256] = {
*/
[0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0),
+ [0xb8] = X86_OP_GROUP0(0FB8),
/* decoded as modrm, which is visible as a difference between page fault and #UD */
[0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */
[0xba] = X86_OP_GROUP2(group8, E,v, I,b),
[0xbb] = X86_OP_ENTRY2(BTC, E,v, G,v, btEvGv),
+ [0xbc] = X86_OP_GROUP0(0FBC),
+ [0xbd] = X86_OP_GROUP0(0FBD),
[0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
[0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
@@ -2174,6 +2221,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
case X86_FEAT_PCLMULQDQ:
return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
+ case X86_FEAT_POPCNT:
+ return (s->cpuid_ext_features & CPUID_EXT_POPCNT);
case X86_FEAT_SSE:
return (s->cpuid_features & CPUID_SSE);
case X86_FEAT_SSE2:
@@ -2562,8 +2611,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
case 0x00 ... 0x01: /* mostly privileged instructions */
case 0x1a ... 0x1b: /* MPX */
case 0xb0 ... 0xb1: /* cmpxchg */
- case 0xb8: /* POPCNT */
- case 0xbc ... 0xbd: /* LZCNT/TZCNT */
case 0xc0 ... 0xc1: /* xadd */
case 0xc7: /* grp9 */
disas_insn_old(s, cpu, b + 0x100);
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index aabc86669c2..2fbf2a5ce8c 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1345,6 +1345,47 @@ static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
}
}
+/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output. */
+static void gen_BSF(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* Only the Z bit is defined and it is related to the input. */
+ decode->cc_dst = tcg_temp_new();
+ decode->cc_op = CC_OP_LOGICB + ot;
+ tcg_gen_mov_tl(decode->cc_dst, s->T0);
+
+ /*
+ * The manual says that the output is undefined when the
+ * input is zero, but real hardware leaves it unchanged, and
+ * real programs appear to depend on that. Accomplish this
+ * by passing the output as the value to return upon zero.
+ */
+ tcg_gen_ctz_tl(s->T0, s->T0, s->T1);
+}
+
+/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output. */
+static void gen_BSR(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* Only the Z bit is defined and it is related to the input. */
+ decode->cc_dst = tcg_temp_new();
+ decode->cc_op = CC_OP_LOGICB + ot;
+ tcg_gen_mov_tl(decode->cc_dst, s->T0);
+
+ /*
+ * The manual says that the output is undefined when the
+ * input is zero, but real hardware leaves it unchanged, and
+ * real programs appear to depend on that. Accomplish this
+ * by passing the output as the value to return upon zero.
+ * Plus, return the bit index of the first 1 bit.
+ */
+ tcg_gen_xori_tl(s->T1, s->T1, TARGET_LONG_BITS - 1);
+ tcg_gen_clz_tl(s->T0, s->T0, s->T1);
+ tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
+}
+
static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
{
#ifdef TARGET_X86_64
@@ -2254,6 +2295,24 @@ static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
gen_lxx_seg(s, decode, R_SS);
}
+static void gen_LZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* C bit (cc_src) is defined related to the input. */
+ decode->cc_src = tcg_temp_new();
+ decode->cc_dst = s->T0;
+ decode->cc_op = CC_OP_BMILGB + ot;
+ tcg_gen_mov_tl(decode->cc_src, s->T0);
+
+ /*
+ * Reduce the target_ulong result by the number of zeros that
+ * we expect to find at the top.
+ */
+ tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
+ tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - (8 << ot));
+}
+
static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
{
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
@@ -2812,6 +2871,15 @@ static void gen_POPA(DisasContext *s, X86DecodedInsn *decode)
gen_popa(s);
}
+static void gen_POPCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+ decode->cc_src = tcg_temp_new();
+ decode->cc_op = CC_OP_POPCNT;
+
+ tcg_gen_mov_tl(decode->cc_src, s->T0);
+ tcg_gen_ctpop_tl(s->T0, s->T0);
+}
+
static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
{
MemOp ot;
@@ -3893,6 +3961,20 @@ static void gen_SYSRET(DisasContext *s, X86DecodedInsn *decode)
s->base.is_jmp = DISAS_EOB_RECHECK_TF;
}
+static void gen_TZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* C bit (cc_src) is defined related to the input. */
+ decode->cc_src = tcg_temp_new();
+ decode->cc_dst = s->T0;
+ decode->cc_op = CC_OP_BMILGB + ot;
+ tcg_gen_mov_tl(decode->cc_src, s->T0);
+
+ /* A zero input returns the operand size. */
+ tcg_gen_ctzi_tl(s->T0, s->T0, 8 << ot);
+}
+
static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
{
gen_illegal_opcode(s);
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 19/25] target/i386: convert XADD to new decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (17 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 18/25] target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT " Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 20:00 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 20/25] target/i386: convert CMPXCHG " Paolo Bonzini
` (5 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 35 --------------------------------
target/i386/tcg/decode-new.c.inc | 3 ++-
target/i386/tcg/emit.c.inc | 24 ++++++++++++++++++++++
3 files changed, 26 insertions(+), 36 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a9cf1332b43..7a63c927c1f 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -818,12 +818,6 @@ static void gen_movs(DisasContext *s, MemOp ot)
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_op_update2_cc(DisasContext *s)
-{
- tcg_gen_mov_tl(cpu_cc_src, s->T1);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-}
-
/* compute all eflags to reg */
static void gen_mov_eflags(DisasContext *s, TCGv reg)
{
@@ -3006,35 +3000,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
switch (b) {
/**************************/
/* arith & logic */
- case 0x1c0:
- case 0x1c1: /* xadd Ev, Gv */
- ot = mo_b_d(b, dflag);
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- gen_op_mov_v_reg(s, ot, s->T0, reg);
- if (mod == 3) {
- rm = (modrm & 7) | REX_B(s);
- gen_op_mov_v_reg(s, ot, s->T1, rm);
- tcg_gen_add_tl(s->T0, s->T0, s->T1);
- gen_op_mov_reg_v(s, ot, reg, s->T1);
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- } else {
- gen_lea_modrm(env, s, modrm);
- if (s->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
- s->mem_index, ot | MO_LE);
- tcg_gen_add_tl(s->T0, s->T0, s->T1);
- } else {
- gen_op_ld_v(s, ot, s->T1, s->A0);
- tcg_gen_add_tl(s->T0, s->T0, s->T1);
- gen_op_st_v(s, ot, s->T0, s->A0);
- }
- gen_op_mov_reg_v(s, ot, reg, s->T1);
- }
- gen_op_update2_cc(s);
- set_cc_op(s, CC_OP_ADDB + ot);
- break;
case 0x1b0:
case 0x1b1: /* cmpxchg Ev, Gv */
{
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 64ec731bf4a..11ecd1c6c1d 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1188,6 +1188,8 @@ static const X86OpEntry opcodes_0F[256] = {
[0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */
[0xb7] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, zextT0), /* MOVZX */
+ [0xc0] = X86_OP_ENTRY2(XADD, E,b, G,b, lock),
+ [0xc1] = X86_OP_ENTRY2(XADD, E,v, G,v, lock),
[0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0xc3] = X86_OP_ENTRY3(MOV, EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
[0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
@@ -2611,7 +2613,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
case 0x00 ... 0x01: /* mostly privileged instructions */
case 0x1a ... 0x1b: /* MPX */
case 0xb0 ... 0xb1: /* cmpxchg */
- case 0xc0 ... 0xc1: /* xadd */
case 0xc7: /* grp9 */
disas_insn_old(s, cpu, b + 0x100);
return;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 2fbf2a5ce8c..42e41a7a87c 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -4492,6 +4492,30 @@ static void gen_WRxxBASE(DisasContext *s, X86DecodedInsn *decode)
tcg_gen_mov_tl(base, s->T0);
}
+static void gen_XADD(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+
+ decode->cc_dst = tcg_temp_new();
+ decode->cc_src = s->T1;
+ decode->cc_op = CC_OP_ADDB + ot;
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_fetch_add_tl(s->T0, s->A0, s->T1, s->mem_index, ot | MO_LE);
+ tcg_gen_add_tl(decode->cc_dst, s->T0, s->T1);
+ } else {
+ tcg_gen_add_tl(decode->cc_dst, s->T0, s->T1);
+ /*
+ * NOTE: writing memory first is important for MMU exceptions,
+ * but "new result" wins for XADD AX, AX.
+ */
+ gen_writeback(s, decode, 0, decode->cc_dst);
+ }
+ if (decode->op[0].has_ea || decode->op[2].n != decode->op[0].n) {
+ gen_writeback(s, decode, 2, s->T0);
+ }
+}
+
static void gen_XCHG(DisasContext *s, X86DecodedInsn *decode)
{
if (s->prefix & PREFIX_LOCK) {
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 20/25] target/i386: convert CMPXCHG to new decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (18 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 19/25] target/i386: convert XADD " Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 20:04 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 21/25] target/i386: decode address before going back to translate.c Paolo Bonzini
` (4 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 79 --------------------------------
target/i386/tcg/decode-new.c.inc | 3 +-
target/i386/tcg/emit.c.inc | 51 +++++++++++++++++++++
3 files changed, 53 insertions(+), 80 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 7a63c927c1f..1f76339130a 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -434,13 +434,6 @@ static inline MemOp mo_stacksize(DisasContext *s)
return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
}
-/* Select size 8 if lsb of B is clear, else OT. Used for decoding
- byte vs word opcodes. */
-static inline MemOp mo_b_d(int b, MemOp ot)
-{
- return b & 1 ? ot : MO_8;
-}
-
/* Compute the result of writing t0 to the OT-sized register REG.
*
* If DEST is NULL, store the result into the register and return the
@@ -715,11 +708,6 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
return dst;
}
-static void gen_extu(MemOp ot, TCGv reg)
-{
- gen_ext_tl(reg, reg, ot, false);
-}
-
static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1)
{
TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false);
@@ -2998,73 +2986,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
/* now check op code */
switch (b) {
- /**************************/
- /* arith & logic */
- case 0x1b0:
- case 0x1b1: /* cmpxchg Ev, Gv */
- {
- TCGv oldv, newv, cmpv, dest;
-
- ot = mo_b_d(b, dflag);
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- oldv = tcg_temp_new();
- newv = tcg_temp_new();
- cmpv = tcg_temp_new();
- gen_op_mov_v_reg(s, ot, newv, reg);
- tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
- gen_extu(ot, cmpv);
- if (s->prefix & PREFIX_LOCK) {
- if (mod == 3) {
- goto illegal_op;
- }
- gen_lea_modrm(env, s, modrm);
- tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
- s->mem_index, ot | MO_LE);
- } else {
- if (mod == 3) {
- rm = (modrm & 7) | REX_B(s);
- gen_op_mov_v_reg(s, ot, oldv, rm);
- gen_extu(ot, oldv);
-
- /*
- * Unlike the memory case, where "the destination operand receives
- * a write cycle without regard to the result of the comparison",
- * rm must not be touched altogether if the write fails, including
- * not zero-extending it on 64-bit processors. So, precompute
- * the result of a successful writeback and perform the movcond
- * directly on cpu_regs. Also need to write accumulator first, in
- * case rm is part of RAX too.
- */
- dest = gen_op_deposit_reg_v(s, ot, rm, newv, newv);
- tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
- } else {
- gen_lea_modrm(env, s, modrm);
- gen_op_ld_v(s, ot, oldv, s->A0);
-
- /*
- * Perform an unconditional store cycle like physical cpu;
- * must be before changing accumulator to ensure
- * idempotency if the store faults and the instruction
- * is restarted
- */
- tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
- gen_op_st_v(s, ot, newv, s->A0);
- }
- }
- /*
- * Write EAX only if the cmpxchg fails; reuse newv as the destination,
- * since it's dead here.
- */
- dest = gen_op_deposit_reg_v(s, ot, R_EAX, newv, oldv);
- tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, dest, newv);
- tcg_gen_mov_tl(cpu_cc_src, oldv);
- tcg_gen_mov_tl(s->cc_srcT, cmpv);
- tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
- set_cc_op(s, CC_OP_SUBB + ot);
- }
- break;
case 0x1c7: /* cmpxchg8b */
modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 11ecd1c6c1d..00ffaeb0763 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1181,6 +1181,8 @@ static const X86OpEntry opcodes_0F[256] = {
[0xa4] = X86_OP_ENTRY4(SHLD, E,v, 2op,v, G,v),
[0xa5] = X86_OP_ENTRY3(SHLD, E,v, 2op,v, G,v),
+ [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock),
+ [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock),
[0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None),
[0xb3] = X86_OP_ENTRY2(BTR, E,v, G,v, btEvGv),
[0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None),
@@ -2612,7 +2614,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
switch (b) {
case 0x00 ... 0x01: /* mostly privileged instructions */
case 0x1a ... 0x1b: /* MPX */
- case 0xb0 ... 0xb1: /* cmpxchg */
case 0xc7: /* grp9 */
disas_insn_old(s, cpu, b + 0x100);
return;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 42e41a7a87c..857d270d247 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1708,6 +1708,57 @@ static void gen_CMPS(DisasContext *s, X86DecodedInsn *decode)
}
}
+static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[2].ot;
+ TCGv cmpv = tcg_temp_new();
+ TCGv oldv = tcg_temp_new();
+ TCGv newv = tcg_temp_new();
+ TCGv dest;
+
+ tcg_gen_ext_tl(cmpv, cpu_regs[R_EAX], ot);
+ tcg_gen_ext_tl(newv, s->T1, ot);
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_ext_tl(oldv, s->T0, ot);
+ if (decode->op[0].has_ea) {
+ /*
+ * Perform an unconditional store cycle like physical cpu;
+ * must be before changing accumulator to ensure
+ * idempotency if the store faults and the instruction
+ * is restarted
+ */
+ tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
+ gen_op_st_v(s, ot, newv, s->A0);
+ } else {
+ /*
+ * Unlike the memory case, where "the destination operand receives
+ * a write cycle without regard to the result of the comparison",
+ * rm must not be touched altogether if the write fails, including
+ * not zero-extending it on 64-bit processors. So, precompute
+ * the result of a successful writeback and perform the movcond
+ * directly on cpu_regs. In case rm is part of RAX, note that this
+ * movcond and the one below are mutually exclusive is executed.
+ */
+ dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, newv, newv);
+ tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
+ }
+ decode->op[0].unit = X86_OP_SKIP;
+ }
+
+ /* Write RAX only if the cmpxchg fails. */
+ dest = gen_op_deposit_reg_v(s, ot, R_EAX, s->T0, oldv);
+ tcg_gen_movcond_tl(TCG_COND_NE, dest, oldv, cmpv, s->T0, dest);
+
+ tcg_gen_mov_tl(s->cc_srcT, cmpv);
+ tcg_gen_sub_tl(cmpv, cmpv, oldv);
+ decode->cc_dst = cmpv;
+ decode->cc_src = oldv;
+ decode->cc_op = CC_OP_SUBB + ot;
+}
+
static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 21/25] target/i386: decode address before going back to translate.c
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (19 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 20/25] target/i386: convert CMPXCHG " Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 20:13 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 22/25] target/i386: list instructions still in translate.c Paolo Bonzini
` (3 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
There are now relatively few unconverted opcodes in translate.c (there
are 13 of them including 8 for x87), and all of them have the same
format with a mod/rm byte and no immediate. A good next step is
to remove the early bail out to disas_insn_x87/disas_insn_old,
instead giving these legacy translator functions the same prototype
as the other gen_* functions.
To do this, the X86DecodeInsn can be passed down to the places that
used to fetch address bytes from the instruction stream. To make
sure that everything is done cleanly, the CPUX86State* argument is
removed.
As part of the unification, the gen_lea_modrm() name is now free,
so rename gen_load_ea() to gen_lea_modrm(). This is as good a name
and it makes the changes to translate.c easier to review.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 14 ++-
target/i386/tcg/translate.c | 152 +++++++++++++------------------
target/i386/tcg/decode-new.c.inc | 44 ++++-----
target/i386/tcg/emit.c.inc | 2 +-
4 files changed, 94 insertions(+), 118 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 13be23145a8..5d82136a287 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -263,12 +263,13 @@ typedef enum X86VEXSpecial {
typedef struct X86OpEntry X86OpEntry;
typedef struct X86DecodedInsn X86DecodedInsn;
+struct DisasContext;
/* Decode function for multibyte opcodes. */
-typedef void (*X86DecodeFunc)(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b);
+typedef void (*X86DecodeFunc)(struct DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b);
/* Code generation function. */
-typedef void (*X86GenFunc)(DisasContext *s, X86DecodedInsn *decode);
+typedef void (*X86GenFunc)(struct DisasContext *s, X86DecodedInsn *decode);
struct X86OpEntry {
/* Based on the is_decode flags. */
@@ -315,6 +316,14 @@ typedef struct X86DecodedOp {
};
} X86DecodedOp;
+typedef struct AddressParts {
+ int def_seg;
+ int base;
+ int index;
+ int scale;
+ target_long disp;
+} AddressParts;
+
struct X86DecodedInsn {
X86OpEntry e;
X86DecodedOp op[3];
@@ -332,3 +341,4 @@ struct X86DecodedInsn {
uint8_t b;
};
+static void gen_lea_modrm(struct DisasContext *s, X86DecodedInsn *decode);
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1f76339130a..1bca4043a5c 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -29,6 +29,7 @@
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"
+#include "decode-new.h"
#include "exec/log.h"
@@ -1527,14 +1528,6 @@ static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
/* Decompose an address. */
-typedef struct AddressParts {
- int def_seg;
- int base;
- int index;
- int scale;
- target_long disp;
-} AddressParts;
-
static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
int modrm)
{
@@ -1693,24 +1686,11 @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a, bool is_vsib)
return ea;
}
-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
-{
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
- TCGv ea = gen_lea_modrm_1(s, a, false);
- gen_lea_v_seg(s, ea, a.def_seg, s->override);
-}
-
-static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
-{
- (void)gen_lea_modrm_0(env, s, modrm);
-}
-
/* Used for BNDCL, BNDCU, BNDCN. */
-static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
+static void gen_bndck(DisasContext *s, X86DecodedInsn *decode,
TCGCond cond, TCGv_i64 bndv)
{
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
- TCGv ea = gen_lea_modrm_1(s, a, false);
+ TCGv ea = gen_lea_modrm_1(s, decode->mem, false);
tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
if (!CODE64(s)) {
@@ -1722,8 +1702,9 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
}
/* generate modrm load of memory or register. */
-static void gen_ld_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot)
+static void gen_ld_modrm(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
{
+ int modrm = s->modrm;
int mod, rm;
mod = (modrm >> 6) & 3;
@@ -1731,14 +1712,15 @@ static void gen_ld_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot)
if (mod == 3) {
gen_op_mov_v_reg(s, ot, s->T0, rm);
} else {
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
gen_op_ld_v(s, ot, s->T0, s->A0);
}
}
/* generate modrm store of memory or register. */
-static void gen_st_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot)
+static void gen_st_modrm(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
{
+ int modrm = s->modrm;
int mod, rm;
mod = (modrm >> 6) & 3;
@@ -1746,7 +1728,7 @@ static void gen_st_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot)
if (mod == 3) {
gen_op_mov_reg_v(s, ot, rm, s->T0);
} else {
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
gen_op_st_v(s, ot, s->T0, s->A0);
}
}
@@ -2314,12 +2296,12 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}
-static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
+static void gen_cmpxchg8b(DisasContext *s, X86DecodedInsn *decode)
{
TCGv_i64 cmp, val, old;
TCGv Z;
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
cmp = tcg_temp_new_i64();
val = tcg_temp_new_i64();
@@ -2368,13 +2350,13 @@ static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
}
#ifdef TARGET_X86_64
-static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
+static void gen_cmpxchg16b(DisasContext *s, X86DecodedInsn *decode)
{
MemOp mop = MO_TE | MO_128 | MO_ALIGN;
TCGv_i64 t0, t1;
TCGv_i128 cmp, val;
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
cmp = tcg_temp_new_i128();
val = tcg_temp_new_i128();
@@ -2412,31 +2394,32 @@ static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
}
#endif
-static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
+#include "emit.c.inc"
+
+static void gen_x87(DisasContext *s, X86DecodedInsn *decode)
{
- CPUX86State *env = cpu_env(cpu);
bool update_fip = true;
- int modrm, mod, rm, op;
+ int b = decode->b;
+ int modrm = s->modrm;
+ int mod, rm, op;
if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
/* if CR0.EM or CR0.TS are set, generate an FPU exception */
/* XXX: what to do if illegal op ? */
gen_exception(s, EXCP07_PREX);
- return true;
+ return;
}
- modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
rm = modrm & 7;
op = ((b & 7) << 3) | ((modrm >> 3) & 7);
if (mod != 3) {
/* memory op */
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
- TCGv ea = gen_lea_modrm_1(s, a, false);
+ TCGv ea = gen_lea_modrm_1(s, decode->mem, false);
TCGv last_addr = tcg_temp_new();
bool update_fdp = true;
tcg_gen_mov_tl(last_addr, ea);
- gen_lea_v_seg(s, ea, a.def_seg, s->override);
+ gen_lea_v_seg(s, ea, decode->mem.def_seg, s->override);
switch (op) {
case 0x00 ... 0x07: /* fxxxs */
@@ -2626,11 +2609,11 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
gen_helper_fpop(tcg_env);
break;
default:
- return false;
+ goto illegal_op;
}
if (update_fdp) {
- int last_seg = s->override >= 0 ? s->override : a.def_seg;
+ int last_seg = s->override >= 0 ? s->override : decode->mem.def_seg;
tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
offsetof(CPUX86State,
@@ -2667,7 +2650,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
update_fip = false;
break;
default:
- return false;
+ goto illegal_op;
}
break;
case 0x0c: /* grp d9/4 */
@@ -2686,7 +2669,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
gen_helper_fxam_ST0(tcg_env);
break;
default:
- return false;
+ goto illegal_op;
}
break;
case 0x0d: /* grp d9/5 */
@@ -2721,7 +2704,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
gen_helper_fldz_ST0(tcg_env);
break;
default:
- return false;
+ goto illegal_op;
}
}
break;
@@ -2823,7 +2806,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
gen_helper_fpop(tcg_env);
break;
default:
- return false;
+ goto illegal_op;
}
break;
case 0x1c:
@@ -2843,7 +2826,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
case 4: /* fsetpm (287 only, just do nop here) */
break;
default:
- return false;
+ goto illegal_op;
}
break;
case 0x1d: /* fucomi */
@@ -2895,7 +2878,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
gen_helper_fpop(tcg_env);
break;
default:
- return false;
+ goto illegal_op;
}
break;
case 0x38: /* ffreep sti, undocumented op */
@@ -2910,7 +2893,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
break;
default:
- return false;
+ goto illegal_op;
}
break;
case 0x3d: /* fucomip */
@@ -2957,7 +2940,7 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
}
break;
default:
- return false;
+ goto illegal_op;
}
}
@@ -2969,25 +2952,24 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
tcg_gen_st_tl(eip_cur_tl(s),
tcg_env, offsetof(CPUX86State, fpip));
}
- return true;
+ return;
illegal_op:
gen_illegal_opcode(s);
- return true;
}
-static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
+static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
{
- CPUX86State *env = cpu_env(cpu);
int prefixes = s->prefix;
MemOp dflag = s->dflag;
+ int b = decode->b + 0x100;
+ int modrm = s->modrm;
MemOp ot;
- int modrm, reg, rm, mod, op;
+ int reg, rm, mod, op;
/* now check op code */
switch (b) {
case 0x1c7: /* cmpxchg8b */
- modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
switch ((modrm >> 3) & 7) {
case 1: /* CMPXCHG8, CMPXCHG16 */
@@ -2999,14 +2981,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
goto illegal_op;
}
- gen_cmpxchg16b(s, env, modrm);
+ gen_cmpxchg16b(s, decode);
break;
}
#endif
if (!(s->cpuid_features & CPUID_CX8)) {
goto illegal_op;
}
- gen_cmpxchg8b(s, env, modrm);
+ gen_cmpxchg8b(s, decode);
break;
case 7: /* RDSEED, RDPID with f3 prefix */
@@ -3049,7 +3031,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
case 0x100:
- modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
op = (modrm >> 3) & 7;
switch(op) {
@@ -3063,14 +3044,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
tcg_gen_ld32u_tl(s->T0, tcg_env,
offsetof(CPUX86State, ldt.selector));
ot = mod == 3 ? dflag : MO_16;
- gen_st_modrm(env, s, modrm, ot);
+ gen_st_modrm(s, decode, ot);
break;
case 2: /* lldt */
if (!PE(s) || VM86(s))
goto illegal_op;
if (check_cpl0(s)) {
gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
- gen_ld_modrm(env, s, modrm, MO_16);
+ gen_ld_modrm(s, decode, MO_16);
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
gen_helper_lldt(tcg_env, s->tmp2_i32);
}
@@ -3085,14 +3066,14 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
tcg_gen_ld32u_tl(s->T0, tcg_env,
offsetof(CPUX86State, tr.selector));
ot = mod == 3 ? dflag : MO_16;
- gen_st_modrm(env, s, modrm, ot);
+ gen_st_modrm(s, decode, ot);
break;
case 3: /* ltr */
if (!PE(s) || VM86(s))
goto illegal_op;
if (check_cpl0(s)) {
gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
- gen_ld_modrm(env, s, modrm, MO_16);
+ gen_ld_modrm(s, decode, MO_16);
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
gen_helper_ltr(tcg_env, s->tmp2_i32);
}
@@ -3101,7 +3082,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
case 5: /* verw */
if (!PE(s) || VM86(s))
goto illegal_op;
- gen_ld_modrm(env, s, modrm, MO_16);
+ gen_ld_modrm(s, decode, MO_16);
gen_update_cc_op(s);
if (op == 4) {
gen_helper_verr(tcg_env, s->T0);
@@ -3111,19 +3092,18 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
assume_cc_op(s, CC_OP_EFLAGS);
break;
default:
- goto unknown_op;
+ goto illegal_op;
}
break;
case 0x101:
- modrm = x86_ldub_code(env, s);
switch (modrm) {
CASE_MODRM_MEM_OP(0): /* sgdt */
if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
break;
}
gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
tcg_gen_ld32u_tl(s->T0,
tcg_env, offsetof(CPUX86State, gdt.limit));
gen_op_st_v(s, MO_16, s->T0, s->A0);
@@ -3179,7 +3159,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
}
gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, idt.limit));
gen_op_st_v(s, MO_16, s->T0, s->A0);
gen_add_A0_im(s, 2);
@@ -3329,7 +3309,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
}
gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
gen_op_ld_v(s, MO_16, s->T1, s->A0);
gen_add_A0_im(s, 2);
gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
@@ -3345,7 +3325,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
}
gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
gen_op_ld_v(s, MO_16, s->T1, s->A0);
gen_add_A0_im(s, 2);
gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
@@ -3369,7 +3349,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
*/
mod = (modrm >> 6) & 3;
ot = (mod != 3 ? MO_16 : s->dflag);
- gen_st_modrm(env, s, modrm, ot);
+ gen_st_modrm(s, decode, ot);
break;
case 0xee: /* rdpkru */
if (s->prefix & (PREFIX_LOCK | PREFIX_DATA
@@ -3396,7 +3376,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
}
gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
- gen_ld_modrm(env, s, modrm, MO_16);
+ gen_ld_modrm(s, decode, MO_16);
/*
* Only the 4 lower bits of CR0 are modified.
* PE cannot be set to zero if already set to one.
@@ -3414,7 +3394,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
}
gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
gen_helper_flush_page(tcg_env, s->A0);
s->base.is_jmp = DISAS_EOB_NEXT;
break;
@@ -3447,12 +3427,11 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
break;
default:
- goto unknown_op;
+ goto illegal_op;
}
break;
case 0x11a:
- modrm = x86_ldub_code(env, s);
if (s->flags & HF_MPX_EN_MASK) {
mod = (modrm >> 6) & 3;
reg = ((modrm >> 3) & 7) | REX_R(s);
@@ -3463,7 +3442,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
|| s->aflag == MO_16) {
goto illegal_op;
}
- gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
+ gen_bndck(s, decode, TCG_COND_LTU, cpu_bndl[reg]);
} else if (prefixes & PREFIX_REPNZ) {
/* bndcu */
if (reg >= 4
@@ -3473,7 +3452,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
TCGv_i64 notu = tcg_temp_new_i64();
tcg_gen_not_i64(notu, cpu_bndu[reg]);
- gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
+ gen_bndck(s, decode, TCG_COND_GTU, notu);
} else if (prefixes & PREFIX_DATA) {
/* bndmov -- from reg/mem */
if (reg >= 4 || s->aflag == MO_16) {
@@ -3489,7 +3468,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
}
} else {
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
if (CODE64(s)) {
tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
s->mem_index, MO_LEUQ);
@@ -3508,7 +3487,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
} else if (mod != 3) {
/* bndldx */
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
+ AddressParts a = decode->mem;
if (reg >= 4
|| (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16
@@ -3538,10 +3517,8 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
gen_set_hflag(s, HF_MPX_IU_MASK);
}
}
- gen_nop_modrm(env, s, modrm);
break;
case 0x11b:
- modrm = x86_ldub_code(env, s);
if (s->flags & HF_MPX_EN_MASK) {
mod = (modrm >> 6) & 3;
reg = ((modrm >> 3) & 7) | REX_R(s);
@@ -3552,7 +3529,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
|| s->aflag == MO_16) {
goto illegal_op;
}
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
+ AddressParts a = decode->mem;
if (a.base >= 0) {
tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
if (!CODE64(s)) {
@@ -3565,7 +3542,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
/* rip-relative generates #ud */
goto illegal_op;
}
- tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a, false));
+ tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, decode->mem, false));
if (!CODE64(s)) {
tcg_gen_ext32u_tl(s->A0, s->A0);
}
@@ -3580,7 +3557,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
|| s->aflag == MO_16) {
goto illegal_op;
}
- gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
+ gen_bndck(s, decode, TCG_COND_GTU, cpu_bndu[reg]);
} else if (prefixes & PREFIX_DATA) {
/* bndmov -- to reg/mem */
if (reg >= 4 || s->aflag == MO_16) {
@@ -3596,7 +3573,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
}
} else {
- gen_lea_modrm(env, s, modrm);
+ gen_lea_modrm(s, decode);
if (CODE64(s)) {
tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
s->mem_index, MO_LEUQ);
@@ -3613,7 +3590,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
} else if (mod != 3) {
/* bndstx */
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
+ AddressParts a = decode->mem;
if (reg >= 4
|| (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16
@@ -3640,7 +3617,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
}
}
- gen_nop_modrm(env, s, modrm);
break;
default:
g_assert_not_reached();
@@ -3649,12 +3625,8 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
illegal_op:
gen_illegal_opcode(s);
return;
- unknown_op:
- gen_unknown_opcode(env, s);
}
-#include "decode-new.h"
-#include "emit.c.inc"
#include "decode-new.c.inc"
void tcg_x86_init(void)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 00ffaeb0763..d75d242e552 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1092,6 +1092,8 @@ static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entr
}
static const X86OpEntry opcodes_0F[256] = {
+ [0x00] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */
+ [0x01] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */
[0x02] = X86_OP_ENTRYwr(LAR, G,v, E,w, chk(prot)),
[0x03] = X86_OP_ENTRYwr(LSL, G,v, E,w, chk(prot)),
[0x05] = X86_OP_ENTRY0(SYSCALL, chk(o64_intel)),
@@ -1197,6 +1199,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
[0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
[0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
+ [0xc7] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted */
[0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2),
[0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
@@ -1239,6 +1242,8 @@ static const X86OpEntry opcodes_0F[256] = {
[0x18] = X86_OP_ENTRY1(NOP, nop,v), /* prefetch/reserved NOP */
[0x19] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */
+ [0x1a] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted MPX */
+ [0x1b] = X86_OP_ENTRY1(multi0F, nop,v, nolea), /* unconverted MPX */
[0x1c] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */
[0x1d] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */
[0x1e] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */
@@ -1776,6 +1781,19 @@ static const X86OpEntry opcodes_root[256] = {
[0xCE] = X86_OP_ENTRY0(INTO),
[0xCF] = X86_OP_ENTRY0(IRET, chk(vm86_iopl) svm(IRET)),
+ /*
+ * x87 is nolea because it needs the address without segment base,
+ * in order to store it in fdp.
+ */
+ [0xD8] = X86_OP_ENTRY1(x87, nop,v, nolea),
+ [0xD9] = X86_OP_ENTRY1(x87, nop,v, nolea),
+ [0xDA] = X86_OP_ENTRY1(x87, nop,v, nolea),
+ [0xDB] = X86_OP_ENTRY1(x87, nop,v, nolea),
+ [0xDC] = X86_OP_ENTRY1(x87, nop,v, nolea),
+ [0xDD] = X86_OP_ENTRY1(x87, nop,v, nolea),
+ [0xDE] = X86_OP_ENTRY1(x87, nop,v, nolea),
+ [0xDF] = X86_OP_ENTRY1(x87, nop,v, nolea),
+
[0xE8] = X86_OP_ENTRYr(CALL, J,z_f64),
[0xE9] = X86_OP_ENTRYr(JMP, J,z_f64),
[0xEA] = X86_OP_ENTRYrr(JMPF, I_unsigned,p, I_unsigned,w, chk(i64)),
@@ -2600,30 +2618,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
}
}
- /* Go back to old decoder for unconverted opcodes. */
- if (!(s->prefix & PREFIX_VEX)) {
- if ((b & ~7) == 0xd8) {
- if (!disas_insn_x87(s, cpu, b)) {
- goto unknown_op;
- }
- return;
- }
-
- if (b == 0x0f) {
- b = x86_ldub_code(env, s);
- switch (b) {
- case 0x00 ... 0x01: /* mostly privileged instructions */
- case 0x1a ... 0x1b: /* MPX */
- case 0xc7: /* grp9 */
- disas_insn_old(s, cpu, b + 0x100);
- return;
- default:
- decode_func = do_decode_0F;
- break;
- }
- }
- }
-
memset(&decode, 0, sizeof(decode));
decode.cc_op = -1;
decode.b = b;
@@ -2767,7 +2761,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
if (decode.e.special != X86_SPECIAL_NoLoadEA &&
(decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) {
- gen_load_ea(s, &decode);
+ gen_lea_modrm(s, &decode);
}
if (s->prefix & PREFIX_LOCK) {
gen_load(s, &decode, 2, s->T1);
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 857d270d247..9234dde6e58 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -73,7 +73,7 @@ static void gen_NM_exception(DisasContext *s)
gen_exception(s, EXCP07_PREX);
}
-static void gen_load_ea(DisasContext *s, X86DecodedInsn *decode)
+static void gen_lea_modrm(DisasContext *s, X86DecodedInsn *decode)
{
AddressParts *mem = &decode->mem;
TCGv ea;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 21/25] target/i386: decode address before going back to translate.c
2024-06-08 8:41 ` [PATCH 21/25] target/i386: decode address before going back to translate.c Paolo Bonzini
@ 2024-06-08 20:13 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 20:13 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:41, Paolo Bonzini wrote:
> There are now relatively few unconverted opcodes in translate.c (there
> are 13 of them including 8 for x87), and all of them have the same
> format with a mod/rm byte and no immediate. A good next step is
> to remove the early bail out to disas_insn_x87/disas_insn_old,
> instead giving these legacy translator functions the same prototype
> as the other gen_* functions.
>
> To do this, the X86DecodeInsn can be passed down to the places that
> used to fetch address bytes from the instruction stream. To make
> sure that everything is done cleanly, the CPUX86State* argument is
> removed.
>
> As part of the unification, the gen_lea_modrm() name is now free,
> so rename gen_load_ea() to gen_lea_modrm(). This is as good a name
> and it makes the changes to translate.c easier to review.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/decode-new.h | 14 ++-
> target/i386/tcg/translate.c | 152 +++++++++++++------------------
> target/i386/tcg/decode-new.c.inc | 44 ++++-----
> target/i386/tcg/emit.c.inc | 2 +-
> 4 files changed, 94 insertions(+), 118 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 22/25] target/i386: list instructions still in translate.c
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (20 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 21/25] target/i386: decode address before going back to translate.c Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 20:14 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 23/25] target/i386: assert that cc_op* and pc_save are preserved Paolo Bonzini
` (2 subsequent siblings)
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Group them so that it is easier to figure out which two-byte opcodes to
tackle together.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index d75d242e552..7463cf87f1a 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -129,6 +129,24 @@
*
* (^) these are the two cases in which Intel and AMD disagree on the
* primary exception class
+ *
+ * Instructions still in translate.c
+ * ---------------------------------
+ * x87:
+ * 0xD8 - 0xDF
+ *
+ * privileged/system:
+ * 0x0F 0x00 group 6 (SLDT, STR, LLDT, LTR, VERR, VERW)
+ * 0x0F 0x01 group 7 (SGDT, SIDT, LGDT, LIDT, SMSW, LMSW, INVLPG,
+ * MONITOR, MWAIT, CLAC, STAC, XGETBV, XSETBV,
+ * SWAPGS, RDTSCP)
+ *
+ * MPX:
+ * 0x0F 0x1A BNDLDX, BNDMOV, BNDCL, BNDCU
+ * 0x0F 0x1B BNDSTX, BNDMOV, BNDMK, BNDCN
+
+ * integer ops - atomic:
+ * 0x0F 0xC7 group 9 (CMPXCHG8B/CMPXCHG16B; also RDRAND, RDSEED, RDPID)
*/
#define X86_OP_NONE { 0 },
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 23/25] target/i386: assert that cc_op* and pc_save are preserved
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (21 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 22/25] target/i386: list instructions still in translate.c Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 20:14 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 24/25] target/i386: do not check PREFIX_LOCK in old-style decoder Paolo Bonzini
2024-06-08 8:41 ` [PATCH 25/25] target/i386: remove gen_ext_tl Paolo Bonzini
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
Now all decoding has been done before any code generation.
There is no need anymore to save and restore cc_op* and
pc_save but, for the time being, assert that this is indeed
the case.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1bca4043a5c..67f2e792166 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3834,15 +3834,9 @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
case 2:
/* Restore state that may affect the next instruction. */
dc->pc = dc->base.pc_next;
- /*
- * TODO: These save/restore can be removed after the table-based
- * decoder is complete; we will be decoding the insn completely
- * before any code generation that might affect these variables.
- */
- dc->cc_op_dirty = orig_cc_op_dirty;
- dc->cc_op = orig_cc_op;
- dc->pc_save = orig_pc_save;
- /* END TODO */
+ assert(dc->cc_op_dirty == orig_cc_op_dirty);
+ assert(dc->cc_op == orig_cc_op);
+ assert(dc->pc_save == orig_pc_save);
dc->base.num_insns--;
tcg_remove_ops_after(dc->prev_insn_end);
dc->base.insn_start = dc->prev_insn_start;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH 24/25] target/i386: do not check PREFIX_LOCK in old-style decoder
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (22 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 23/25] target/i386: assert that cc_op* and pc_save are preserved Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 20:15 ` Richard Henderson
2024-06-08 8:41 ` [PATCH 25/25] target/i386: remove gen_ext_tl Paolo Bonzini
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
It is already checked before getting there.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 26 ++++++++------------------
1 file changed, 8 insertions(+), 18 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 67f2e792166..ee5ef3ccbc6 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2993,7 +2993,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
case 7: /* RDSEED, RDPID with f3 prefix */
if (mod != 3 ||
- (s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) {
+ (s->prefix & PREFIX_REPNZ)) {
goto illegal_op;
}
if (s->prefix & PREFIX_REPZ) {
@@ -3013,7 +3013,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
case 6: /* RDRAND */
if (mod != 3 ||
- (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
+ (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) ||
!(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
goto illegal_op;
}
@@ -3173,8 +3173,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
case 0xd0: /* xgetbv */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
- || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
- | PREFIX_REPZ | PREFIX_REPNZ))) {
+ || (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
@@ -3184,8 +3183,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
case 0xd1: /* xsetbv */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
- || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
- | PREFIX_REPZ | PREFIX_REPNZ))) {
+ || (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
gen_svm_check_intercept(s, SVM_EXIT_XSETBV);
@@ -3352,8 +3350,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
gen_st_modrm(s, decode, ot);
break;
case 0xee: /* rdpkru */
- if (s->prefix & (PREFIX_LOCK | PREFIX_DATA
- | PREFIX_REPZ | PREFIX_REPNZ)) {
+ if (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ)) {
goto illegal_op;
}
tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
@@ -3361,8 +3358,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
break;
case 0xef: /* wrpkru */
- if (s->prefix & (PREFIX_LOCK | PREFIX_DATA
- | PREFIX_REPZ | PREFIX_REPNZ)) {
+ if (s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ)) {
goto illegal_op;
}
tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
@@ -3438,7 +3434,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
if (prefixes & PREFIX_REPZ) {
/* bndcl */
if (reg >= 4
- || (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16) {
goto illegal_op;
}
@@ -3446,7 +3441,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
} else if (prefixes & PREFIX_REPNZ) {
/* bndcu */
if (reg >= 4
- || (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16) {
goto illegal_op;
}
@@ -3460,7 +3454,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
}
if (mod == 3) {
int reg2 = (modrm & 7) | REX_B(s);
- if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
+ if (reg2 >= 4) {
goto illegal_op;
}
if (s->flags & HF_MPX_IU_MASK) {
@@ -3489,7 +3483,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
/* bndldx */
AddressParts a = decode->mem;
if (reg >= 4
- || (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16
|| a.base < -1) {
goto illegal_op;
@@ -3525,7 +3518,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
if (mod != 3 && (prefixes & PREFIX_REPZ)) {
/* bndmk */
if (reg >= 4
- || (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16) {
goto illegal_op;
}
@@ -3553,7 +3545,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
} else if (prefixes & PREFIX_REPNZ) {
/* bndcn */
if (reg >= 4
- || (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16) {
goto illegal_op;
}
@@ -3565,7 +3556,7 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
}
if (mod == 3) {
int reg2 = (modrm & 7) | REX_B(s);
- if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
+ if (reg2 >= 4) {
goto illegal_op;
}
if (s->flags & HF_MPX_IU_MASK) {
@@ -3592,7 +3583,6 @@ static void gen_multi0F(DisasContext *s, X86DecodedInsn *decode)
/* bndstx */
AddressParts a = decode->mem;
if (reg >= 4
- || (prefixes & PREFIX_LOCK)
|| s->aflag == MO_16
|| a.base < -1) {
goto illegal_op;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 24/25] target/i386: do not check PREFIX_LOCK in old-style decoder
2024-06-08 8:41 ` [PATCH 24/25] target/i386: do not check PREFIX_LOCK in old-style decoder Paolo Bonzini
@ 2024-06-08 20:15 ` Richard Henderson
2024-06-10 17:10 ` Paolo Bonzini
0 siblings, 1 reply; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 20:15 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:41, Paolo Bonzini wrote:
> It is already checked before getting there.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/translate.c | 26 ++++++++------------------
> 1 file changed, 8 insertions(+), 18 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH 24/25] target/i386: do not check PREFIX_LOCK in old-style decoder
2024-06-08 20:15 ` Richard Henderson
@ 2024-06-10 17:10 ` Paolo Bonzini
0 siblings, 0 replies; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-10 17:10 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Sat, Jun 8, 2024 at 10:16 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 6/8/24 01:41, Paolo Bonzini wrote:
> > It is already checked before getting there.
> >
> > Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> > ---
> > target/i386/tcg/translate.c | 26 ++++++++------------------
> > 1 file changed, 8 insertions(+), 18 deletions(-)
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
... except cmpxchg8b/cmpxchg16b do have to accept LOCK. Fortunately
it's trivial to convert them, with just an ugly temporary
if (decode.e.gen == gen_multi0F) {
accept_lock = true;
}
that only lasts one commit. I'll resend this part of the series later
(and BTx as well).
Paolo
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH 25/25] target/i386: remove gen_ext_tl
2024-06-08 8:40 [PATCH 00/25] target/i386: more progress towards new decoder Paolo Bonzini
` (23 preceding siblings ...)
2024-06-08 8:41 ` [PATCH 24/25] target/i386: do not check PREFIX_LOCK in old-style decoder Paolo Bonzini
@ 2024-06-08 8:41 ` Paolo Bonzini
2024-06-08 20:17 ` Richard Henderson
24 siblings, 1 reply; 56+ messages in thread
From: Paolo Bonzini @ 2024-06-08 8:41 UTC (permalink / raw)
To: qemu-devel
With the introduction of tcg_gen_ext_tl, most uses can be converted directly
because they do not have a NULL destination. tcg_gen_ext_tl is able to drop
no-ops like "tcg_gen_ext_tl(tcgv, tcgv, MO_TL)" just fine, and the only thing
that gen_ext_tl was adding on top was avoiding the creation of a useless
temporary. This can be done in the only place where it matters, which is
gen_op_j_ecx.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 41 +++++++++++++++----------------------
1 file changed, 17 insertions(+), 24 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index ee5ef3ccbc6..8089b502628 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -697,23 +697,16 @@ static inline TCGv gen_compute_Dshift(DisasContext *s, MemOp ot)
return dshift;
};
-static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
-{
- if (size == MO_TL) {
- return src;
- }
- if (!dst) {
- dst = tcg_temp_new();
- }
- tcg_gen_ext_tl(dst, src, size | (sign ? MO_SIGN : 0));
- return dst;
-}
-
static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1)
{
- TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false);
-
- tcg_gen_brcondi_tl(cond, tmp, 0, label1);
+ TCGv lhs;
+ if (s->aflag == MO_TL) {
+ lhs = cpu_regs[R_ECX];
+ } else {
+ lhs = tcg_temp_new();
+ tcg_gen_ext_tl(lhs, cpu_regs[R_ECX], s->aflag);
+ }
+ tcg_gen_brcondi_tl(cond, lhs, 0, label1);
}
static inline void gen_op_jz_ecx(DisasContext *s, TCGLabel *label1)
@@ -886,16 +879,16 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_SUBB ... CC_OP_SUBQ:
/* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
size = s->cc_op - CC_OP_SUBB;
- gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
- gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ tcg_gen_ext_tl(s->cc_srcT, s->cc_srcT, size);
+ tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size);
return (CCPrepare) { .cond = TCG_COND_LTU, .reg = s->cc_srcT,
.reg2 = cpu_cc_src, .use_reg2 = true };
case CC_OP_ADDB ... CC_OP_ADDQ:
/* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
size = s->cc_op - CC_OP_ADDB;
- gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size, false);
- gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ tcg_gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size);
+ tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size);
return (CCPrepare) { .cond = TCG_COND_LTU, .reg = cpu_cc_dst,
.reg2 = cpu_cc_src, .use_reg2 = true };
@@ -920,7 +913,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_BMILGB ... CC_OP_BMILGQ:
size = s->cc_op - CC_OP_BMILGB;
- gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size);
return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src };
case CC_OP_ADCX:
@@ -1048,8 +1041,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
size = s->cc_op - CC_OP_SUBB;
switch (jcc_op) {
case JCC_BE:
- gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
- gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ tcg_gen_ext_tl(s->cc_srcT, s->cc_srcT, size);
+ tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size);
cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->cc_srcT,
.reg2 = cpu_cc_src, .use_reg2 = true };
break;
@@ -1059,8 +1052,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
case JCC_LE:
cond = TCG_COND_LE;
fast_jcc_l:
- gen_ext_tl(s->cc_srcT, s->cc_srcT, size, true);
- gen_ext_tl(cpu_cc_src, cpu_cc_src, size, true);
+ tcg_gen_ext_tl(s->cc_srcT, s->cc_srcT, size | MO_SIGN);
+ tcg_gen_ext_tl(cpu_cc_src, cpu_cc_src, size | MO_SIGN);
cc = (CCPrepare) { .cond = cond, .reg = s->cc_srcT,
.reg2 = cpu_cc_src, .use_reg2 = true };
break;
--
2.45.1
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH 25/25] target/i386: remove gen_ext_tl
2024-06-08 8:41 ` [PATCH 25/25] target/i386: remove gen_ext_tl Paolo Bonzini
@ 2024-06-08 20:17 ` Richard Henderson
0 siblings, 0 replies; 56+ messages in thread
From: Richard Henderson @ 2024-06-08 20:17 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 6/8/24 01:41, Paolo Bonzini wrote:
> With the introduction of tcg_gen_ext_tl, most uses can be converted directly
> because they do not have a NULL destination. tcg_gen_ext_tl is able to drop
> no-ops like "tcg_gen_ext_tl(tcgv, tcgv, MO_TL)" just fine, and the only thing
> that gen_ext_tl was adding on top was avoiding the creation of a useless
> temporary. This can be done in the only place where it matters, which is
> gen_op_j_ecx.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/translate.c | 41 +++++++++++++++----------------------
> 1 file changed, 17 insertions(+), 24 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 56+ messages in thread