* [PATCH v2 01/25] target/i386: use TSTEQ/TSTNE to test low bits
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 02/25] target/i386: use TSTEQ/TSTNE to check flags Paolo Bonzini
` (23 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
When testing the sign bit or equality to zero of a partial register, it
is useful to use a single TSTEQ or TSTNE operation. It can also be used
to test the parity flag, using bit 0 of the population count.
Do not do this for target_ulong-sized values, however: the optimizer would
produce a comparison against zero anyway, and skipping that case avoids
shifts by 64, which are undefined behavior.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 28 ++++++++++++++++++++--------
target/i386/tcg/emit.c.inc | 5 ++---
2 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 051ffb5e1fd..4735f084d40 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -928,11 +928,21 @@ typedef struct CCPrepare {
bool no_setcond;
} CCPrepare;
+static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size)
+{
+ if (size == MO_TL) {
+ return (CCPrepare) { .cond = TCG_COND_LT, .reg = src, .mask = -1 };
+ } else {
+ return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src, .mask = -1,
+ .imm = 1ull << ((8 << size) - 1) };
+ }
+}
+
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
TCGv t0, t1;
- int size, shift;
+ MemOp size;
switch (s->cc_op) {
case CC_OP_SUBB ... CC_OP_SUBQ:
@@ -967,9 +977,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_SHLB ... CC_OP_SHLQ:
/* (CC_SRC >> (DATA_BITS - 1)) & 1 */
size = s->cc_op - CC_OP_SHLB;
- shift = (8 << size) - 1;
- return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = (target_ulong)1 << shift };
+ return gen_prepare_sign_nz(cpu_cc_src, size);
case CC_OP_MULB ... CC_OP_MULQ:
return (CCPrepare) { .cond = TCG_COND_NE,
@@ -1029,8 +1037,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
default:
{
MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
- TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
- return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
+ return gen_prepare_sign_nz(cpu_cc_dst, size);
}
}
}
@@ -1077,8 +1084,13 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
default:
{
MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
- TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
- return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+ if (size == MO_TL) {
+ return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst,
+ .mask = -1 };
+ } else {
+ return (CCPrepare) { .cond = TCG_COND_TSTEQ, .reg = cpu_cc_dst,
+ .mask = -1, .imm = (1ull << (8 << size)) - 1 };
+ }
}
}
}
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 6bcf88ecd71..0e00f6635dd 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1209,7 +1209,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
[JCC_Z] = TCG_COND_EQ,
[JCC_BE] = TCG_COND_LEU,
[JCC_S] = TCG_COND_LT, /* test sign bit by comparing against 0 */
- [JCC_P] = TCG_COND_EQ, /* even parity - tests low bit of popcount */
+ [JCC_P] = TCG_COND_TSTEQ, /* even parity - tests low bit of popcount */
[JCC_L] = TCG_COND_LT,
[JCC_LE] = TCG_COND_LE,
};
@@ -1260,8 +1260,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
case JCC_P:
tcg_gen_ext8u_tl(s->tmp0, s->T0);
tcg_gen_ctpop_tl(s->tmp0, s->tmp0);
- tcg_gen_andi_tl(s->tmp0, s->tmp0, 1);
- cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+ cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1);
break;
case JCC_S:
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 02/25] target/i386: use TSTEQ/TSTNE to check flags
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 01/25] target/i386: use TSTEQ/TSTNE to test low bits Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 03/25] target/i386: remove mask from CCPrepare Paolo Bonzini
` (22 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
The new conditions obviously come in handy when testing individual bits
of EFLAGS, and they make it possible to remove the .mask field of
CCPrepare.
Lowering to shift+and is done by the optimizer if necessary.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4735f084d40..62ba21c1d74 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -996,8 +996,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_EFLAGS:
case CC_OP_SARB ... CC_OP_SARQ:
/* CC_SRC & 1 */
- return (CCPrepare) { .cond = TCG_COND_NE,
- .reg = cpu_cc_src, .mask = CC_C };
+ return (CCPrepare) { .cond = TCG_COND_TSTNE,
+ .reg = cpu_cc_src, .mask = -1, .imm = CC_C };
default:
/* The need to compute only C from CC_OP_DYNAMIC is important
@@ -1014,8 +1014,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
gen_compute_eflags(s);
- return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = CC_P };
+ return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+ .mask = -1, .imm = CC_P };
}
/* compute eflags.S to reg */
@@ -1029,8 +1029,8 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
case CC_OP_ADCX:
case CC_OP_ADOX:
case CC_OP_ADCOX:
- return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = CC_S };
+ return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+ .mask = -1, .imm = CC_S };
case CC_OP_CLR:
case CC_OP_POPCNT:
return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
@@ -1058,8 +1058,8 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
.reg = cpu_cc_src, .mask = -1 };
default:
gen_compute_eflags(s);
- return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = CC_O };
+ return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+ .mask = -1, .imm = CC_O };
}
}
@@ -1074,8 +1074,8 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
case CC_OP_ADCX:
case CC_OP_ADOX:
case CC_OP_ADCOX:
- return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = CC_Z };
+ return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+ .mask = -1, .imm = CC_Z };
case CC_OP_CLR:
return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
case CC_OP_POPCNT:
@@ -1153,8 +1153,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
break;
case JCC_BE:
gen_compute_eflags(s);
- cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = CC_Z | CC_C };
+ cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
+ .mask = -1, .imm = CC_Z | CC_C };
break;
case JCC_S:
cc = gen_prepare_eflags_s(s, reg);
@@ -1168,8 +1168,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
reg = s->tmp0;
}
tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
- cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
- .mask = CC_O };
+ cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
+ .mask = -1, .imm = CC_O };
break;
default:
case JCC_LE:
@@ -1178,8 +1178,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
reg = s->tmp0;
}
tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
- cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
- .mask = CC_O | CC_Z };
+ cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
+ .mask = -1, .imm = CC_O | CC_Z };
break;
}
break;
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 03/25] target/i386: remove mask from CCPrepare
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 01/25] target/i386: use TSTEQ/TSTNE to test low bits Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 02/25] target/i386: use TSTEQ/TSTNE to check flags Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 04/25] target/i386: cc_op is not dynamic in gen_jcc1 Paolo Bonzini
` (21 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
With the introduction of TSTEQ and TSTNE the .mask field is always -1,
so remove all the now-unnecessary code.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 81 +++++++++++++------------------------
1 file changed, 27 insertions(+), 54 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 62ba21c1d74..9aecd415b38 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -923,7 +923,6 @@ typedef struct CCPrepare {
TCGv reg;
TCGv reg2;
target_ulong imm;
- target_ulong mask;
bool use_reg2;
bool no_setcond;
} CCPrepare;
@@ -931,9 +930,9 @@ typedef struct CCPrepare {
static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size)
{
if (size == MO_TL) {
- return (CCPrepare) { .cond = TCG_COND_LT, .reg = src, .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_LT, .reg = src };
} else {
- return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src, .mask = -1,
+ return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src,
.imm = 1ull << ((8 << size) - 1) };
}
}
@@ -962,17 +961,17 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
add_sub:
return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
- .reg2 = t1, .mask = -1, .use_reg2 = true };
+ .reg2 = t1, .use_reg2 = true };
case CC_OP_LOGICB ... CC_OP_LOGICQ:
case CC_OP_CLR:
case CC_OP_POPCNT:
- return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_NEVER };
case CC_OP_INCB ... CC_OP_INCQ:
case CC_OP_DECB ... CC_OP_DECQ:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = -1, .no_setcond = true };
+ .no_setcond = true };
case CC_OP_SHLB ... CC_OP_SHLQ:
/* (CC_SRC >> (DATA_BITS - 1)) & 1 */
@@ -981,23 +980,23 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_MULB ... CC_OP_MULQ:
return (CCPrepare) { .cond = TCG_COND_NE,
- .reg = cpu_cc_src, .mask = -1 };
+ .reg = cpu_cc_src };
case CC_OP_BMILGB ... CC_OP_BMILGQ:
size = s->cc_op - CC_OP_BMILGB;
t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
- return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0 };
case CC_OP_ADCX:
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
- .mask = -1, .no_setcond = true };
+ .no_setcond = true };
case CC_OP_EFLAGS:
case CC_OP_SARB ... CC_OP_SARQ:
/* CC_SRC & 1 */
return (CCPrepare) { .cond = TCG_COND_TSTNE,
- .reg = cpu_cc_src, .mask = -1, .imm = CC_C };
+ .reg = cpu_cc_src, .imm = CC_C };
default:
/* The need to compute only C from CC_OP_DYNAMIC is important
@@ -1006,7 +1005,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
cpu_cc_src2, cpu_cc_op);
return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
- .mask = -1, .no_setcond = true };
+ .no_setcond = true };
}
}
@@ -1015,7 +1014,7 @@ static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
gen_compute_eflags(s);
return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
- .mask = -1, .imm = CC_P };
+ .imm = CC_P };
}
/* compute eflags.S to reg */
@@ -1030,10 +1029,10 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
case CC_OP_ADOX:
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
- .mask = -1, .imm = CC_S };
+ .imm = CC_S };
case CC_OP_CLR:
case CC_OP_POPCNT:
- return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_NEVER };
default:
{
MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
@@ -1049,17 +1048,16 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
case CC_OP_ADOX:
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
- .mask = -1, .no_setcond = true };
+ .no_setcond = true };
case CC_OP_CLR:
case CC_OP_POPCNT:
- return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_NEVER };
case CC_OP_MULB ... CC_OP_MULQ:
- return (CCPrepare) { .cond = TCG_COND_NE,
- .reg = cpu_cc_src, .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src };
default:
gen_compute_eflags(s);
return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
- .mask = -1, .imm = CC_O };
+ .imm = CC_O };
}
}
@@ -1075,21 +1073,19 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
case CC_OP_ADOX:
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
- .mask = -1, .imm = CC_Z };
+ .imm = CC_Z };
case CC_OP_CLR:
- return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_ALWAYS };
case CC_OP_POPCNT:
- return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
- .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src };
default:
{
MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
if (size == MO_TL) {
- return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst,
- .mask = -1 };
+ return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst };
} else {
return (CCPrepare) { .cond = TCG_COND_TSTEQ, .reg = cpu_cc_dst,
- .mask = -1, .imm = (1ull << (8 << size)) - 1 };
+ .imm = (1ull << (8 << size)) - 1 };
}
}
}
@@ -1117,7 +1113,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
gen_extu(size, s->tmp4);
t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
- .reg2 = t0, .mask = -1, .use_reg2 = true };
+ .reg2 = t0, .use_reg2 = true };
break;
case JCC_L:
@@ -1130,7 +1126,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
gen_exts(size, s->tmp4);
t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
- .reg2 = t0, .mask = -1, .use_reg2 = true };
+ .reg2 = t0, .use_reg2 = true };
break;
default:
@@ -1154,7 +1150,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
case JCC_BE:
gen_compute_eflags(s);
cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
- .mask = -1, .imm = CC_Z | CC_C };
+ .imm = CC_Z | CC_C };
break;
case JCC_S:
cc = gen_prepare_eflags_s(s, reg);
@@ -1169,7 +1165,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
}
tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
- .mask = -1, .imm = CC_O };
+ .imm = CC_O };
break;
default:
case JCC_LE:
@@ -1179,7 +1175,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
}
tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
- .mask = -1, .imm = CC_O | CC_Z };
+ .imm = CC_O | CC_Z };
break;
}
break;
@@ -1204,16 +1200,6 @@ static void gen_setcc1(DisasContext *s, int b, TCGv reg)
return;
}
- if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
- cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
- tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
- tcg_gen_andi_tl(reg, reg, 1);
- return;
- }
- if (cc.mask != -1) {
- tcg_gen_andi_tl(reg, cc.reg, cc.mask);
- cc.reg = reg;
- }
if (cc.use_reg2) {
tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
} else {
@@ -1232,10 +1218,6 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
CCPrepare cc = gen_prepare_cc(s, b, s->T0);
- if (cc.mask != -1) {
- tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
- cc.reg = s->T0;
- }
if (cc.use_reg2) {
tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
} else {
@@ -1251,10 +1233,6 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
CCPrepare cc = gen_prepare_cc(s, b, s->T0);
gen_update_cc_op(s);
- if (cc.mask != -1) {
- tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
- cc.reg = s->T0;
- }
set_cc_op(s, CC_OP_DYNAMIC);
if (cc.use_reg2) {
tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -2519,11 +2497,6 @@ static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
{
CCPrepare cc = gen_prepare_cc(s, b, s->T1);
- if (cc.mask != -1) {
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, cc.reg, cc.mask);
- cc.reg = t0;
- }
if (!cc.use_reg2) {
cc.reg2 = tcg_constant_tl(cc.imm);
}
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 04/25] target/i386: cc_op is not dynamic in gen_jcc1
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (2 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 03/25] target/i386: remove mask from CCPrepare Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 15:53 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 05/25] target/i386: cleanup cc_op changes for REP/REPZ/REPNZ Paolo Bonzini
` (20 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Resetting cc_op to CC_OP_DYNAMIC should be done at control flow junctions,
which is not the case here. This translation block is ending and the
only effect of calling set_cc_op() would be a discard of s->cc_srcT.
This discard is useless (it's a temporary, not a global) and in fact
prevents gen_prepare_cc from returning s->cc_srcT.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 9aecd415b38..3f1d2858fc9 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1227,13 +1227,13 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
/* Generate a conditional jump to label 'l1' according to jump opcode
value 'b'. In the fast case, T0 is guaranteed not to be used.
- A translation block must end soon. */
+ One or both of the branches will call gen_jmp_rel, so ensure
+ cc_op is clean. */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
CCPrepare cc = gen_prepare_cc(s, b, s->T0);
gen_update_cc_op(s);
- set_cc_op(s, CC_OP_DYNAMIC);
if (cc.use_reg2) {
tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
} else {
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v2 04/25] target/i386: cc_op is not dynamic in gen_jcc1
2024-05-06 8:09 ` [PATCH v2 04/25] target/i386: cc_op is not dynamic in gen_jcc1 Paolo Bonzini
@ 2024-05-06 15:53 ` Richard Henderson
0 siblings, 0 replies; 38+ messages in thread
From: Richard Henderson @ 2024-05-06 15:53 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel; +Cc: zhao1.liu
On 5/6/24 01:09, Paolo Bonzini wrote:
> Resetting cc_op to CC_OP_DYNAMIC should be done at control flow junctions,
> which is not the case here. This translation block is ending and the
> only effect of calling set_cc_op() would be a discard of s->cc_srcT.
> This discard is useless (it's a temporary, not a global) and in fact
> prevents gen_prepare_cc from returning s->cc_srcT.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target/i386/tcg/translate.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 05/25] target/i386: cleanup cc_op changes for REP/REPZ/REPNZ
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (3 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 04/25] target/i386: cc_op is not dynamic in gen_jcc1 Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:07 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 06/25] target/i386: pull cc_op update to callers of gen_jmp_rel{, _csize} Paolo Bonzini
` (19 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
gen_update_cc_op must be called before control flow splits. Do it
where the jump on ECX!=0 is translated.
On the other hand, remove the call before gen_jcc1, which takes care of
it already, and explain why REPZ/REPNZ need not use CC_OP_DYNAMIC---the
translation block ends before any control-flow-dependent cc_op could
be observed.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 3f1d2858fc9..6b766f5dd3f 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1242,11 +1242,15 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
}
/* XXX: does not work with gdbstub "ice" single step - not a
- serious problem */
+ serious problem. The caller can jump to the returned label
+ to stop the REP but, if the flags have changed, it has to call
+ gen_update_cc_op before doing so. */
static TCGLabel *gen_jz_ecx_string(DisasContext *s)
{
TCGLabel *l1 = gen_new_label();
TCGLabel *l2 = gen_new_label();
+
+ gen_update_cc_op(s);
gen_op_jnz_ecx(s, l1);
gen_set_label(l2);
gen_jmp_rel_csize(s, 0, 1);
@@ -1342,7 +1346,6 @@ static void gen_repz(DisasContext *s, MemOp ot,
void (*fn)(DisasContext *s, MemOp ot))
{
TCGLabel *l2;
- gen_update_cc_op(s);
l2 = gen_jz_ecx_string(s);
fn(s, ot);
gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
@@ -1364,11 +1367,13 @@ static void gen_repz2(DisasContext *s, MemOp ot, int nz,
void (*fn)(DisasContext *s, MemOp ot))
{
TCGLabel *l2;
- gen_update_cc_op(s);
l2 = gen_jz_ecx_string(s);
+ /*
+ * Only one iteration is done at a time, so there is
+ * no control flow junction here and cc_op is never dynamic.
+ */
fn(s, ot);
gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
- gen_update_cc_op(s);
gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);
if (s->repz_opt) {
gen_op_jz_ecx(s, l2);
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v2 05/25] target/i386: cleanup cc_op changes for REP/REPZ/REPNZ
2024-05-06 8:09 ` [PATCH v2 05/25] target/i386: cleanup cc_op changes for REP/REPZ/REPNZ Paolo Bonzini
@ 2024-05-06 16:07 ` Richard Henderson
2024-05-06 16:31 ` Paolo Bonzini
0 siblings, 1 reply; 38+ messages in thread
From: Richard Henderson @ 2024-05-06 16:07 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel; +Cc: zhao1.liu
On 5/6/24 01:09, Paolo Bonzini wrote:
> gen_update_cc_op must be called before control flow splits. Do it
> where the jump on ECX!=0 is translated.
>
> On the other hand, remove the call before gen_jcc1, which takes care of
> it already, and explain why REPZ/REPNZ need not use CC_OP_DYNAMIC---the
> translation block ends before any control-flow-dependent cc_op could
> be observed.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target/i386/tcg/translate.c | 13 +++++++++----
> 1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index 3f1d2858fc9..6b766f5dd3f 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -1242,11 +1242,15 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
> }
>
> /* XXX: does not work with gdbstub "ice" single step - not a
> - serious problem */
> + serious problem. The caller can jump to the returned label
> + to stop the REP but, if the flags have changed, it has to call
> + gen_update_cc_op before doing so. */
> static TCGLabel *gen_jz_ecx_string(DisasContext *s)
> {
> TCGLabel *l1 = gen_new_label();
> TCGLabel *l2 = gen_new_label();
> +
> + gen_update_cc_op(s);
> gen_op_jnz_ecx(s, l1);
> gen_set_label(l2);
> gen_jmp_rel_csize(s, 0, 1);
> @@ -1342,7 +1346,6 @@ static void gen_repz(DisasContext *s, MemOp ot,
> void (*fn)(DisasContext *s, MemOp ot))
> {
> TCGLabel *l2;
> - gen_update_cc_op(s);
> l2 = gen_jz_ecx_string(s);
> fn(s, ot);
> gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
Ok.
> @@ -1364,11 +1367,13 @@ static void gen_repz2(DisasContext *s, MemOp ot, int nz,
> void (*fn)(DisasContext *s, MemOp ot))
> {
> TCGLabel *l2;
> - gen_update_cc_op(s);
> l2 = gen_jz_ecx_string(s);
> + /*
> + * Only one iteration is done at a time, so there is
> + * no control flow junction here and cc_op is never dynamic.
> + */
> fn(s, ot);
> gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
> - gen_update_cc_op(s);
> gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);
> if (s->repz_opt) {
> gen_op_jz_ecx(s, l2);
Ok, but only because gen_jcc1 does the gen_update_cc_op. The comment is neither correct
nor necessary.
The reason to write cc_op before branches instead of junctions is to avoid having *two*
writes of cc_op on either side of the branch.
r~
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v2 05/25] target/i386: cleanup cc_op changes for REP/REPZ/REPNZ
2024-05-06 16:07 ` Richard Henderson
@ 2024-05-06 16:31 ` Paolo Bonzini
2024-05-06 16:39 ` Richard Henderson
0 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 16:31 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, zhao1.liu
On Mon, May 6, 2024 at 6:08 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
> > - gen_update_cc_op(s);
> > l2 = gen_jz_ecx_string(s);
> > + /*
> > + * Only one iteration is done at a time, so there is
> > + * no control flow junction here and cc_op is never dynamic.
> > + */
> > fn(s, ot);
> > gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
> > - gen_update_cc_op(s);
> > gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);
> > if (s->repz_opt) {
> > gen_op_jz_ecx(s, l2);
>
> Ok, but only because gen_jcc1 does the gen_update_cc_op. The comment is neither correct
> nor necessary.
Yeah, it's true that gen_jcc1 does the update. On the other hand,
there are two different kinds of cc_op updates:
1) at branches, if you know that only one of the sides might write
cc_op - so you ensure it's up-to-date before the branch - and set
CC_OP_DYNAMIC at the junction. Same if you have movcond instead of a
branch.
2) at end of translation block, to spill the value lazily (because in
the middle of the TB we are able to restore it from insn data). With
these patches there is never a need to do this, because gen_jmp_rel()
and gen_jmp_rel_csize() take care of it.
The comment deals with the former, the removal with the latter.
The idea of the comment is that after SCAS/CMPS you have CC_OP_SUB*,
so in principle you may expect that you need to set CC_OP_DYNAMIC
explicitly at the end of a REPZ/REPNZ, which is where the CX != 0 and
CX == 0 paths join. But this is not necessary, because there is
nothing after that instruction - the translation block ends.
So I guess the comment could instead be placed at the end of the function?
/*
* Only one iteration is done at a time, so the translation
* block has ended unconditionally at this point and there
* is no control flow junction - no need to set CC_OP_DYNAMIC.
*/
What do you think?
Paolo
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v2 05/25] target/i386: cleanup cc_op changes for REP/REPZ/REPNZ
2024-05-06 16:31 ` Paolo Bonzini
@ 2024-05-06 16:39 ` Richard Henderson
0 siblings, 0 replies; 38+ messages in thread
From: Richard Henderson @ 2024-05-06 16:39 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel, zhao1.liu
On 5/6/24 09:31, Paolo Bonzini wrote:
> The comment deals with the former, the removal with the latter.
>
> The idea of the comment is that after SCAS/CMPS you have CC_OP_SUB*,
> so in principle you may expect that you need to set CC_OP_DYNAMIC
> explicitly at the end of a REPZ/REPNZ, which is where the CX != 0 and
> CX == 0 paths join. But this is not necessary, because there is
> nothing after that instruction - the translation block ends.
>
> So I guess the comment could instead be placed at the end of the function?
>
> /*
> * Only one iteration is done at a time, so the translation
> * block has ended unconditionally at this point and there
> * is no control flow junction - no need to set CC_OP_DYNAMIC.
> */
>
> What do you think?
Just before gen_jmp_rel_csize? Yes, that seems good.
r~
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 06/25] target/i386: pull cc_op update to callers of gen_jmp_rel{, _csize}
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (4 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 05/25] target/i386: cleanup cc_op changes for REP/REPZ/REPNZ Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:12 ` [PATCH v2 06/25] target/i386: pull cc_op update to callers of gen_jmp_rel{,_csize} Richard Henderson
2024-05-06 8:09 ` [PATCH v2 07/25] target/i386: extend cc_* when using them to compute flags Paolo Bonzini
` (18 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
gen_update_cc_op must be called before control flow splits. Doing it
in gen_jmp_rel{,_csize} may hide bugs; instead, assert that cc_op is
clean---even if that means a few more calls to gen_update_cc_op().
With this new invariant, setting cc_op to CC_OP_DYNAMIC is unnecessary
since the caller should have done it.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 6b766f5dd3f..17bf85da0ce 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2852,6 +2852,8 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
target_ulong new_pc = s->pc + diff;
target_ulong new_eip = new_pc - s->cs_base;
+ assert(!s->cc_op_dirty);
+
/* In 64-bit mode, operand size is fixed at 64 bits. */
if (!CODE64(s)) {
if (ot == MO_16) {
@@ -2865,9 +2867,6 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
}
new_eip &= mask;
- gen_update_cc_op(s);
- set_cc_op(s, CC_OP_DYNAMIC);
-
if (tb_cflags(s->base.tb) & CF_PCREL) {
tcg_gen_addi_tl(cpu_eip, cpu_eip, new_pc - s->pc_save);
/*
@@ -5145,6 +5144,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
: (int16_t)insn_get(env, s, MO_16));
gen_push_v(s, eip_next_tl(s));
gen_bnd_jmp(s);
+ gen_update_cc_op(s);
gen_jmp_rel(s, dflag, diff, 0);
}
break;
@@ -5168,6 +5168,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
? (int32_t)insn_get(env, s, MO_32)
: (int16_t)insn_get(env, s, MO_16));
gen_bnd_jmp(s);
+ gen_update_cc_op(s);
gen_jmp_rel(s, dflag, diff, 0);
}
break;
@@ -5188,6 +5189,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
case 0xeb: /* jmp Jb */
{
int diff = (int8_t)insn_get(env, s, MO_8);
+ gen_update_cc_op(s);
gen_jmp_rel(s, dflag, diff, 0);
}
break;
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v2 06/25] target/i386: pull cc_op update to callers of gen_jmp_rel{,_csize}
2024-05-06 8:09 ` [PATCH v2 06/25] target/i386: pull cc_op update to callers of gen_jmp_rel{, _csize} Paolo Bonzini
@ 2024-05-06 16:12 ` Richard Henderson
0 siblings, 0 replies; 38+ messages in thread
From: Richard Henderson @ 2024-05-06 16:12 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel; +Cc: zhao1.liu
On 5/6/24 01:09, Paolo Bonzini wrote:
> gen_update_cc_op must be called before control flow splits. Doing it
> in gen_jmp_rel{,_csize} may hide bugs, instead assert that cc_op is
> clean---even if that means a few more calls to gen_update_cc_op().
>
> With this new invariant, setting cc_op to CC_OP_DYNAMIC is unnecessary
> since the caller should have done it.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target/i386/tcg/translate.c | 8 +++++---
> 1 file changed, 5 insertions(+), 3 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 07/25] target/i386: extend cc_* when using them to compute flags
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (5 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 06/25] target/i386: pull cc_op update to callers of gen_jmp_rel{,_csize} Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:16 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 08/25] target/i386: do not use s->T0 and s->T1 as scratch registers for CCPrepare Paolo Bonzini
` (17 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Instead of using s->tmp0 or s->tmp4 as the result, just extend the cc_*
registers in place. This is harmless and, if multiple setcc instructions
are used, the optimizer will be able to remove the redundant extensions.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 44 +++++++++++++++----------------------
1 file changed, 18 insertions(+), 26 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 17bf85da0ce..d76f72c1b96 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -940,28 +940,24 @@ static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size)
/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
- TCGv t0, t1;
MemOp size;
switch (s->cc_op) {
case CC_OP_SUBB ... CC_OP_SUBQ:
/* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
size = s->cc_op - CC_OP_SUBB;
- t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
- /* If no temporary was used, be careful not to alias t1 and t0. */
- t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
- tcg_gen_mov_tl(t0, s->cc_srcT);
- gen_extu(size, t0);
- goto add_sub;
+ gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
+ gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ return (CCPrepare) { .cond = TCG_COND_LTU, .reg = s->cc_srcT,
+ .reg2 = cpu_cc_src, .use_reg2 = true };
case CC_OP_ADDB ... CC_OP_ADDQ:
/* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
size = s->cc_op - CC_OP_ADDB;
- t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
- t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
- add_sub:
- return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
- .reg2 = t1, .use_reg2 = true };
+ gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size, false);
+ gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ return (CCPrepare) { .cond = TCG_COND_LTU, .reg = cpu_cc_dst,
+ .reg2 = cpu_cc_src, .use_reg2 = true };
case CC_OP_LOGICB ... CC_OP_LOGICQ:
case CC_OP_CLR:
@@ -984,8 +980,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_BMILGB ... CC_OP_BMILGQ:
size = s->cc_op - CC_OP_BMILGB;
- t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
- return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0 };
+ gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src };
case CC_OP_ADCX:
case CC_OP_ADCOX:
@@ -1098,7 +1094,6 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
int inv, jcc_op, cond;
MemOp size;
CCPrepare cc;
- TCGv t0;
inv = b & 1;
jcc_op = (b >> 1) & 7;
@@ -1109,24 +1104,21 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
size = s->cc_op - CC_OP_SUBB;
switch (jcc_op) {
case JCC_BE:
- tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
- gen_extu(size, s->tmp4);
- t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
- cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
- .reg2 = t0, .use_reg2 = true };
+ gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
+ gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
+ cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->cc_srcT,
+ .reg2 = cpu_cc_src, .use_reg2 = true };
break;
-
case JCC_L:
cond = TCG_COND_LT;
goto fast_jcc_l;
case JCC_LE:
cond = TCG_COND_LE;
fast_jcc_l:
- tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
- gen_exts(size, s->tmp4);
- t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
- cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
- .reg2 = t0, .use_reg2 = true };
+ gen_ext_tl(s->cc_srcT, s->cc_srcT, size, true);
+ gen_ext_tl(cpu_cc_src, cpu_cc_src, size, true);
+ cc = (CCPrepare) { .cond = cond, .reg = s->cc_srcT,
+ .reg2 = cpu_cc_src, .use_reg2 = true };
break;
default:
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 08/25] target/i386: do not use s->T0 and s->T1 as scratch registers for CCPrepare
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (6 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 07/25] target/i386: extend cc_* when using them to compute flags Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:18 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 09/25] target/i386: clarify the "reg" argument of functions returning CCPrepare Paolo Bonzini
` (16 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Instead of using s->T0 or s->T1, create a scratch register
when computing the C, NC, L or LE conditions.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index d76f72c1b96..2cd7868d596 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -998,6 +998,9 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
/* The need to compute only C from CC_OP_DYNAMIC is important
in efficiently implementing e.g. INC at the start of a TB. */
gen_update_cc_op(s);
+ if (!reg) {
+ reg = tcg_temp_new();
+ }
gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
cpu_cc_src2, cpu_cc_op);
return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
@@ -1152,8 +1155,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
break;
case JCC_L:
gen_compute_eflags(s);
- if (reg == cpu_cc_src) {
- reg = s->tmp0;
+ if (!reg || reg == cpu_cc_src) {
+ reg = tcg_temp_new();
}
tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
@@ -1162,8 +1165,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
default:
case JCC_LE:
gen_compute_eflags(s);
- if (reg == cpu_cc_src) {
- reg = s->tmp0;
+ if (!reg || reg == cpu_cc_src) {
+ reg = tcg_temp_new();
}
tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S);
cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg,
@@ -1208,7 +1211,7 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
- CCPrepare cc = gen_prepare_cc(s, b, s->T0);
+ CCPrepare cc = gen_prepare_cc(s, b, NULL);
if (cc.use_reg2) {
tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -1223,7 +1226,7 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
cc_op is clean. */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
- CCPrepare cc = gen_prepare_cc(s, b, s->T0);
+ CCPrepare cc = gen_prepare_cc(s, b, NULL);
gen_update_cc_op(s);
if (cc.use_reg2) {
@@ -2492,7 +2495,7 @@ static void gen_jcc(DisasContext *s, int b, int diff)
static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
{
- CCPrepare cc = gen_prepare_cc(s, b, s->T1);
+ CCPrepare cc = gen_prepare_cc(s, b, NULL);
if (!cc.use_reg2) {
cc.reg2 = tcg_constant_tl(cc.imm);
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 09/25] target/i386: clarify the "reg" argument of functions returning CCPrepare
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (7 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 08/25] target/i386: do not use s->T0 and s->T1 as scratch registers for CCPrepare Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:19 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 10/25] target/i386: cleanup *gen_eob* Paolo Bonzini
` (15 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 2cd7868d596..7efd12cbe7e 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -937,7 +937,7 @@ static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size)
}
}
-/* compute eflags.C to reg */
+/* compute eflags.C, trying to store it in reg if not NULL */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
MemOp size;
@@ -1008,7 +1008,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
}
}
-/* compute eflags.P to reg */
+/* compute eflags.P, trying to store it in reg if not NULL */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
gen_compute_eflags(s);
@@ -1016,7 +1016,7 @@ static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
.imm = CC_P };
}
-/* compute eflags.S to reg */
+/* compute eflags.S, trying to store it in reg if not NULL */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
switch (s->cc_op) {
@@ -1040,7 +1040,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
}
}
-/* compute eflags.O to reg */
+/* compute eflags.O, trying to store it in reg if not NULL */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
switch (s->cc_op) {
@@ -1060,7 +1060,7 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
}
}
-/* compute eflags.Z to reg */
+/* compute eflags.Z, trying to store it in reg if not NULL */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
switch (s->cc_op) {
@@ -1090,8 +1090,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
}
}
-/* perform a conditional store into register 'reg' according to jump opcode
- value 'b'. In the fast case, T0 is guaranteed not to be used. */
+/* return how to compute jump opcode 'b'. 'reg' can be clobbered
+ * if needed; it may be used for CCPrepare.reg if that will
+ * provide more freedom in the translation of a subsequent setcond. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
int inv, jcc_op, cond;
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 10/25] target/i386: cleanup *gen_eob*
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (8 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 09/25] target/i386: clarify the "reg" argument of functions returning CCPrepare Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:21 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 11/25] target/i386: reintroduce debugging mechanism Paolo Bonzini
` (14 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Create a new wrapper for syscall/sysret, and do not go through multiple
layers of wrappers.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 7efd12cbe7e..e36ed4dcc10 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2782,7 +2782,7 @@ static void gen_bnd_jmp(DisasContext *s)
If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
S->TF. This is used by the syscall/sysret insns. */
static void
-do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
+gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
{
bool inhibit_reset;
@@ -2816,28 +2816,27 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
}
static inline void
-gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
+gen_eob_syscall(DisasContext *s)
{
- do_gen_eob_worker(s, inhibit, recheck_tf, false);
+ gen_eob_worker(s, false, true, false);
}
-/* End of block.
- If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
-static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
+/* End of block. Set HF_INHIBIT_IRQ_MASK if it isn't already set. */
+static void gen_eob_inhibit_irq(DisasContext *s)
{
- gen_eob_worker(s, inhibit, false);
+ gen_eob_worker(s, true, false, false);
}
/* End of block, resetting the inhibit irq flag. */
static void gen_eob(DisasContext *s)
{
- gen_eob_worker(s, false, false);
+ gen_eob_worker(s, false, false, false);
}
/* Jump to register */
static void gen_jr(DisasContext *s)
{
- do_gen_eob_worker(s, false, false, true);
+ gen_eob_worker(s, false, false, true);
}
/* Jump to eip+diff, truncating the result to OT. */
@@ -5590,7 +5589,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
gen_set_eflags(s, IF_MASK);
/* interruptions are enabled only the first insn after sti */
gen_update_eip_next(s);
- gen_eob_inhibit_irq(s, true);
+ gen_eob_inhibit_irq(s);
}
break;
case 0x62: /* bound */
@@ -5724,7 +5723,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
/* TF handling for the syscall insn is different. The TF bit is checked
after the syscall insn completes. This allows #DB to not be
generated after one has entered CPL0 if TF is set in FMASK. */
- gen_eob_worker(s, false, true);
+ gen_eob_syscall(s);
break;
case 0x107: /* sysret */
/* For Intel SYSRET is only valid in long mode */
@@ -5743,7 +5742,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
checked after the sysret insn completes. This allows #DB to be
generated "as if" the syscall insn in userspace has just
completed. */
- gen_eob_worker(s, false, true);
+ gen_eob_syscall(s);
}
break;
case 0x1a2: /* cpuid */
@@ -7058,7 +7057,7 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
case DISAS_EOB_INHIBIT_IRQ:
gen_update_cc_op(dc);
gen_update_eip_cur(dc);
- gen_eob_inhibit_irq(dc, true);
+ gen_eob_inhibit_irq(dc);
break;
case DISAS_JUMP:
gen_jr(dc);
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 11/25] target/i386: reintroduce debugging mechanism
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (9 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 10/25] target/i386: cleanup *gen_eob* Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:23 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 12/25] target/i386: move 00-5F opcodes to new decoder Paolo Bonzini
` (13 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 27 +++++++++++++++++++++++++++
target/i386/tcg/decode-new.c.inc | 3 +++
2 files changed, 30 insertions(+)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e36ed4dcc10..705e8f3ef49 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2968,6 +2968,9 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}
+static bool first = true;
+static unsigned long limit;
+
#include "decode-new.h"
#include "emit.c.inc"
#include "decode-new.c.inc"
@@ -3123,15 +3126,39 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
prefixes = 0;
+ if (first) {
+ const char *limit_str = getenv("QEMU_I386_LIMIT");
+ limit = limit_str ? atol(limit_str) : -1;
+ first = false;
+ }
+ bool use_new = true;
+#ifdef CONFIG_USER_ONLY
+ use_new &= limit > 0;
+#endif
+
next_byte:
s->prefix = prefixes;
b = x86_ldub_code(env, s);
/* Collect prefixes. */
switch (b) {
default:
+#ifndef CONFIG_USER_ONLY
+ use_new &= b <= limit;
+#endif
+ if (use_new && 0) {
+ disas_insn_new(s, cpu, b);
+ return true;
+ }
break;
case 0x0f:
b = x86_ldub_code(env, s) + 0x100;
+#ifndef CONFIG_USER_ONLY
+ use_new &= b <= limit;
+#endif
+ if (use_new && 0) {
+ disas_insn_new(s, cpu, b);
+ return true;
+ }
break;
case 0xf3:
prefixes |= PREFIX_REPZ;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 426c4594120..3fc6485d74c 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1689,6 +1689,9 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
X86DecodeFunc decode_func = decode_root;
uint8_t cc_live;
+#ifdef CONFIG_USER_ONLY
+ if (limit) { --limit; }
+#endif
s->has_modrm = false;
next_byte:
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 12/25] target/i386: move 00-5F opcodes to new decoder
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (10 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 11/25] target/i386: reintroduce debugging mechanism Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 13/25] target/i386: extract gen_far_call/jmp, reordering temporaries Paolo Bonzini
` (12 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 2 +-
target/i386/tcg/decode-new.c.inc | 120 ++++++++++++++++++
target/i386/tcg/emit.c.inc | 202 +++++++++++++++++++++++++++++++
3 files changed, 323 insertions(+), 1 deletion(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 705e8f3ef49..8a9c265ae51 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3145,7 +3145,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#ifndef CONFIG_USER_ONLY
use_new &= b <= limit;
#endif
- if (use_new && 0) {
+ if (use_new && b <= 0x5f) {
disas_insn_new(s, cpu, b);
return true;
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 3fc6485d74c..1e792426ff5 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -121,6 +121,8 @@
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...) \
X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
+#define X86_OP_GROUPw(op, op0, s0, ...) \
+ X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP0(op, ...) \
X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
@@ -140,12 +142,23 @@
.op3 = X86_TYPE_I, .s3 = X86_SIZE_b, \
## __VA_ARGS__)
+/*
+ * Short forms that are mostly useful for ALU opcodes and other
+ * one-byte opcodes. For vector instructions it is usually
+ * clearer to write all three operands explicitly, because the
+ * corresponding gen_* function will use OP_PTRn rather than s->T0
+ * and s->T1.
+ */
+#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...) \
+ X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \
X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...) \
X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRYr(op, op0, s0, ...) \
X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
+#define X86_OP_ENTRY1(op, op0, s0, ...) \
+ X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_ENTRY0(op, ...) \
X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
@@ -1096,7 +1109,114 @@ static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint
}
static const X86OpEntry opcodes_root[256] = {
+ [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
+ [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
+ [0x02] = X86_OP_ENTRY2(ADD, G,b, E,b, lock),
+ [0x03] = X86_OP_ENTRY2(ADD, G,v, E,v, lock),
+ [0x04] = X86_OP_ENTRY2(ADD, 0,b, I,b, lock), /* AL, Ib */
+ [0x05] = X86_OP_ENTRY2(ADD, 0,v, I,z, lock), /* rAX, Iz */
+ [0x06] = X86_OP_ENTRYr(PUSH, ES, w, chk(i64)),
+ [0x07] = X86_OP_ENTRYw(POP, ES, w, chk(i64)),
+
+ [0x10] = X86_OP_ENTRY2(ADC, E,b, G,b, lock),
+ [0x11] = X86_OP_ENTRY2(ADC, E,v, G,v, lock),
+ [0x12] = X86_OP_ENTRY2(ADC, G,b, E,b, lock),
+ [0x13] = X86_OP_ENTRY2(ADC, G,v, E,v, lock),
+ [0x14] = X86_OP_ENTRY2(ADC, 0,b, I,b, lock), /* AL, Ib */
+ [0x15] = X86_OP_ENTRY2(ADC, 0,v, I,z, lock), /* rAX, Iz */
+ [0x16] = X86_OP_ENTRYr(PUSH, SS, w, chk(i64)),
+ [0x17] = X86_OP_ENTRYw(POP, SS, w, chk(i64)),
+
+ [0x20] = X86_OP_ENTRY2(AND, E,b, G,b, lock),
+ [0x21] = X86_OP_ENTRY2(AND, E,v, G,v, lock),
+ [0x22] = X86_OP_ENTRY2(AND, G,b, E,b, lock),
+ [0x23] = X86_OP_ENTRY2(AND, G,v, E,v, lock),
+ [0x24] = X86_OP_ENTRY2(AND, 0,b, I,b, lock), /* AL, Ib */
+ [0x25] = X86_OP_ENTRY2(AND, 0,v, I,z, lock), /* rAX, Iz */
+ [0x26] = {},
+ [0x27] = X86_OP_ENTRY0(DAA, chk(i64)),
+
+ [0x30] = X86_OP_ENTRY2(XOR, E,b, G,b, lock),
+ [0x31] = X86_OP_ENTRY2(XOR, E,v, G,v, lock),
+ [0x32] = X86_OP_ENTRY2(XOR, G,b, E,b, lock),
+ [0x33] = X86_OP_ENTRY2(XOR, G,v, E,v, lock),
+ [0x34] = X86_OP_ENTRY2(XOR, 0,b, I,b, lock), /* AL, Ib */
+ [0x35] = X86_OP_ENTRY2(XOR, 0,v, I,z, lock), /* rAX, Iz */
+ [0x36] = {},
+ [0x37] = X86_OP_ENTRY0(AAA, chk(i64)),
+
+ [0x40] = X86_OP_ENTRY1(INC, 0,v, chk(i64)),
+ [0x41] = X86_OP_ENTRY1(INC, 1,v, chk(i64)),
+ [0x42] = X86_OP_ENTRY1(INC, 2,v, chk(i64)),
+ [0x43] = X86_OP_ENTRY1(INC, 3,v, chk(i64)),
+ [0x44] = X86_OP_ENTRY1(INC, 4,v, chk(i64)),
+ [0x45] = X86_OP_ENTRY1(INC, 5,v, chk(i64)),
+ [0x46] = X86_OP_ENTRY1(INC, 6,v, chk(i64)),
+ [0x47] = X86_OP_ENTRY1(INC, 7,v, chk(i64)),
+
+ [0x50] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x51] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x52] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x53] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x54] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x55] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+
+
+ [0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
+ [0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
+ [0x0A] = X86_OP_ENTRY2(OR, G,b, E,b, lock),
+ [0x0B] = X86_OP_ENTRY2(OR, G,v, E,v, lock),
+ [0x0C] = X86_OP_ENTRY2(OR, 0,b, I,b, lock), /* AL, Ib */
+ [0x0D] = X86_OP_ENTRY2(OR, 0,v, I,z, lock), /* rAX, Iz */
+ [0x0E] = X86_OP_ENTRYr(PUSH, CS, w, chk(i64)),
[0x0F] = X86_OP_GROUP0(0F),
+
+ [0x18] = X86_OP_ENTRY2(SBB, E,b, G,b, lock),
+ [0x19] = X86_OP_ENTRY2(SBB, E,v, G,v, lock),
+ [0x1A] = X86_OP_ENTRY2(SBB, G,b, E,b, lock),
+ [0x1B] = X86_OP_ENTRY2(SBB, G,v, E,v, lock),
+ [0x1C] = X86_OP_ENTRY2(SBB, 0,b, I,b, lock), /* AL, Ib */
+ [0x1D] = X86_OP_ENTRY2(SBB, 0,v, I,z, lock), /* rAX, Iz */
+ [0x1E] = X86_OP_ENTRYr(PUSH, DS, w, chk(i64)),
+ [0x1F] = X86_OP_ENTRYw(POP, DS, w, chk(i64)),
+
+ [0x28] = X86_OP_ENTRY2(SUB, E,b, G,b, lock),
+ [0x29] = X86_OP_ENTRY2(SUB, E,v, G,v, lock),
+ [0x2A] = X86_OP_ENTRY2(SUB, G,b, E,b, lock),
+ [0x2B] = X86_OP_ENTRY2(SUB, G,v, E,v, lock),
+ [0x2C] = X86_OP_ENTRY2(SUB, 0,b, I,b, lock), /* AL, Ib */
+ [0x2D] = X86_OP_ENTRY2(SUB, 0,v, I,z, lock), /* rAX, Iz */
+ [0x2E] = {},
+ [0x2F] = X86_OP_ENTRY0(DAS, chk(i64)),
+
+ [0x38] = X86_OP_ENTRYrr(SUB, E,b, G,b),
+ [0x39] = X86_OP_ENTRYrr(SUB, E,v, G,v),
+ [0x3A] = X86_OP_ENTRYrr(SUB, G,b, E,b),
+ [0x3B] = X86_OP_ENTRYrr(SUB, G,v, E,v),
+ [0x3C] = X86_OP_ENTRYrr(SUB, 0,b, I,b), /* AL, Ib */
+ [0x3D] = X86_OP_ENTRYrr(SUB, 0,v, I,z), /* rAX, Iz */
+ [0x3E] = {},
+ [0x3F] = X86_OP_ENTRY0(AAS, chk(i64)),
+
+ [0x48] = X86_OP_ENTRY1(DEC, 0,v, chk(i64)),
+ [0x49] = X86_OP_ENTRY1(DEC, 1,v, chk(i64)),
+ [0x4A] = X86_OP_ENTRY1(DEC, 2,v, chk(i64)),
+ [0x4B] = X86_OP_ENTRY1(DEC, 3,v, chk(i64)),
+ [0x4C] = X86_OP_ENTRY1(DEC, 4,v, chk(i64)),
+ [0x4D] = X86_OP_ENTRY1(DEC, 5,v, chk(i64)),
+ [0x4E] = X86_OP_ENTRY1(DEC, 6,v, chk(i64)),
+ [0x4F] = X86_OP_ENTRY1(DEC, 7,v, chk(i64)),
+
+ [0x58] = X86_OP_ENTRYw(POP, LoBits,d64),
+ [0x59] = X86_OP_ENTRYw(POP, LoBits,d64),
+ [0x5A] = X86_OP_ENTRYw(POP, LoBits,d64),
+ [0x5B] = X86_OP_ENTRYw(POP, LoBits,d64),
+ [0x5C] = X86_OP_ENTRYw(POP, LoBits,d64),
+ [0x5D] = X86_OP_ENTRYw(POP, LoBits,d64),
+ [0x5E] = X86_OP_ENTRYw(POP, LoBits,d64),
+ [0x5F] = X86_OP_ENTRYw(POP, LoBits,d64),
};
#undef mmx
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 0e00f6635dd..a64186b8957 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -352,6 +352,20 @@ static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
decode->cc_op = op;
}
+static void prepare_update_cc_incdec(X86DecodedInsn *decode, DisasContext *s, CCOp op)
+{
+ gen_compute_eflags_c(s, s->T1);
+ prepare_update2_cc(decode, s, op);
+}
+
+static void prepare_update3_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op, TCGv reg)
+{
+ decode->cc_src2 = reg;
+ decode->cc_src = s->T1;
+ decode->cc_dst = s->T0;
+ decode->cc_op = op;
+}
+
static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs)
{
MemOp ot = decode->op[0].ot;
@@ -1040,6 +1054,37 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
VSIB_AVX(VPGATHERD, vpgatherd)
VSIB_AVX(VPGATHERQ, vpgatherq)
+static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_helper_aaa(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_helper_aas(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ TCGv c_in = tcg_temp_new();
+
+ gen_compute_eflags_c(s, c_in);
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_add_tl(s->T0, c_in, s->T1);
+ tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_add_tl(s->T0, s->T0, s->T1);
+ tcg_gen_add_tl(s->T0, s->T0, c_in);
+ }
+ prepare_update3_cc(decode, s, CC_OP_ADCB + ot, c_in);
+}
+
/* ADCX/ADOX do not have memory operands and can use set_cc_op. */
static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
{
@@ -1093,11 +1138,37 @@ static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
}
+static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_add_tl(s->T0, s->T0, s->T1);
+ }
+ prepare_update2_cc(decode, s, CC_OP_ADDB + ot);
+}
+
static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
}
+static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_and_fetch_tl(s->T0, s->A0, s->T1,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ }
+ prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
+}
+
static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1331,6 +1402,34 @@ static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
+static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_helper_daa(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_DAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_helper_das(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+}
+
+static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+
+ tcg_gen_movi_tl(s->T1, -1);
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_add_tl(s->T0, s->T0, s->T1);
+ }
+ prepare_update_cc_incdec(decode, s, CC_OP_DECB + ot);
+}
+
static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_helper_emms(tcg_env);
@@ -1349,6 +1448,20 @@ static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
}
+static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+
+ tcg_gen_movi_tl(s->T1, 1);
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_add_tl(s->T0, s->T0, s->T1);
+ }
+ prepare_update_cc_incdec(decode, s, CC_OP_INCB + ot);
+}
+
static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
@@ -1501,6 +1614,19 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_or_fetch_tl(s->T0, s->A0, s->T1,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_or_tl(s->T0, s->T0, s->T1);
+ }
+ prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
+}
+
static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -1744,6 +1870,18 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
+static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = gen_pop_T0(s);
+ if (decode->op[0].has_ea) {
+ /* NOTE: order is important for MMU exceptions */
+ gen_op_st_v(s, ot, s->T0, s->A0);
+ decode->op[0].unit = X86_OP_SKIP;
+ }
+ /* NOTE: writing back registers after update is important for pop %sp */
+ gen_pop_update(s, ot);
+}
+
static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -1890,6 +2028,11 @@ static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
}
}
+static void gen_PUSH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_push_v(s, s->T1);
+}
+
static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1924,6 +2067,28 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_sar_tl(s->T0, s->T0, s->T1);
}
+static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv c_in = tcg_temp_new();
+
+ gen_compute_eflags_c(s, c_in);
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_add_tl(s->T0, s->T1, c_in);
+ tcg_gen_neg_tl(s->T0, s->T0);
+ tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0,
+ s->mem_index, ot | MO_LE);
+ } else {
+ /*
+ * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by
+ * negsetcond, and CC_OP_SUBB as the cc_op.
+ */
+ tcg_gen_sub_tl(s->T0, s->T0, s->T1);
+ tcg_gen_sub_tl(s->T0, s->T0, c_in);
+ }
+ prepare_update3_cc(decode, s, CC_OP_SBBB + ot, c_in);
+}
+
static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
@@ -2011,6 +2176,22 @@ static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
}
+static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit SUB: T0 = T0 - T1, with the pre-subtraction left operand kept in cc_srcT. */
+ MemOp ot = decode->op[1].ot;
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_neg_tl(s->T0, s->T1); /* T0 = -T1, so the atomic add below subtracts */
+ tcg_gen_atomic_fetch_add_tl(s->cc_srcT, s->A0, s->T0,
+ s->mem_index, ot | MO_LE); /* fetch_add: cc_srcT receives the value returned by the op (old memory value, per the op name) */
+ tcg_gen_sub_tl(s->T0, s->cc_srcT, s->T1); /* recompute the result from that value */
+ } else {
+ tcg_gen_mov_tl(s->cc_srcT, s->T0); /* save the left operand before it is overwritten */
+ tcg_gen_sub_tl(s->T0, s->T0, s->T1);
+ }
+ prepare_update2_cc(decode, s, CC_OP_SUBB + ot); /* cc_op is indexed by operand size */
+}
+
static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
assert(!s->vex_l);
@@ -2490,3 +2671,24 @@ static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *de
tcg_gen_gvec_dup_imm(MO_64, offset, 16, 16, 0);
}
}
+
+static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit XOR: T0 = T0 ^ T1, with a shortcut when both sources are the same integer register. */
+ /* special case XOR reg, reg */
+ if (decode->op[1].unit == X86_OP_INT &&
+ decode->op[2].unit == X86_OP_INT &&
+ decode->op[1].n == decode->op[2].n) {
+ tcg_gen_movi_tl(s->T0, 0); /* x ^ x is always zero, so no XOR op is emitted */
+ decode->cc_op = CC_OP_CLR;
+ } else {
+ MemOp ot = decode->op[1].ot;
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T1,
+ s->mem_index, ot | MO_LE); /* LOCK prefix: operate on memory at A0 atomically */
+ } else {
+ tcg_gen_xor_tl(s->T0, s->T0, s->T1);
+ }
+ prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); /* cc_op is indexed by operand size */
+ }
+}
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread* [PATCH v2 13/25] target/i386: extract gen_far_call/jmp, reordering temporaries
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (11 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 12/25] target/i386: move 00-5F opcodes to new decoder Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 14/25] target/i386: allow instructions with more than one immediate Paolo Bonzini
` (11 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Extract the code into new functions, and swap T0/T1 so that T0 corresponds
to the first immediate in the instruction stream.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 93 +++++++++++++++++++++----------------
1 file changed, 53 insertions(+), 40 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 8a9c265ae51..4069bd4f125 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2511,12 +2511,13 @@ static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
offsetof(CPUX86State,segs[seg_reg].selector));
}
-static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
+static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg)
{
- tcg_gen_ext16u_tl(s->T0, s->T0);
- tcg_gen_st32_tl(s->T0, tcg_env,
+ TCGv selector = tcg_temp_new();
+ tcg_gen_ext16u_tl(selector, seg);
+ tcg_gen_st32_tl(selector, tcg_env,
offsetof(CPUX86State,segs[seg_reg].selector));
- tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
+ tcg_gen_shli_tl(cpu_seg_base[seg_reg], selector, 4);
}
/* move T0 to seg_reg and compute if the CPU state may change. Never
@@ -2536,13 +2537,45 @@ static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
s->base.is_jmp = DISAS_EOB_NEXT;
}
} else {
- gen_op_movl_seg_T0_vm(s, seg_reg);
+ gen_op_movl_seg_real(s, seg_reg, s->T0);
if (seg_reg == R_SS) {
s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
}
}
}
+static void gen_far_call(DisasContext *s)
+{ /* Emit a far call; callers place the offset in T0 and the selector in T1. */
+ TCGv_i32 new_cs = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(new_cs, s->T1); /* selector is passed to both helpers as a 32-bit value */
+ if (PE(s) && !VM86(s)) {
+ gen_helper_lcall_protected(tcg_env, new_cs, s->T0,
+ tcg_constant_i32(s->dflag - 1),
+ eip_next_tl(s));
+ } else {
+ TCGv_i32 new_eip = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(new_eip, s->T0); /* this helper takes the offset as a 32-bit value too */
+ gen_helper_lcall_real(tcg_env, new_cs, new_eip,
+ tcg_constant_i32(s->dflag - 1),
+ eip_next_i32(s));
+ }
+ s->base.is_jmp = DISAS_JUMP; /* set on both paths */
+}
+
+static void gen_far_jmp(DisasContext *s)
+{ /* Emit a far jump; callers place the offset in T0 and the selector in T1. */
+ if (PE(s) && !VM86(s)) {
+ TCGv_i32 new_cs = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(new_cs, s->T1);
+ gen_helper_ljmp_protected(tcg_env, new_cs, s->T0,
+ eip_next_tl(s));
+ } else {
+ gen_op_movl_seg_real(s, R_CS, s->T1); /* load CS from the selector without a helper call */
+ gen_op_jmp_v(s, s->T0); /* then jump to the offset */
+ }
+ s->base.is_jmp = DISAS_JUMP; /* set on both paths */
+}
+
static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
{
/* no SVM activated; fast case */
@@ -3653,23 +3686,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
if (mod == 3) {
goto illegal_op;
}
- gen_op_ld_v(s, ot, s->T1, s->A0);
+ gen_op_ld_v(s, ot, s->T0, s->A0);
gen_add_A0_im(s, 1 << ot);
- gen_op_ld_v(s, MO_16, s->T0, s->A0);
- do_lcall:
- if (PE(s) && !VM86(s)) {
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- gen_helper_lcall_protected(tcg_env, s->tmp2_i32, s->T1,
- tcg_constant_i32(dflag - 1),
- eip_next_tl(s));
- } else {
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
- gen_helper_lcall_real(tcg_env, s->tmp2_i32, s->tmp3_i32,
- tcg_constant_i32(dflag - 1),
- eip_next_i32(s));
- }
- s->base.is_jmp = DISAS_JUMP;
+ gen_op_ld_v(s, MO_16, s->T1, s->A0);
+ gen_far_call(s);
break;
case 4: /* jmp Ev */
if (dflag == MO_16) {
@@ -3683,19 +3703,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
if (mod == 3) {
goto illegal_op;
}
- gen_op_ld_v(s, ot, s->T1, s->A0);
+ gen_op_ld_v(s, ot, s->T0, s->A0);
gen_add_A0_im(s, 1 << ot);
- gen_op_ld_v(s, MO_16, s->T0, s->A0);
- do_ljmp:
- if (PE(s) && !VM86(s)) {
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- gen_helper_ljmp_protected(tcg_env, s->tmp2_i32, s->T1,
- eip_next_tl(s));
- } else {
- gen_op_movl_seg_T0_vm(s, R_CS);
- gen_op_jmp_v(s, s->T1);
- }
- s->base.is_jmp = DISAS_JUMP;
+ gen_op_ld_v(s, MO_16, s->T1, s->A0);
+ gen_far_jmp(s);
break;
case 6: /* push Ev */
gen_push_v(s, s->T0);
@@ -5135,7 +5146,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
/* pop selector */
gen_add_A0_im(s, 1 << dflag);
gen_op_ld_v(s, dflag, s->T0, s->A0);
- gen_op_movl_seg_T0_vm(s, R_CS);
+ gen_op_movl_seg_real(s, R_CS, s->T0);
/* add stack offset */
gen_stack_update(s, val + (2 << dflag));
}
@@ -5180,10 +5191,11 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
offset = insn_get(env, s, ot);
selector = insn_get(env, s, MO_16);
- tcg_gen_movi_tl(s->T0, selector);
- tcg_gen_movi_tl(s->T1, offset);
+ tcg_gen_movi_tl(s->T0, offset);
+ tcg_gen_movi_tl(s->T1, selector);
}
- goto do_lcall;
+ gen_far_call(s);
+ break;
case 0xe9: /* jmp im */
{
int diff = (dflag != MO_16
@@ -5204,10 +5216,11 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
offset = insn_get(env, s, ot);
selector = insn_get(env, s, MO_16);
- tcg_gen_movi_tl(s->T0, selector);
- tcg_gen_movi_tl(s->T1, offset);
+ tcg_gen_movi_tl(s->T0, offset);
+ tcg_gen_movi_tl(s->T1, selector);
}
- goto do_ljmp;
+ gen_far_jmp(s);
+ break;
case 0xeb: /* jmp Jb */
{
int diff = (int8_t)insn_get(env, s, MO_8);
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread* [PATCH v2 14/25] target/i386: allow instructions with more than one immediate
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (12 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 13/25] target/i386: extract gen_far_call/jmp, reordering temporaries Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 15/25] target/i386: move 60-BF opcodes to new decoder Paolo Bonzini
` (10 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
While keeping decode->immediate for convenience and for 4-operand instructions,
store the immediate in X86DecodedOp as well. This enables instructions
with more than one immediate such as ENTER. It can also be used for far
calls and jumps.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 17 ++++++++++++-----
target/i386/tcg/decode-new.c.inc | 2 +-
target/i386/tcg/emit.c.inc | 4 +++-
3 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 15e6bfef4b1..8ffde8d1cd6 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -271,16 +271,23 @@ typedef struct X86DecodedOp {
bool has_ea;
int offset; /* For MMX and SSE */
- /*
- * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR,
- * do not access directly!
- */
- TCGv_ptr v_ptr;
+ union {
+ target_ulong imm;
+ /*
+ * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR,
+ * do not access directly!
+ */
+ TCGv_ptr v_ptr;
+ };
} X86DecodedOp;
struct X86DecodedInsn {
X86OpEntry e;
X86DecodedOp op[3];
+ /*
+ * Rightmost immediate, for convenience since most instructions have
+ * one (and also for 4-operand instructions).
+ */
target_ulong immediate;
AddressParts mem;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 1e792426ff5..c6fd7a053bd 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1473,7 +1473,7 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
case X86_TYPE_I: /* Immediate */
case X86_TYPE_J: /* Relative offset for a jump */
op->unit = X86_OP_IMM;
- decode->immediate = insn_get_signed(env, s, op->ot);
+ decode->immediate = op->imm = insn_get_signed(env, s, op->ot);
break;
case X86_TYPE_L: /* The upper 4 bits of the immediate select a 128-bit register */
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index a64186b8957..fc065caae79 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -259,7 +259,7 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
}
break;
case X86_OP_IMM:
- tcg_gen_movi_tl(v, decode->immediate);
+ tcg_gen_movi_tl(v, op->imm);
break;
case X86_OP_MMX:
@@ -283,6 +283,8 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
static TCGv_ptr op_ptr(X86DecodedInsn *decode, int opn)
{
X86DecodedOp *op = &decode->op[opn];
+
+ assert(op->unit == X86_OP_MMX || op->unit == X86_OP_SSE);
if (op->v_ptr) {
return op->v_ptr;
}
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread* [PATCH v2 15/25] target/i386: move 60-BF opcodes to new decoder
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (13 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 14/25] target/i386: allow instructions with more than one immediate Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:44 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 16/25] target/i386: generalize gen_movl_seg_T0 Paolo Bonzini
` (9 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Compared to the old decoder, the main differences in translation
are for the little-used ARPL instruction. IMUL is adjusted a bit
to share more code to produce flags, but is otherwise very similar.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 3 +
target/i386/tcg/translate.c | 9 +-
target/i386/tcg/decode-new.c.inc | 185 ++++++++++++++++++
target/i386/tcg/emit.c.inc | 323 +++++++++++++++++++++++++++++++
4 files changed, 518 insertions(+), 2 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 8ffde8d1cd6..790ad5e1d00 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -48,6 +48,7 @@ typedef enum X86OpType {
/* Custom */
X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */
+ X86_TYPE_I_unsigned, /* Immediate, zero-extended */
X86_TYPE_2op, /* 2-operand RMW instruction */
X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */
X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */
@@ -165,6 +166,8 @@ typedef enum X86InsnSpecial {
/* Always locked if it has a memory operand (XCHG) */
X86_SPECIAL_Locked,
+ /* Do not apply segment base to effective address */
+ X86_SPECIAL_NoSeg,
/*
* Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits
* (and writeback zero-extends it to 64 bits if applicable). PREFIX_DATA
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4069bd4f125..8f633814586 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1288,7 +1288,11 @@ static void gen_cmps(DisasContext *s, MemOp ot)
gen_string_movl_A0_EDI(s);
gen_op_ld_v(s, ot, s->T1, s->A0);
gen_string_movl_A0_ESI(s);
- gen_op(s, OP_CMPL, ot, OR_TMP0);
+ gen_op_ld_v(s, ot, s->T0, s->A0);
+ tcg_gen_mov_tl(cpu_cc_src, s->T1);
+ tcg_gen_mov_tl(s->cc_srcT, s->T0);
+ tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
+ set_cc_op(s, CC_OP_SUBB + ot);
dshift = gen_compute_Dshift(s, ot);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
@@ -3121,6 +3125,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
s->pc = s->base.pc_next;
s->override = -1;
+ s->popl_esp_hack = 0;
#ifdef TARGET_X86_64
s->rex_r = 0;
s->rex_x = 0;
@@ -3178,7 +3183,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#ifndef CONFIG_USER_ONLY
use_new &= b <= limit;
#endif
- if (use_new && b <= 0x5f) {
+ if (use_new && b <= 0xbf) {
disas_insn_new(s, cpu, b);
return true;
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index c6fd7a053bd..55fc0173a41 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -33,6 +33,22 @@
* ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
* "v" or "z" sizes. The decoder simply makes them separate operand sizes.
*
+ * The manual lists immediate far destinations as Ap (technically an implicit
+ * argument). The decoder splits them into two immediates, using "Ip" for
+ * the offset part (that comes first in the instruction stream) and "Iw" for
+ * the segment/selector part. The size of the offset is given by s->dflag
+ * and the instructions are illegal in 64-bit mode, so the choice of "Ip"
+ * is somewhat arbitrary; "Iv" or "Iz" would work just as well.
+ *
+ * Operand types
+ * -------------
+ *
+ * Immediates are almost always signed or masked away in helpers. Two
+ * common exceptions are IN/OUT and absolute jumps. For these, there is
+ * an additional custom operand type "I_unsigned". Alternatively, the
+ * mask could be applied (and the original sign-extended value would be
+ * optimized away by TCG) in the emitter function.
+ *
* Vector operands
* ---------------
*
@@ -151,6 +167,8 @@
*/
#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...) \
X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
+#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...) \
+ X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \
X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_ENTRYw(op, op0, s0, ...) \
@@ -163,6 +181,7 @@
X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
+#define noseg .special = X86_SPECIAL_NoSeg,
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
@@ -209,6 +228,8 @@
#define p_66_f3_f2 .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
+#define UNKNOWN_OPCODE ((X86OpEntry) {})
+
static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
{
if (!s->has_modrm) {
@@ -1108,6 +1129,51 @@ static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint
do_decode_0F(s, env, entry, b);
}
+static void decode_63(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{ /* Opcode 0x63: pick the table entry from the CPU mode and REX.W. */
+ static const X86OpEntry arpl = X86_OP_ENTRY2(ARPL, E,w, G,w, chk(prot));
+ static const X86OpEntry mov = X86_OP_ENTRY3(MOV, G,v, E,v, None, None);
+ static const X86OpEntry movsxd = X86_OP_ENTRY3(MOV, G,v, E,d, None, None, sextT0); /* same MOV emitter, "d"-sized source with sextT0 */
+ if (!CODE64(s)) {
+ *entry = arpl; /* outside 64-bit code the opcode is ARPL */
+ } else if (REX_W(s)) {
+ *entry = movsxd; /* 64-bit code with REX.W */
+ } else {
+ *entry = mov; /* 64-bit code without REX.W: plain MOV */
+ }
+}
+
+static void decode_group1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{ /* Group 1 (used by opcodes 0x80-0x83): choose the ALU emitter from the modrm byte. */
+ static const X86GenFunc group1_gen[8] = {
+ gen_ADD, gen_OR, gen_ADC, gen_SBB, gen_AND, gen_SUB, gen_XOR, gen_SUB, /* index 7 is CMP, emitted through gen_SUB */
+ };
+ int op = (get_modrm(s, env) >> 3) & 7; /* bits 5:3 of the modrm byte */
+ entry->gen = group1_gen[op];
+
+ if (op == 7) {
+ /* prevent writeback for CMP */
+ entry->op1 = entry->op0; /* the former destination becomes a source-only operand */
+ entry->op0 = X86_TYPE_None;
+ entry->s0 = X86_SIZE_None;
+ } else {
+ entry->special = X86_SPECIAL_HasLock; /* same value the table's "lock" attribute sets */
+ }
+}
+
+static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{ /* Group 1A (used by opcode 0x8F): only modrm reg field 0, POP, is accepted. */
+ int op = (get_modrm(s, env) >> 3) & 7; /* bits 5:3 of the modrm byte */
+ if (op != 0) {
+ /* could be XOP prefix too */
+ *entry = UNKNOWN_OPCODE;
+ } else {
+ entry->gen = gen_POP;
+ /* The address must use the value of ESP after the pop. */
+ s->popl_esp_hack = 1 << mo_pushpop(s, s->dflag); /* presumably the pop size in bytes (1 << MemOp) -- confirm */
+ }
+}
+
static const X86OpEntry opcodes_root[256] = {
[0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
[0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
@@ -1163,6 +1229,60 @@ static const X86OpEntry opcodes_root[256] = {
[0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64),
[0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64),
+ [0x60] = X86_OP_ENTRY0(PUSHA, chk(i64)),
+ [0x61] = X86_OP_ENTRY0(POPA, chk(i64)),
+ [0x62] = X86_OP_ENTRYrr(BOUND, G,v, M,a, chk(i64)),
+ [0x63] = X86_OP_GROUP0(63),
+ [0x64] = {},
+ [0x65] = {},
+ [0x66] = {},
+ [0x67] = {},
+
+ [0x70] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x71] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x72] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x73] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x74] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x75] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x76] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x77] = X86_OP_ENTRYr(Jcc, J,b),
+
+ [0x80] = X86_OP_GROUP2(group1, E,b, I,b),
+ [0x81] = X86_OP_GROUP2(group1, E,v, I,z),
+ [0x82] = X86_OP_GROUP2(group1, E,b, I,b, chk(i64)),
+ [0x83] = X86_OP_GROUP2(group1, E,v, I,b),
+ [0x84] = X86_OP_ENTRYrr(AND, E,b, G,b),
+ [0x85] = X86_OP_ENTRYrr(AND, E,v, G,v),
+ [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
+ [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
+
+ [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x94] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x95] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x96] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x97] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+
+ [0xA0] = X86_OP_ENTRY3(MOV, 0,b, O,b, None, None), /* AL, Ob */
+ [0xA1] = X86_OP_ENTRY3(MOV, 0,v, O,v, None, None), /* rAX, Ov */
+ [0xA2] = X86_OP_ENTRY3(MOV, O,b, 0,b, None, None), /* Ob, AL */
+ [0xA3] = X86_OP_ENTRY3(MOV, O,v, 0,v, None, None), /* Ov, rAX */
+ [0xA4] = X86_OP_ENTRYrr(MOVS, Y,b, X,b),
+ [0xA5] = X86_OP_ENTRYrr(MOVS, Y,v, X,v),
+ [0xA6] = X86_OP_ENTRYrr(CMPS, Y,b, X,b),
+ [0xA7] = X86_OP_ENTRYrr(CMPS, Y,v, X,v),
+
+ [0xB0] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xB1] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xB2] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xB3] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xB4] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xB5] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+
[0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
[0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
@@ -1217,6 +1337,61 @@ static const X86OpEntry opcodes_root[256] = {
[0x5D] = X86_OP_ENTRYw(POP, LoBits,d64),
[0x5E] = X86_OP_ENTRYw(POP, LoBits,d64),
[0x5F] = X86_OP_ENTRYw(POP, LoBits,d64),
+
+ [0x68] = X86_OP_ENTRYr(PUSH, I,z),
+ [0x69] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,z, sextT0),
+ [0x6A] = X86_OP_ENTRYr(PUSH, I,b),
+ [0x6B] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,b, sextT0),
+ [0x6C] = X86_OP_ENTRYrr(INS, Y,b, 2,w), /* DX */
+ [0x6D] = X86_OP_ENTRYrr(INS, Y,z, 2,w), /* DX */
+ [0x6E] = X86_OP_ENTRYrr(OUTS, X,b, 2,w), /* DX */
+ [0x6F] = X86_OP_ENTRYrr(OUTS, X,z, 2,w), /* DX */
+
+ [0x78] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x79] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x7A] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x7B] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x7C] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x7D] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x7E] = X86_OP_ENTRYr(Jcc, J,b),
+ [0x7F] = X86_OP_ENTRYr(Jcc, J,b),
+
+ [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None),
+ [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None),
+ [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None),
+ [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None),
+ [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None),
+ [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg),
+ [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,v, None, None),
+ [0x8F] = X86_OP_GROUPw(group1A, E,v),
+
+ [0x98] = X86_OP_ENTRY1(CBW, 0,v), /* rAX */
+ [0x99] = X86_OP_ENTRY3(CWD, 2,v, 0,v, None, None), /* rDX, rAX */
+ [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
+ [0x9B] = X86_OP_ENTRY0(WAIT),
+ [0x9C] = X86_OP_ENTRY0(PUSHF, chk(vm86_iopl) svm(PUSHF)),
+ [0x9D] = X86_OP_ENTRY0(POPF, chk(vm86_iopl) svm(POPF)),
+ [0x9E] = X86_OP_ENTRY0(SAHF),
+ [0x9F] = X86_OP_ENTRY0(LAHF),
+
+ [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b), /* AL, Ib */
+ [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z), /* rAX, Iz */
+ [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None),
+ [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None),
+ /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS. */
+ [0xAC] = X86_OP_ENTRYr(LODS, X,b),
+ [0xAD] = X86_OP_ENTRYr(LODS, X,v),
+ [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
+ [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
+
+ [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+ [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
};
#undef mmx
@@ -1476,6 +1651,11 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
decode->immediate = op->imm = insn_get_signed(env, s, op->ot);
break;
+ case X86_TYPE_I_unsigned: /* Immediate */
+ op->unit = X86_OP_IMM;
+ decode->immediate = op->imm = insn_get(env, s, op->ot);
+ break;
+
case X86_TYPE_L: /* The upper 4 bits of the immediate select a 128-bit register */
op->n = insn_get(env, s, op->ot) >> 4;
break;
@@ -2037,6 +2217,11 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
assert(decode.op[1].unit == X86_OP_INT);
break;
+ case X86_SPECIAL_NoSeg:
+ decode.mem.def_seg = -1;
+ s->override = -1;
+ break;
+
default:
break;
}
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index fc065caae79..c59793f170a 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1179,6 +1179,27 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
+static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit ARPL: raise the low two bits (RPL) of T0 to those of T1 if smaller, and set ZF accordingly. */
+ TCGv zf = tcg_temp_new();
+ TCGv flags = tcg_temp_new();
+
+ gen_mov_eflags(s, flags); /* start from the current flags; only the Z bit is replaced below */
+
+ /* Compute adjusted DST in T1, merging in SRC[RPL]. */
+ tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 0, 2); /* T1 = T0 with bits 0..1 taken from T1 */
+
+ /* Z flag set if DST[RPL] < SRC[RPL] */
+ tcg_gen_setcond_tl(TCG_COND_LTU, zf, s->T0, s->T1); /* upper bits are equal, so this compares only the RPL bits */
+ tcg_gen_deposit_tl(flags, flags, zf, ctz32(CC_Z), 1); /* insert zf at CC_Z's bit position */
+
+ /* Place maximum RPL in DST */
+ tcg_gen_umax_tl(s->T0, s->T0, s->T1);
+
+ decode->cc_src = flags;
+ decode->cc_op = CC_OP_EFLAGS;
+}
+
static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1243,6 +1264,17 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
set_cc_op(s, CC_OP_BMILGB + ot);
}
+static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit BOUND: hand the address in A0 and the value in T0 to the 16- or 32-bit helper. */
+ TCGv_i32 op = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(op, s->T0); /* both helpers take the value as 32 bits */
+ if (decode->op[1].ot == MO_16) {
+ gen_helper_boundw(tcg_env, s->A0, op);
+ } else {
+ gen_helper_boundl(tcg_env, s->A0, op);
+ }
+}
+
static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1263,6 +1295,18 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}
+static void gen_CALLF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit CALLF (opcode 0x9A in the table) through the shared far-call code. */
+ gen_far_call(s);
+}
+
+static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit CBW and its wider forms: sign-extend T0 from the next smaller operand size. */
+ MemOp src_ot = decode->op[0].ot - 1; /* one MemOp step below the operand size, i.e. half the width */
+
+ tcg_gen_ext_tl(s->T0, s->T0, src_ot | MO_SIGN);
+}
+
static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGLabel *label_top = gen_new_label();
@@ -1366,6 +1410,18 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
decode->cc_op = CC_OP_SUBB + ot;
}
+static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit CMPS, choosing the repeated or single form from the REP prefixes. */
+ MemOp ot = decode->op[2].ot;
+ if (s->prefix & PREFIX_REPNZ) {
+ gen_repz_cmps(s, ot, 1); /* third argument is 1 for REPNZ */
+ } else if (s->prefix & PREFIX_REPZ) {
+ gen_repz_cmps(s, ot, 0); /* ... and 0 for REPZ */
+ } else {
+ gen_cmps(s, ot); /* no REP prefix: a single comparison */
+ }
+}
+
static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1404,6 +1460,13 @@ static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
+static void gen_CWD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit CWD and its wider forms: replace T0 with its sign bit replicated (0 or -1). */
+ int shift = 8 << decode->op[0].ot; /* operand width in bits */
+
+ tcg_gen_sextract_tl(s->T0, s->T0, shift - 1, 1); /* sign-extend the single top bit of the operand */
+}
+
static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
@@ -1450,6 +1513,69 @@ static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
}
+static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit IMUL3: T0 = T0 * T1; T1 is left as (sign extension of the truncated result) - (full or high result). */
+ MemOp ot = decode->op[0].ot;
+ TCGv cc_src_rhs; /* value subtracted from T1 after the switch; set by every non-asserting case */
+
+ switch (ot) {
+ case MO_16:
+ /* s->T0 already sign-extended (the IMUL3 table entries use sextT0) */
+ tcg_gen_ext16s_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ /* Compare the full result to the extension of the truncated result. */
+ tcg_gen_ext16s_tl(s->T1, s->T0);
+ cc_src_rhs = s->T0;
+ break;
+
+ case MO_32:
+#ifdef TARGET_X86_64
+ if (TCG_TARGET_REG_BITS == 64) {
+ /*
+ * This produces fewer TCG ops, and better code if flags are needed,
+ * but it requires a 64-bit multiply even if they are not. Use it
+ * only if the target has 64-bit registers.
+ *
+ * s->T0 is already sign-extended.
+ */
+ tcg_gen_ext32s_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ /* Compare the full result to the extension of the truncated result. */
+ tcg_gen_ext32s_tl(s->T1, s->T0);
+ cc_src_rhs = s->T0;
+ } else {
+ /* Variant that only needs a 32-bit widening multiply. */
+ TCGv_i32 hi = tcg_temp_new_i32();
+ TCGv_i32 lo = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(lo, s->T0);
+ tcg_gen_trunc_tl_i32(hi, s->T1);
+ tcg_gen_muls2_i32(lo, hi, lo, hi); /* lo/hi hold the two inputs and are overwritten with the product halves */
+ tcg_gen_extu_i32_tl(s->T0, lo);
+
+ cc_src_rhs = tcg_temp_new();
+ tcg_gen_extu_i32_tl(cc_src_rhs, hi);
+ /* Compare the high part to the sign bit of the truncated result */
+ tcg_gen_sari_i32(lo, lo, 31);
+ tcg_gen_extu_i32_tl(s->T1, lo);
+ }
+ break;
+
+ case MO_64:
+#endif
+ cc_src_rhs = tcg_temp_new(); /* without TARGET_X86_64 this code belongs to case MO_32 */
+ tcg_gen_muls2_tl(s->T0, cc_src_rhs, s->T0, s->T1);
+ /* Compare the high part to the sign bit of the truncated result */
+ tcg_gen_sari_tl(s->T1, s->T0, TARGET_LONG_BITS - 1);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_gen_sub_tl(s->T1, s->T1, cc_src_rhs); /* zero when the two compared values agree */
+ prepare_update2_cc(decode, s, CC_OP_MULB + ot); /* cc_op is indexed by operand size */
+}
+
static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -1464,6 +1590,26 @@ static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update_cc_incdec(decode, s, CC_OP_INCB + ot);
}
+static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit INS: check I/O permission for the port in T1, then emit the single or repeated transfer. */
+ MemOp ot = decode->op[1].ot;
+ TCGv_i32 port = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(port, s->T1);
+ tcg_gen_ext16u_i32(port, port); /* keep only the low 16 bits as the port number */
+ if (!gen_check_io(s, ot, port,
+ SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
+ return; /* check failed: emit nothing further for this instruction */
+ }
+
+ translator_io_start(&s->base);
+ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_ins(s, ot); /* either REP prefix selects the repeated form */
+ } else {
+ gen_ins(s, ot);
+ }
+}
+
static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
@@ -1477,12 +1623,50 @@ static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2);
}
+static void gen_Jcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit a conditional jump (opcodes 0x70-0x7F in the table). */
+ gen_bnd_jmp(s);
+ gen_jcc(s, decode->b & 0xf, decode->immediate); /* low nibble of the opcode plus the decoded jump offset */
+}
+
+static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit LAHF: copy the computed flags, with bit 1 set, into bits 8..15 of EAX. */
+ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
+ return gen_illegal_opcode(s); /* in 64-bit code the instruction needs the LAHF_LM CPUID bit */
+ }
+ gen_compute_eflags(s);
+ /* Note: gen_compute_eflags() only gives the condition codes */
+ tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02); /* force bit 1 on in the stored value */
+ tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8); /* low 8 bits of T0 go to bits 8..15 of EAX */
+}
+
static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1);
gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
}
+static void gen_LEA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit LEA: the result is the address in A0 (the table entry uses "noseg", so no segment base is applied). */
+ tcg_gen_mov_tl(s->T0, s->A0);
+}
+
+static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit LODS, choosing the repeated or single form from the REP prefixes. */
+ MemOp ot = decode->op[2].ot;
+ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_lods(s, ot); /* either REP prefix selects the repeated form */
+ } else {
+ gen_lods(s, ot);
+ }
+}
+
+static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ /* nothing to do! (presumably common code loads the source and writes the destination -- confirm) */
+}
+#define gen_NOP gen_MOV /* NOP shares the empty emitter */
+
static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override);
@@ -1590,6 +1774,16 @@ static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
return gen_MOVQ(s, env, decode);
}
+static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit MOVS, choosing the repeated or single form from the REP prefixes. */
+ MemOp ot = decode->op[2].ot;
+ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_movs(s, ot); /* either REP prefix selects the repeated form */
+ } else {
+ gen_movs(s, ot);
+ }
+}
+
static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1629,6 +1823,25 @@ static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
+static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{ /* Emit OUTS: check I/O permission for the port in T1, then emit the single or repeated transfer. */
+ MemOp ot = decode->op[1].ot;
+ TCGv_i32 port = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(port, s->T1);
+ tcg_gen_ext16u_i32(port, port); /* keep only the low 16 bits as the port number */
+ if (!gen_check_io(s, ot, port, SVM_IOIO_STR_MASK)) {
+ return; /* check failed: emit nothing further for this instruction */
+ }
+
+ translator_io_start(&s->base);
+ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_outs(s, ot); /* either REP prefix selects the repeated form */
+ } else {
+ gen_outs(s, ot);
+ }
+}
+
static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -1884,6 +2097,33 @@ static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_pop_update(s, ot);
}
+static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_popa(s);
+}
+
+static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot;
+ int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
+
+ if (CPL(s) == 0) {
+ mask |= IF_MASK | IOPL_MASK;
+ } else if (CPL(s) <= IOPL(s)) {
+ mask |= IF_MASK;
+ }
+ if (s->dflag == MO_16) {
+ mask &= 0xffff;
+ }
+
+ ot = gen_pop_T0(s);
+ gen_helper_write_eflags(tcg_env, s->T0, tcg_constant_i32(mask));
+ gen_pop_update(s, ot);
+ set_cc_op(s, CC_OP_EFLAGS);
+ /* abort translation because TF/AC flag may change */
+ s->base.is_jmp = DISAS_EOB_NEXT;
+}
+
static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -2035,6 +2275,18 @@ static void gen_PUSH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_push_v(s, s->T1);
}
+static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_pusha(s);
+}
+
+static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_helper_read_eflags(s->T0, tcg_env);
+ gen_push_v(s, s->T0);
+}
+
static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2059,6 +2311,18 @@ static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
+ return gen_illegal_opcode(s);
+ }
+ tcg_gen_shri_tl(s->T0, cpu_regs[R_EAX], 8);
+ gen_compute_eflags(s);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
+ tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
+ tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
+}
+
static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2091,6 +2355,18 @@ static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update3_cc(decode, s, CC_OP_SBBB + ot, c_in);
}
+static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[2].ot;
+ if (s->prefix & PREFIX_REPNZ) {
+ gen_repz_scas(s, ot, 1);
+ } else if (s->prefix & PREFIX_REPZ) {
+ gen_repz_scas(s, ot, 0);
+ } else {
+ gen_scas(s, ot);
+ }
+}
+
static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
@@ -2178,6 +2454,16 @@ static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
}
+static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
+ gen_repz_stos(s, ot);
+ } else {
+ gen_stos(s, ot);
+ }
+}
+
static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -2674,6 +2960,43 @@ static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *de
}
}
+static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) {
+ gen_NM_exception(s);
+ } else {
+ /* needs to be treated as I/O because of ferr_irq */
+ translator_io_start(&s->base);
+ gen_helper_fwait(tcg_env);
+ }
+}
+
+static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ if (decode->b == 0x90 && !REX_B(s)) {
+ if (s->prefix & PREFIX_REPZ) {
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_pause(tcg_env, cur_insn_len_i32(s));
+ s->base.is_jmp = DISAS_NORETURN;
+ }
+ /* No writeback. */
+ decode->op[0].unit = X86_OP_SKIP;
+ return;
+ }
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1,
+ s->mem_index, decode->op[0].ot | MO_LE);
+ /* now store old value into register operand */
+ gen_op_mov_reg_v(s, decode->op[2].ot, decode->op[2].n, s->T0);
+ } else {
+ /* move destination value into source operand, source preserved in T1 */
+ gen_op_mov_reg_v(s, decode->op[2].ot, decode->op[2].n, s->T0);
+ tcg_gen_mov_tl(s->T0, s->T1);
+ }
+}
+
static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
/* special case XOR reg, reg */
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [PATCH v2 15/25] target/i386: move 60-BF opcodes to new decoder
2024-05-06 8:09 ` [PATCH v2 15/25] target/i386: move 60-BF opcodes to new decoder Paolo Bonzini
@ 2024-05-06 16:44 ` Richard Henderson
0 siblings, 0 replies; 38+ messages in thread
From: Richard Henderson @ 2024-05-06 16:44 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel; +Cc: zhao1.liu
On 5/6/24 01:09, Paolo Bonzini wrote:
> Compared to the old decoder, the main differences in translation
> are for the little-used ARPL instruction. IMUL is adjusted a bit
> to share more code to produce flags, but is otherwise very similar.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> +static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
> +{
> + gen_popa(s);
> +}
...
> +static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
> +{
> + gen_pusha(s);
> +}
3-space indent?
r~
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 16/25] target/i386: generalize gen_movl_seg_T0
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (14 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 15/25] target/i386: move 60-BF opcodes to new decoder Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 17/25] target/i386: move C0-FF opcodes to new decoder (except for x87) Paolo Bonzini
` (8 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
In the new decoder it is sometimes easier to put the segment
in T1 instead of T0, usually because another operand was loaded
by common code in T0. Generalize gen_movl_seg_T0 to allow
using any source.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 16 ++++++++--------
target/i386/tcg/emit.c.inc | 4 ++--
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 8f633814586..708fe023224 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2524,12 +2524,12 @@ static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg)
tcg_gen_shli_tl(cpu_seg_base[seg_reg], selector, 4);
}
-/* move T0 to seg_reg and compute if the CPU state may change. Never
+/* move SRC to seg_reg and compute if the CPU state may change. Never
call this function with seg_reg == R_CS */
-static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
+static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src)
{
if (PE(s) && !VM86(s)) {
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, src);
gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32);
/* abort translation because the addseg value may change or
because ss32 may change. For R_SS, translation must always
@@ -2541,7 +2541,7 @@ static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
s->base.is_jmp = DISAS_EOB_NEXT;
}
} else {
- gen_op_movl_seg_real(s, seg_reg, s->T0);
+ gen_op_movl_seg_real(s, seg_reg, src);
if (seg_reg == R_SS) {
s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
}
@@ -4083,13 +4083,13 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
goto illegal_op;
reg = b >> 3;
ot = gen_pop_T0(s);
- gen_movl_seg_T0(s, reg);
+ gen_movl_seg(s, reg, s->T0);
gen_pop_update(s, ot);
break;
case 0x1a1: /* pop fs */
case 0x1a9: /* pop gs */
ot = gen_pop_T0(s);
- gen_movl_seg_T0(s, (b >> 3) & 7);
+ gen_movl_seg(s, (b >> 3) & 7, s->T0);
gen_pop_update(s, ot);
break;
@@ -4136,7 +4136,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
if (reg >= 6 || reg == R_CS)
goto illegal_op;
gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
- gen_movl_seg_T0(s, reg);
+ gen_movl_seg(s, reg, s->T0);
break;
case 0x8c: /* mov Gv, seg */
modrm = x86_ldub_code(env, s);
@@ -4322,7 +4322,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
gen_add_A0_im(s, 1 << ot);
/* load the segment first to handle exceptions properly */
gen_op_ld_v(s, MO_16, s->T0, s->A0);
- gen_movl_seg_T0(s, op);
+ gen_movl_seg(s, op, s->T0);
/* then put the data */
gen_op_mov_reg_v(s, ot, reg, s->T1);
break;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index c59793f170a..fd2e1db0d2e 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -306,8 +306,8 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
case X86_OP_SKIP:
break;
case X86_OP_SEG:
- /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF. */
- gen_movl_seg_T0(s, op->n);
+ /* Note that gen_movl_seg takes care of interrupt shadow and TF. */
+ gen_movl_seg(s, op->n, s->T0);
break;
case X86_OP_INT:
if (op->has_ea) {
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 17/25] target/i386: move C0-FF opcodes to new decoder (except for x87)
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (15 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 16/25] target/i386: generalize gen_movl_seg_T0 Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 16:56 ` Richard Henderson
2024-05-06 8:09 ` [PATCH v2 18/25] target/i386: merge and enlarge a few ranges for call to disas_insn_new Paolo Bonzini
` (7 subsequent siblings)
24 siblings, 1 reply; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
The shift instructions are rewritten instead of reusing code from the old
decoder. Rotates use CC_OP_ADCOX more extensively and generally rely
more on the optimizer, so that the code generators are shared between
the immediate-count and variable-count cases.
In particular, this makes gen_RCL and gen_RCR pretty efficient for the
count == 1 case, which becomes (apart from a few extra movs) something like:
(compute_cc_all if needed)
// save old value for OF calculation
mov cc_src2, T0
// the bulk of RCL is just this!
deposit T0, cc_src, T0, 1, TARGET_LONG_BITS - 1
// compute carry
shr cc_dst, cc_src2, length - 1
and cc_dst, cc_dst, 1
// compute overflow
xor cc_src2, cc_src2, T0
extract cc_src2, cc_src2, length - 1, 1
32-bit MUL and IMUL are also slightly more efficient on 64-bit hosts.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 1 +
target/i386/tcg/translate.c | 23 +-
target/i386/tcg/decode-new.c.inc | 142 +++++
target/i386/tcg/emit.c.inc | 1014 +++++++++++++++++++++++++++++-
4 files changed, 1169 insertions(+), 11 deletions(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 790ad5e1d00..77bb31eb143 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -89,6 +89,7 @@ typedef enum X86OpSize {
X86_SIZE_x, /* 128/256-bit, based on operand size */
X86_SIZE_y, /* 32/64-bit, based on operand size */
X86_SIZE_z, /* 16-bit for 16-bit operand size, else 32-bit */
+ X86_SIZE_z_f64, /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
/* Custom */
X86_SIZE_d64,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 708fe023224..79b6e2760fe 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -38,6 +38,9 @@
#include "exec/helper-info.c.inc"
#undef HELPER_H
+/* Fixes for Windows namespace pollution. */
+#undef IN
+#undef OUT
#define PREFIX_REPZ 0x01
#define PREFIX_REPNZ 0x02
@@ -2488,14 +2491,24 @@ static inline int insn_const_size(MemOp ot)
}
}
+static void gen_conditional_jump_labels(DisasContext *s, target_long diff,
+ TCGLabel *not_taken, TCGLabel *taken)
+{
+ if (not_taken) {
+ gen_set_label(not_taken);
+ }
+ gen_jmp_rel_csize(s, 0, 1);
+
+ gen_set_label(taken);
+ gen_jmp_rel(s, s->dflag, diff, 0);
+}
+
static void gen_jcc(DisasContext *s, int b, int diff)
{
TCGLabel *l1 = gen_new_label();
gen_jcc1(s, b, l1);
- gen_jmp_rel_csize(s, 0, 1);
- gen_set_label(l1);
- gen_jmp_rel(s, s->dflag, diff, 0);
+ gen_conditional_jump_labels(s, diff, NULL, l1);
}
static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
@@ -2752,7 +2765,7 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
/* an interrupt is different from an exception because of the
privilege checks */
-static void gen_interrupt(DisasContext *s, int intno)
+static void gen_interrupt(DisasContext *s, uint8_t intno)
{
gen_update_cc_op(s);
gen_update_eip_cur(s);
@@ -3183,7 +3196,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#ifndef CONFIG_USER_ONLY
use_new &= b <= limit;
#endif
- if (use_new && b <= 0xbf) {
+ if (use_new && (b < 0xd8 || b >= 0xe0)) {
disas_insn_new(s, cpu, b);
return true;
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 55fc0173a41..a47ecab6dd4 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -135,6 +135,8 @@
## __VA_ARGS__ \
}
+#define X86_OP_GROUP1(op, op0, s0, ...) \
+ X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...) \
X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
#define X86_OP_GROUPw(op, op0, s0, ...) \
@@ -1174,6 +1176,83 @@ static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
}
}
+static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static const X86GenFunc group2_gen[8] = {
+ gen_ROL, gen_ROR, gen_RCL, gen_RCR,
+ gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR,
+ };
+ int op = (get_modrm(s, env) >> 3) & 7;
+ entry->gen = group2_gen[op];
+ if (op == 7) {
+ entry->special = X86_SPECIAL_SExtT0;
+ } else {
+ entry->special = X86_SPECIAL_ZExtT0;
+ }
+}
+
+static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static const X86OpEntry opcodes_grp3[16] = {
+ /* 0xf6 */
+ [0x00] = X86_OP_ENTRYrr(AND, E,b, I,b),
+ [0x02] = X86_OP_ENTRY1(NOT, E,b, lock),
+ [0x03] = X86_OP_ENTRY1(NEG, E,b, lock),
+ [0x04] = X86_OP_ENTRYrr(MUL, E,b, 0,b, zextT0),
+ [0x05] = X86_OP_ENTRYrr(IMUL,E,b, 0,b, sextT0),
+ [0x06] = X86_OP_ENTRYr(DIV, E,b),
+ [0x07] = X86_OP_ENTRYr(IDIV, E,b),
+
+ /* 0xf7 */
+ [0x08] = X86_OP_ENTRYrr(AND, E,v, I,z),
+ [0x0a] = X86_OP_ENTRY1(NOT, E,v, lock),
+ [0x0b] = X86_OP_ENTRY1(NEG, E,v, lock),
+ [0x0c] = X86_OP_ENTRYrr(MUL, E,v, 0,v, zextT0),
+ [0x0d] = X86_OP_ENTRYrr(IMUL,E,v, 0,v, sextT0),
+ [0x0e] = X86_OP_ENTRYr(DIV, E,v),
+ [0x0f] = X86_OP_ENTRYr(IDIV, E,v),
+ };
+
+ int w = (*b & 1);
+ int reg = (get_modrm(s, env) >> 3) & 7;
+
+ *entry = opcodes_grp3[(w << 3) | reg];
+}
+
+static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static const X86OpEntry opcodes_grp4_5[16] = {
+ /* 0xfe */
+ [0x00] = X86_OP_ENTRY1(INC, E,b, lock),
+ [0x01] = X86_OP_ENTRY1(DEC, E,b, lock),
+
+ /* 0xff */
+ [0x08] = X86_OP_ENTRY1(INC, E,v, lock),
+ [0x09] = X86_OP_ENTRY1(DEC, E,v, lock),
+ [0x0a] = X86_OP_ENTRY3(CALL_m, None, None, E,f64, None, None, zextT0),
+ [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
+ [0x0c] = X86_OP_ENTRY3(JMP_m, None, None, E,f64, None, None, zextT0),
+ [0x0d] = X86_OP_ENTRYr(JMPF_m, M,p),
+ [0x0e] = X86_OP_ENTRYr(PUSH, E,f64),
+ };
+
+ int w = (*b & 1);
+ int reg = (get_modrm(s, env) >> 3) & 7;
+
+ *entry = opcodes_grp4_5[(w << 3) | reg];
+}
+
+
+static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ int op = (get_modrm(s, env) >> 3) & 7;
+ if (op != 0) {
+ *entry = UNKNOWN_OPCODE;
+ } else {
+ entry->gen = gen_MOV;
+ }
+}
+
static const X86OpEntry opcodes_root[256] = {
[0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
[0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
@@ -1283,6 +1362,38 @@ static const X86OpEntry opcodes_root[256] = {
[0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
[0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None),
+ [0xC0] = X86_OP_GROUP2(group2, E,b, I,b),
+ [0xC1] = X86_OP_GROUP2(group2, E,v, I,b),
+ [0xC2] = X86_OP_ENTRYr(RET, I,w),
+ [0xC3] = X86_OP_ENTRY0(RET),
+ [0xC4] = X86_OP_ENTRY3(LES, G,z, M,p, None, None, chk(i64)),
+ [0xC5] = X86_OP_ENTRY3(LDS, G,z, M,p, None, None, chk(i64)),
+ [0xC6] = X86_OP_GROUP3(group11, E,b, I,b, None, None), /* reg=000b */
+ [0xC7] = X86_OP_GROUP3(group11, E,v, I,z, None, None), /* reg=000b */
+
+ [0xD0] = X86_OP_GROUP1(group2, E,b),
+ [0xD1] = X86_OP_GROUP1(group2, E,v),
+ [0xD2] = X86_OP_GROUP2(group2, E,b, 1,b), /* CL */
+ [0xD3] = X86_OP_GROUP2(group2, E,v, 1,b), /* CL */
+ [0xD4] = X86_OP_ENTRY2(AAM, 0,w, I,b),
+ [0xD5] = X86_OP_ENTRY2(AAD, 0,w, I,b),
+ [0xD6] = X86_OP_ENTRYw(SALC, 0,b),
+ [0xD7] = X86_OP_ENTRY1(XLAT, 0,b, zextT0), /* AL read/written */
+
+ [0xE0] = X86_OP_ENTRYr(LOOPNE, J,b), /* implicit: CX with aflag size */
+ [0xE1] = X86_OP_ENTRYr(LOOPE, J,b), /* implicit: CX with aflag size */
+ [0xE2] = X86_OP_ENTRYr(LOOP, J,b), /* implicit: CX with aflag size */
+ [0xE3] = X86_OP_ENTRYr(JCXZ, J,b), /* implicit: CX with aflag size */
+ [0xE4] = X86_OP_ENTRYwr(IN, 0,b, I_unsigned,b), /* AL */
+ [0xE5] = X86_OP_ENTRYwr(IN, 0,v, I_unsigned,b), /* AX/EAX */
+ [0xE6] = X86_OP_ENTRYrr(OUT, 0,b, I_unsigned,b), /* AL */
+ [0xE7] = X86_OP_ENTRYrr(OUT, 0,v, I_unsigned,b), /* AX/EAX */
+
+ [0xF1] = X86_OP_ENTRY0(INT1, svm(ICEBP)),
+ [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0)),
+ [0xF5] = X86_OP_ENTRY0(CMC),
+ [0xF6] = X86_OP_GROUP1(group3, E,b),
+ [0xF7] = X86_OP_GROUP1(group3, E,v),
[0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock),
[0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock),
@@ -1392,6 +1503,33 @@ static const X86OpEntry opcodes_root[256] = {
[0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
[0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
[0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+
+ [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
+ [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
+ [0xCA] = X86_OP_ENTRYr(RETF, I,w),
+ [0xCB] = X86_OP_ENTRY0(RETF),
+ [0xCC] = X86_OP_ENTRY0(INT3),
+ [0xCD] = X86_OP_ENTRYr(INT, I,b, chk(vm86_iopl)),
+ [0xCE] = X86_OP_ENTRY0(INTO),
+ [0xCF] = X86_OP_ENTRY0(IRET, chk(vm86_iopl) svm(IRET)),
+
+ [0xE8] = X86_OP_ENTRYr(CALL, J,z_f64),
+ [0xE9] = X86_OP_ENTRYr(JMP, J,z_f64),
+ [0xEA] = X86_OP_ENTRYrr(JMPF, I_unsigned,p, I_unsigned,w, chk(i64)),
+ [0xEB] = X86_OP_ENTRYr(JMP, J,b),
+ [0xEC] = X86_OP_ENTRYwr(IN, 0,b, 2,w), /* AL, DX */
+ [0xED] = X86_OP_ENTRYwr(IN, 0,v, 2,w), /* AX/EAX, DX */
+ [0xEE] = X86_OP_ENTRYrr(OUT, 0,b, 2,w), /* DX, AL */
+ [0xEF] = X86_OP_ENTRYrr(OUT, 0,v, 2,w), /* DX, AX/EAX */
+
+ [0xF8] = X86_OP_ENTRY0(CLC),
+ [0xF9] = X86_OP_ENTRY0(STC),
+ [0xFA] = X86_OP_ENTRY0(CLI, chk(iopl)),
+ [0xFB] = X86_OP_ENTRY0(STI, chk(iopl)),
+ [0xFC] = X86_OP_ENTRY0(CLD),
+ [0xFD] = X86_OP_ENTRY0(STD),
+ [0xFE] = X86_OP_GROUP1(group4_5, E,b),
+ [0xFF] = X86_OP_GROUP1(group4_5, E,v),
};
#undef mmx
@@ -1471,6 +1609,10 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp
*ot = s->dflag == MO_16 ? MO_16 : MO_32;
return true;
+ case X86_SIZE_z_f64: /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
+ *ot = !CODE64(s) && s->dflag == MO_16 ? MO_16 : MO_32;
+ return true;
+
case X86_SIZE_dq: /* SSE/AVX 128-bit */
if (e->special == X86_SPECIAL_MMX &&
!(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index fd2e1db0d2e..ffe458b80f9 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -19,6 +19,21 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
+/*
+ * Sometimes, knowing what the backend has can produce better code.
+ * The exact opcode to check depends on 32- vs. 64-bit.
+ */
+#ifdef TARGET_X86_64
+#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
+#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i64_valid
+#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i64_valid
+#else
+#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
+#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i32_valid
+#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i32_valid
+#endif
+
+
#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg])
typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
@@ -45,6 +60,9 @@ typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even,
TCGv_i32 odd);
+static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+
static inline TCGv_i32 tcg_constant8u_i32(uint8_t val)
{
return tcg_constant_i32(val);
@@ -330,6 +348,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv
default:
g_assert_not_reached();
}
+ op->unit = X86_OP_SKIP;
}
static inline int vector_len(DisasContext *s, X86DecodedInsn *decode)
@@ -1063,6 +1082,22 @@ static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
set_cc_op(s, CC_OP_EFLAGS);
}
+static void gen_AAD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_helper_aad(tcg_env, tcg_constant_i32(decode->immediate));
+ set_cc_op(s, CC_OP_LOGICB);
+}
+
+static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ if (decode->immediate == 0) {
+ gen_exception(s, EXCP00_DIVZ);
+ } else {
+ gen_helper_aam(tcg_env, tcg_constant_i32(decode->immediate));
+ set_cc_op(s, CC_OP_LOGICB);
+ }
+}
+
static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_update_cc_op(s);
@@ -1295,11 +1330,33 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}
+static void gen_CALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_push_v(s, eip_next_tl(s));
+ gen_JMP(s, env, decode);
+}
+
+static void gen_CALL_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_push_v(s, eip_next_tl(s));
+ gen_JMP_m(s, env, decode);
+}
+
static void gen_CALLF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_far_call(s);
}
+static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[2].ot;
+
+ gen_op_ld_v(s, ot, s->T0, s->A0);
+ gen_add_A0_im(s, 1 << ot);
+ gen_op_ld_v(s, MO_16, s->T1, s->A0);
+ gen_far_call(s);
+}
+
static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp src_ot = decode->op[0].ot - 1;
@@ -1307,6 +1364,28 @@ static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_ext_tl(s->T0, s->T0, src_ot | MO_SIGN);
}
+static void gen_CLC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_compute_eflags(s);
+ tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
+}
+
+static void gen_CLD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, offsetof(CPUX86State, df));
+}
+
+static void gen_CLI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_reset_eflags(s, IF_MASK);
+}
+
+static void gen_CMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_compute_eflags(s);
+ tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+}
+
static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGLabel *label_top = gen_new_label();
@@ -1495,11 +1574,39 @@ static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update_cc_incdec(decode, s, CC_OP_DECB + ot);
}
+static void gen_DIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[2].ot;
+
+ switch(ot) {
+ case MO_8:
+ gen_helper_divb_AL(tcg_env, s->T1);
+ break;
+ case MO_16:
+ gen_helper_divw_AX(tcg_env, s->T1);
+ break;
+ default:
+ case MO_32:
+ gen_helper_divl_EAX(tcg_env, s->T1);
+ break;
+#ifdef TARGET_X86_64
+ case MO_64:
+ gen_helper_divq_EAX(tcg_env, s->T1);
+ break;
+#endif
+ }
+}
+
static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_helper_emms(tcg_env);
}
+static void gen_ENTER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_enter(s, decode->op[1].imm, decode->op[2].imm);
+}
+
static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
@@ -1513,6 +1620,39 @@ static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
}
+static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+#ifdef CONFIG_SYSTEM_ONLY
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_hlt(tcg_env, cur_insn_len_i32(s));
+ s->base.is_jmp = DISAS_NORETURN;
+#endif
+}
+
+static void gen_IDIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[2].ot;
+
+ switch(ot) {
+ case MO_8:
+ gen_helper_idivb_AL(tcg_env, s->T1);
+ break;
+ case MO_16:
+ gen_helper_idivw_AX(tcg_env, s->T1);
+ break;
+ default:
+ case MO_32:
+ gen_helper_idivl_EAX(tcg_env, s->T1);
+ break;
+#ifdef TARGET_X86_64
+ case MO_64:
+ gen_helper_idivq_EAX(tcg_env, s->T1);
+ break;
+#endif
+ }
+}
+
static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1576,6 +1716,80 @@ static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_MULB + ot);
}
+static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ TCGv cc_src_rhs;
+
+ switch (ot) {
+ case MO_8:
+ /* s->T0 already sign-extended */
+ tcg_gen_ext8s_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+ /* Compare the full result to the extension of the truncated result. */
+ tcg_gen_ext8s_tl(s->T1, s->T0);
+ cc_src_rhs = s->T0;
+ break;
+
+ case MO_16:
+ /* s->T0 already sign-extended */
+ tcg_gen_ext16s_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+ tcg_gen_shri_tl(s->T1, s->T0, 16);
+ gen_op_mov_reg_v(s, MO_16, R_EDX, s->T1);
+ /* Compare the full result to the extension of the truncated result. */
+ tcg_gen_ext16s_tl(s->T1, s->T0);
+ cc_src_rhs = s->T0;
+ break;
+
+ case MO_32:
+#ifdef TARGET_X86_64
+ /* s->T0 already sign-extended */
+ tcg_gen_ext32s_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ tcg_gen_ext32u_tl(cpu_regs[R_EAX], s->T0);
+ tcg_gen_shri_tl(cpu_regs[R_EDX], s->T0, 32);
+ /* Compare the full result to the extension of the truncated result. */
+ tcg_gen_ext32s_tl(s->T1, s->T0);
+ cc_src_rhs = s->T0;
+ break;
+
+ case MO_64:
+#endif
+ tcg_gen_muls2_tl(s->T0, cpu_regs[R_EDX], s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_regs[R_EAX], s->T0);
+
+ /* Compare the high part to the sign bit of the truncated result */
+ tcg_gen_negsetcondi_tl(TCG_COND_LT, s->T1, s->T0, 0);
+ cc_src_rhs = cpu_regs[R_EDX];
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_gen_sub_tl(s->T1, s->T1, cc_src_rhs);
+ prepare_update2_cc(decode, s, CC_OP_MULB + ot);
+}
+
+static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv_i32 port = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(port, s->T1);
+ tcg_gen_ext16u_i32(port, port);
+ if (!gen_check_io(s, ot, port, SVM_IOIO_TYPE_MASK)) {
+ return;
+ }
+ translator_io_start(&s->base);
+ gen_helper_in_func(ot, s->T0, port);
+ gen_writeback(s, decode, 0, s->T0);
+ gen_bpt_io(s, port, ot);
+}
+
static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -1623,12 +1837,83 @@ static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2);
}
+static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_interrupt(s, decode->immediate);
+}
+
+static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_exception(s, EXCP01_DB);
+}
+
+static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_interrupt(s, EXCP03_INT3);
+}
+
+static void gen_INTO(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_into(tcg_env, cur_insn_len_i32(s));
+}
+
+static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ if (!PE(s) || VM86(s)) {
+ gen_helper_iret_real(tcg_env, tcg_constant_i32(s->dflag - 1));
+ } else {
+ gen_helper_iret_protected(tcg_env, tcg_constant_i32(s->dflag - 1),
+ eip_next_i32(s));
+ }
+ set_cc_op(s, CC_OP_EFLAGS);
+ s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
static void gen_Jcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_bnd_jmp(s);
gen_jcc(s, decode->b & 0xf, decode->immediate);
}
+static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ TCGLabel *taken = gen_new_label();
+
+ gen_update_cc_op(s);
+ gen_op_jz_ecx(s, taken);
+ gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
+}
+
+static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_jmp_rel(s, s->dflag, decode->immediate, 0);
+}
+
+static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_op_jmp_v(s, s->T0);
+ gen_bnd_jmp(s);
+ s->base.is_jmp = DISAS_JUMP;
+}
+
+static void gen_JMPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_far_jmp(s);
+}
+
+static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[2].ot;
+
+ gen_op_ld_v(s, ot, s->T0, s->A0);
+ gen_add_A0_im(s, 1 << ot);
+ gen_op_ld_v(s, MO_16, s->T1, s->A0);
+ gen_far_jmp(s);
+}
+
static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
@@ -1646,11 +1931,38 @@ static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
}
+static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, int seg)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* Offset already in s->T0. */
+ gen_add_A0_im(s, 1 << ot);
+ gen_op_ld_v(s, MO_16, s->T1, s->A0);
+
+ /* load the segment here to handle exceptions properly */
+ gen_movl_seg(s, seg, s->T1);
+}
+
+static void gen_LDS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_lxx_seg(s, env, decode, R_DS);
+}
+
static void gen_LEA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
tcg_gen_mov_tl(s->T0, s->A0);
}
+static void gen_LEAVE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_leave(s);
+}
+
+static void gen_LES(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_lxx_seg(s, env, decode, R_ES);
+}
+
static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -1661,6 +1973,40 @@ static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ TCGLabel *taken = gen_new_label();
+
+ gen_update_cc_op(s);
+ gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+ gen_op_jnz_ecx(s, taken);
+ gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
+}
+
+static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ TCGLabel *taken = gen_new_label();
+ TCGLabel *not_taken = gen_new_label();
+
+ gen_update_cc_op(s);
+ gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+ gen_op_jz_ecx(s, not_taken);
+ gen_jcc1(s, (JCC_Z << 1), taken); /* jz taken */
+ gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
+}
+
+static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ TCGLabel *taken = gen_new_label();
+ TCGLabel *not_taken = gen_new_label();
+
+ gen_update_cc_op(s);
+ gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
+ gen_op_jz_ecx(s, not_taken);
+ gen_jcc1(s, (JCC_Z << 1) | 1, taken); /* jnz taken */
+ gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
+}
+
static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
/* nothing to do! */
@@ -1784,6 +2130,57 @@ static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+
+ switch (ot) {
+ case MO_8:
+ /* s->T0 already zero-extended */
+ tcg_gen_ext8u_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+ tcg_gen_andi_tl(s->T1, s->T0, 0xff00);
+ decode->cc_dst = s->T0;
+ decode->cc_src = s->T1;
+ break;
+
+ case MO_16:
+ /* s->T0 already zero-extended */
+ tcg_gen_ext16u_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+ tcg_gen_shri_tl(s->T1, s->T0, 16);
+ gen_op_mov_reg_v(s, MO_16, R_EDX, s->T1);
+ decode->cc_dst = s->T0;
+ decode->cc_src = s->T1;
+ break;
+
+ case MO_32:
+#ifdef TARGET_X86_64
+ /* s->T0 already zero-extended */
+ tcg_gen_ext32u_tl(s->T1, s->T1);
+ tcg_gen_mul_tl(s->T0, s->T0, s->T1);
+ tcg_gen_ext32u_tl(cpu_regs[R_EAX], s->T0);
+ tcg_gen_shri_tl(cpu_regs[R_EDX], s->T0, 32);
+ decode->cc_dst = cpu_regs[R_EAX];
+ decode->cc_src = cpu_regs[R_EDX];
+ break;
+
+ case MO_64:
+#endif
+ tcg_gen_mulu2_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->T0, s->T1);
+ decode->cc_dst = cpu_regs[R_EAX];
+ decode->cc_src = cpu_regs[R_EDX];
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ decode->cc_op = CC_OP_MULB + ot;
+}
+
static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -1810,6 +2207,46 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv oldv = tcg_temp_new();
+
+ if (s->prefix & PREFIX_LOCK) {
+ TCGv newv = tcg_temp_new();
+ TCGv cmpv = tcg_temp_new();
+ TCGLabel *label1 = gen_new_label();
+
+ gen_set_label(label1);
+ gen_op_ld_v(s, ot, oldv, s->A0);
+ tcg_gen_neg_tl(newv, oldv);
+ tcg_gen_atomic_cmpxchg_tl(cmpv, s->A0, oldv, newv,
+ s->mem_index, ot | MO_LE);
+ tcg_gen_brcond_tl(TCG_COND_NE, oldv, cmpv, label1);
+ } else {
+ tcg_gen_mov_tl(oldv, s->T0);
+ }
+ tcg_gen_neg_tl(s->T0, oldv);
+
+ decode->cc_dst = s->T0;
+ decode->cc_src = oldv;
+ tcg_gen_movi_tl(s->cc_srcT, 0);
+ decode->cc_op = CC_OP_SUBB + ot;
+}
+
+static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_movi_tl(s->T0, ~0);
+ tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
+ s->mem_index, ot | MO_LE);
+ } else {
+ tcg_gen_not_tl(s->T0, s->T0);
+ }
+}
+
static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -1823,6 +2260,23 @@ static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
}
+static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ TCGv_i32 port = tcg_temp_new_i32();
+ TCGv_i32 value = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(port, s->T1);
+ tcg_gen_ext16u_i32(port, port);
+ if (!gen_check_io(s, ot, port, 0)) {
+ return;
+ }
+ tcg_gen_trunc_tl_i32(value, s->T0);
+ translator_io_start(&s->base);
+ gen_helper_out_func(ot, port, value);
+ gen_bpt_io(s, port, ot);
+}
+
static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
@@ -2035,12 +2489,6 @@ static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s)
tcg_gen_or_vec(vece, d, d, t);
}
-#ifdef TARGET_X86_64
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
-#else
-#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
-#endif
-
static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
@@ -2287,6 +2735,438 @@ static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_push_v(s, s->T0);
}
+static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
+ bool *can_be_zero, TCGv *count)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask = (ot <= MO_32 ? 0x1f : 0x3f);
+
+ *can_be_zero = false;
+ switch (decode->op[2].unit) {
+ case X86_OP_INT:
+ *count = tcg_temp_new();
+ tcg_gen_andi_tl(*count, s->T1, mask);
+ *can_be_zero = true;
+ break;
+
+ case X86_OP_IMM:
+ if ((decode->immediate & mask) == 0) {
+ *count = NULL;
+ break;
+ }
+ *count = tcg_temp_new();
+ tcg_gen_movi_tl(*count, decode->immediate & mask);
+ break;
+
+ case X86_OP_SKIP:
+ *count = tcg_temp_new();
+ tcg_gen_movi_tl(*count, 1);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ return ot;
+}
+
+/*
+ * Compute existing flags in decode->cc_src, for gen_* functions that want
+ * to set the cc_op to CC_OP_ADCOX. In particular, this allows rotate
+ * operations to compute the carry in decode->cc_dst and the overflow in
+ * decode->cc_src2.
+ *
+ * If need_flags is true, decode->cc_dst and decode->cc_src2 are preloaded
+ * with the value of CF and OF before the instruction, so that it is possible
+ * to keep the flags unmodified.
+ *
+ * Return true if carry could be made available cheaply as a 1-bit value in
+ * decode->cc_dst (trying a bit harder if want_carry is true). If false is
+ * returned, decode->cc_dst is uninitialized and the carry is only available
+ * as bit 0 of decode->cc_src.
+ */
+static bool gen_eflags_adcox(DisasContext *s, X86DecodedInsn *decode, bool want_carry, bool need_flags)
+{
+ bool got_cf = false;
+ bool got_of = false;
+
+ decode->cc_dst = tcg_temp_new();
+ decode->cc_src = tcg_temp_new();
+ decode->cc_src2 = tcg_temp_new();
+ decode->cc_op = CC_OP_ADCOX;
+
+ /* A lot more cc_ops could be "optimized" to avoid the extracts at
+ * the end (INC/DEC, BMILG, MUL), but they are all really unlikely
+ * to be followed by rotations within the same basic block.
+ */
+ switch (s->cc_op) {
+ case CC_OP_ADCOX:
+ /* No need to compute the full EFLAGS, CF/OF are already isolated. */
+ tcg_gen_mov_tl(decode->cc_src, cpu_cc_src);
+ if (need_flags) {
+ tcg_gen_mov_tl(decode->cc_src2, cpu_cc_src2);
+ got_of = true;
+ }
+ if (want_carry || need_flags) {
+ tcg_gen_mov_tl(decode->cc_dst, cpu_cc_dst);
+ got_cf = true;
+ }
+ break;
+
+ case CC_OP_LOGICB ... CC_OP_LOGICQ:
+ /* CF and OF are zero, do it just because it's easy. */
+ gen_mov_eflags(s, decode->cc_src);
+ if (need_flags) {
+ tcg_gen_movi_tl(decode->cc_src2, 0);
+ got_of = true;
+ }
+ if (want_carry || need_flags) {
+ tcg_gen_movi_tl(decode->cc_dst, 0);
+ got_cf = true;
+ }
+ break;
+
+ case CC_OP_SARB ... CC_OP_SARQ:
+ /*
+ * SHR/RCR/SHR/RCR/... is a relatively common occurrence of RCR.
+ * By computing CF without using eflags, the calls to cc_compute_all
+ * can be eliminated as dead code (except for the last RCR).
+ */
+ if (want_carry || need_flags) {
+ tcg_gen_andi_tl(decode->cc_dst, cpu_cc_src, 1);
+ got_cf = true;
+ }
+ gen_mov_eflags(s, decode->cc_src);
+ break;
+
+ case CC_OP_SHLB ... CC_OP_SHLQ:
+ /*
+ * Likewise for SHL/RCL/SHL/RCL/... but, if CF is not in the sign
+ * bit, we might as well fish CF out of EFLAGS and save a shift.
+ */
+ if (want_carry && (!need_flags || s->cc_op == CC_OP_SHLB + MO_TL)) {
+ tcg_gen_shri_tl(decode->cc_dst, cpu_cc_src, (8 << (s->cc_op - CC_OP_SHLB)) - 1);
+ got_cf = true;
+ }
+ gen_mov_eflags(s, decode->cc_src);
+ break;
+
+ default:
+ gen_mov_eflags(s, decode->cc_src);
+ break;
+ }
+
+ if (need_flags) {
+ /* If the flags could be left unmodified, always load them. */
+ if (!got_of) {
+ tcg_gen_extract_tl(decode->cc_src2, decode->cc_src, ctz32(CC_O), 1);
+ got_of = true;
+ }
+ if (!got_cf) {
+ tcg_gen_extract_tl(decode->cc_dst, decode->cc_src, ctz32(CC_C), 1);
+ got_cf = true;
+ }
+ }
+ return got_cf;
+}
+
+static void gen_rot_overflow(X86DecodedInsn *decode, TCGv result, TCGv old, TCGv count)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv temp = count ? tcg_temp_new() : decode->cc_src2;
+
+ tcg_gen_xor_tl(temp, old, result);
+ tcg_gen_extract_tl(temp, temp, (8 << ot) - 1, 1);
+ if (count) {
+ tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src2, count, tcg_constant_tl(0),
+ decode->cc_src2, temp);
+ }
+}
+
+/*
+ * RCx operations are invariant modulo 8*operand_size+1. For 8 and 16-bit operands,
+ * this is less than 0x1f (the mask applied by gen_shift_count) so reduce further.
+ */
+static void gen_rotc_mod(MemOp ot, TCGv count)
+{
+ TCGv temp;
+
+ switch (ot) {
+ case MO_8:
+ temp = tcg_temp_new();
+ tcg_gen_subi_tl(temp, count, 18);
+ tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
+ tcg_gen_subi_tl(temp, count, 9);
+ tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
+ break;
+
+ case MO_16:
+ temp = tcg_temp_new();
+ tcg_gen_subi_tl(temp, count, 17);
+ tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * The idea here is that the bit to the right of the new bit 0 is the
+ * new carry, and the bit to the right of the old bit 0 is the old carry.
+ * Just like a regular rotation, the result of the rotation is composed
+ * from a right shifted part and a left shifted part of s->T0. The new carry
+ * is extracted from the right-shifted portion, and the old carry is
+ * inserted at the end of the left-shifted portion.
+ *
+ * Because of the separate shifts involving the carry, gen_RCL and gen_RCR
+ * mostly operate on count-1. This also comes in handy when computing
+ * length - count, because (length-1) - (count-1) can be computed with
+ * a XOR, and that is commutative unlike subtraction.
+ */
+static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ bool have_1bit_cin, can_be_zero;
+ TCGv count;
+ TCGLabel *zero_label = NULL;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ TCGv low, high, low_count;
+
+ if (!count) {
+ return;
+ }
+
+ low = tcg_temp_new();
+ high = tcg_temp_new();
+ low_count = tcg_temp_new();
+
+ gen_rotc_mod(ot, count);
+ have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero);
+ if (can_be_zero) {
+ zero_label = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, count, 0, zero_label);
+ }
+
+ /* Compute high part, including incoming carry. */
+ if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+ /* high = (T0 << 1) | cin */
+ TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
+ tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
+ } else {
+ /* Same as above but without deposit; cin in cc_dst. */
+ tcg_gen_add_tl(high, s->T0, decode->cc_dst);
+ tcg_gen_add_tl(high, high, s->T0);
+ }
+ tcg_gen_subi_tl(count, count, 1);
+ tcg_gen_shl_tl(high, high, count);
+
+ /* Compute low part and outgoing carry, incoming s->T0 is zero extended */
+ tcg_gen_xori_tl(low_count, count, (8 << ot) - 1); /* LENGTH - 1 - (count - 1) */
+ tcg_gen_shr_tl(low, s->T0, low_count);
+ tcg_gen_andi_tl(decode->cc_dst, low, 1);
+ tcg_gen_shri_tl(low, low, 1);
+
+ /* Compute result and outgoing overflow */
+ tcg_gen_mov_tl(decode->cc_src2, s->T0);
+ tcg_gen_or_tl(s->T0, low, high);
+ gen_rot_overflow(decode, s->T0, decode->cc_src2, NULL);
+
+ if (zero_label) {
+ gen_set_label(zero_label);
+ }
+}
+
+static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ bool have_1bit_cin, can_be_zero;
+ TCGv count;
+ TCGLabel *zero_label = NULL;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ TCGv low, high, high_count;
+
+ if (!count) {
+ return;
+ }
+
+ low = tcg_temp_new();
+ high = tcg_temp_new();
+ high_count = tcg_temp_new();
+
+ gen_rotc_mod(ot, count);
+ have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero);
+ if (can_be_zero) {
+ zero_label = gen_new_label();
+ tcg_gen_brcondi_tl(TCG_COND_EQ, count, 0, zero_label);
+ }
+
+ /* Save incoming carry into high, it will be shifted later. */
+ if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) {
+ TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src;
+ tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1);
+ } else {
+ /* Same as above but without deposit; cin in cc_dst. */
+ tcg_gen_add_tl(high, s->T0, decode->cc_dst);
+ tcg_gen_add_tl(high, high, s->T0);
+ }
+
+ /* Compute low part and outgoing carry, incoming s->T0 is zero extended */
+ tcg_gen_subi_tl(count, count, 1);
+ tcg_gen_shr_tl(low, s->T0, count);
+ tcg_gen_andi_tl(decode->cc_dst, low, 1);
+ tcg_gen_shri_tl(low, low, 1);
+
+ /* Move high part to the right position */
+ tcg_gen_xori_tl(high_count, count, (8 << ot) - 1); /* LENGTH - 1 - (count - 1) */
+ tcg_gen_shl_tl(high, high, high_count);
+
+ /* Compute result and outgoing overflow */
+ tcg_gen_mov_tl(decode->cc_src2, s->T0);
+ tcg_gen_or_tl(s->T0, low, high);
+ gen_rot_overflow(decode, s->T0, decode->cc_src2, NULL);
+
+ if (zero_label) {
+ gen_set_label(zero_label);
+ }
+}
+
+static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+
+ MemOp ot = gen_pop_T0(s);
+ gen_stack_update(s, adjust + (1 << ot));
+ gen_op_jmp_v(s, s->T0);
+ gen_bnd_jmp(s);
+ s->base.is_jmp = DISAS_JUMP;
+}
+
+static void gen_RETF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+
+ if (!PE(s) || VM86(s)) {
+ gen_stack_A0(s);
+ /* pop offset */
+ gen_op_ld_v(s, s->dflag, s->T0, s->A0);
+ /* NOTE: keeping EIP updated is not a problem in case of
+ exception */
+ gen_op_jmp_v(s, s->T0);
+ /* pop selector */
+ gen_add_A0_im(s, 1 << s->dflag);
+ gen_op_ld_v(s, s->dflag, s->T0, s->A0);
+ gen_op_movl_seg_real(s, R_CS, s->T0);
+ /* add stack offset */
+ gen_stack_update(s, adjust + (2 << s->dflag));
+ } else {
+ gen_update_cc_op(s);
+ gen_update_eip_cur(s);
+ gen_helper_lret_protected(tcg_env, tcg_constant_i32(s->dflag - 1),
+ tcg_constant_i32(adjust));
+ }
+ s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+/*
+ * Return non-NULL if a 32-bit rotate works, after possibly replicating the input.
+ * The input has already been zero-extended upon operand decode.
+ */
+static TCGv_i32 gen_rot_replicate(MemOp ot, TCGv in)
+{
+ TCGv_i32 temp;
+ switch (ot) {
+ case MO_8:
+ temp = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(temp, in);
+ tcg_gen_muli_i32(temp, temp, 0x01010101);
+ return temp;
+
+ case MO_16:
+ temp = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(temp, in);
+ tcg_gen_deposit_i32(temp, temp, temp, 16, 16);
+ return temp;
+
+#ifdef TARGET_X86_64
+ case MO_32:
+ temp = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(temp, in);
+ return temp;
+#endif
+
+ default:
+ return NULL;
+ }
+}
+
+static void gen_rot_carry(X86DecodedInsn *decode, TCGv result, TCGv count, int bit)
+{
+ if (count == NULL) {
+ tcg_gen_extract_tl(decode->cc_dst, result, bit, 1);
+ } else {
+ TCGv temp = tcg_temp_new();
+ tcg_gen_extract_tl(temp, result, bit, 1);
+ tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0),
+ decode->cc_dst, temp);
+ }
+}
+
+static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ bool can_be_zero;
+ TCGv count;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ TCGv_i32 temp32, count32;
+ TCGv old = tcg_temp_new();
+
+ if (!count) {
+ return;
+ }
+
+ gen_eflags_adcox(s, decode, false, can_be_zero);
+ tcg_gen_mov_tl(old, s->T0);
+ temp32 = gen_rot_replicate(ot, s->T0);
+ if (temp32) {
+ count32 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(count32, count);
+ tcg_gen_rotl_i32(temp32, temp32, count32);
+ /* Zero extend to facilitate later optimization. */
+ tcg_gen_extu_i32_tl(s->T0, temp32);
+ } else {
+ tcg_gen_rotl_tl(s->T0, s->T0, count);
+ }
+ gen_rot_carry(decode, s->T0, count, 0);
+ gen_rot_overflow(decode, s->T0, old, count);
+}
+
+static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ bool can_be_zero;
+ TCGv count;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+ TCGv_i32 temp32, count32;
+ TCGv old = tcg_temp_new();
+
+ if (!count) {
+ return;
+ }
+
+ gen_eflags_adcox(s, decode, false, can_be_zero);
+ tcg_gen_mov_tl(old, s->T0);
+ temp32 = gen_rot_replicate(ot, s->T0);
+ if (temp32) {
+ count32 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(count32, count);
+ tcg_gen_rotr_i32(temp32, temp32, count32);
+ /* Zero extend to facilitate later optimization. */
+ tcg_gen_extu_i32_tl(s->T0, temp32);
+ gen_rot_carry(decode, s->T0, count, 31);
+ } else {
+ tcg_gen_rotr_tl(s->T0, s->T0, count);
+ gen_rot_carry(decode, s->T0, count, TARGET_LONG_BITS - 1);
+ }
+ gen_rot_overflow(decode, s->T0, old, count);
+}
+
static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2323,6 +3203,57 @@ static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
}
+static void gen_SALC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_compute_eflags_c(s, s->T0);
+ tcg_gen_neg_tl(s->T0, s->T0);
+}
+
+static void gen_shift_dynamic_flags(DisasContext *s, X86DecodedInsn *decode, TCGv count, CCOp cc_op)
+{
+ TCGv_i32 count32 = tcg_temp_new_i32();
+ decode->cc_op = CC_OP_DYNAMIC;
+ decode->cc_op_dynamic = tcg_temp_new_i32();
+
+ assert(decode->cc_dst == s->T0);
+ if (cc_op_live[s->cc_op] & USES_CC_DST) {
+ decode->cc_dst = tcg_temp_new();
+ tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0),
+ cpu_cc_dst, s->T0);
+ }
+
+ if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+ tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src, count, tcg_constant_tl(0),
+ cpu_cc_src, decode->cc_src);
+ }
+
+ tcg_gen_trunc_tl_i32(count32, count);
+ tcg_gen_movcond_i32(TCG_COND_EQ, decode->cc_op_dynamic, count32, tcg_constant_i32(0),
+ cpu_cc_op, tcg_constant_i32(cc_op));
+}
+
+static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ bool can_be_zero;
+ TCGv count;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+
+ if (!count) {
+ return;
+ }
+
+ decode->cc_dst = s->T0;
+ decode->cc_src = tcg_temp_new();
+ tcg_gen_subi_tl(decode->cc_src, count, 1);
+ tcg_gen_sar_tl(decode->cc_src, s->T0, decode->cc_src);
+ tcg_gen_sar_tl(s->T0, s->T0, count);
+ if (can_be_zero) {
+ gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+ } else {
+ decode->cc_op = CC_OP_SARB + ot;
+ }
+}
+
static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2421,6 +3352,28 @@ static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *d
gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1);
}
+static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ bool can_be_zero;
+ TCGv count;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+
+ if (!count) {
+ return;
+ }
+
+ decode->cc_dst = s->T0;
+ decode->cc_src = tcg_temp_new();
+ tcg_gen_subi_tl(decode->cc_src, count, 1);
+ tcg_gen_shl_tl(decode->cc_src, s->T0, decode->cc_src);
+ tcg_gen_shl_tl(s->T0, s->T0, count);
+ if (can_be_zero) {
+ gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot);
+ } else {
+ decode->cc_op = CC_OP_SHLB + ot;
+ }
+}
+
static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2431,6 +3384,28 @@ static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_shl_tl(s->T0, s->T0, s->T1);
}
+static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ bool can_be_zero;
+ TCGv count;
+ MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+
+ if (!count) {
+ return;
+ }
+
+ decode->cc_dst = s->T0;
+ decode->cc_src = tcg_temp_new();
+ tcg_gen_subi_tl(decode->cc_src, count, 1);
+ tcg_gen_shr_tl(decode->cc_src, s->T0, decode->cc_src);
+ tcg_gen_shr_tl(s->T0, s->T0, count);
+ if (can_be_zero) {
+ gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+ } else {
+ decode->cc_op = CC_OP_SARB + ot;
+ }
+}
+
static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
@@ -2441,6 +3416,25 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_shr_tl(s->T0, s->T0, s->T1);
}
+static void gen_STC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_compute_eflags(s);
+ tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
+}
+
+static void gen_STD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ tcg_gen_st_i32(tcg_constant_i32(-1), tcg_env, offsetof(CPUX86State, df));
+}
+
+static void gen_STI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_set_eflags(s, IF_MASK);
+ /* interrupts are enabled only after the first insn following sti */
+ gen_update_eip_next(s);
+ gen_eob_inhibit_irq(s);
+}
+
static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -2997,6 +3991,14 @@ static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ /* AL is already zero-extended into s->T0. */
+ tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0);
+ gen_add_A0_ds_seg(s);
+ gen_op_ld_v(s, MO_8, s->T0, s->A0);
+}
+
static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
/* special case XOR reg, reg */
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread* Re: [PATCH v2 17/25] target/i386: move C0-FF opcodes to new decoder (except for x87)
2024-05-06 8:09 ` [PATCH v2 17/25] target/i386: move C0-FF opcodes to new decoder (except for x87) Paolo Bonzini
@ 2024-05-06 16:56 ` Richard Henderson
0 siblings, 0 replies; 38+ messages in thread
From: Richard Henderson @ 2024-05-06 16:56 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel; +Cc: zhao1.liu
On 5/6/24 01:09, Paolo Bonzini wrote:
> The shift instructions are rewritten instead of reusing code from the old
> decoder. Rotates use CC_OP_ADCOX more extensively and generally rely
> more on the optimizer, so that the code generators are shared between
> the immediate-count and variable-count cases.
>
> In particular, this makes gen_RCL and gen_RCR pretty efficient for the
> count == 1 case, which becomes (apart from a few extra movs) something like:
>
> (compute_cc_all if needed)
> // save old value for OF calculation
> mov cc_src2, T0
> // the bulk of RCL is just this!
> deposit T0, cc_src, T0, 1, TARGET_LONG_BITS - 1
> // compute carry
> shr cc_dst, cc_src2, length - 1
> and cc_dst, cc_dst, 1
> // compute overflow
> xor cc_src2, cc_src2, T0
> extract cc_src2, cc_src2, length - 1, 1
>
> 32-bit MUL and IMUL are also slightly more efficient on 64-bit hosts.
>
> Signed-off-by: Paolo Bonzini<pbonzini@redhat.com>
> ---
> target/i386/tcg/decode-new.h | 1 +
> target/i386/tcg/translate.c | 23 +-
> target/i386/tcg/decode-new.c.inc | 142 +++++
> target/i386/tcg/emit.c.inc | 1014 +++++++++++++++++++++++++++++-
> 4 files changed, 1169 insertions(+), 11 deletions(-)
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
r~
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 18/25] target/i386: merge and enlarge a few ranges for call to disas_insn_new
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (16 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 17/25] target/i386: move C0-FF opcodes to new decoder (except for x87) Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 19/25] target/i386: move remaining conditional operations to new decoder Paolo Bonzini
` (6 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Since new opcodes are not going to be added in translate.c, round the
case labels that call disas_insn_new() so that they cover whole sets of
eight opcodes when possible.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 79b6e2760fe..b94d9504090 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -6868,9 +6868,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
break;
case 0x10e ... 0x117:
case 0x128 ... 0x12f:
- case 0x138 ... 0x13a:
- case 0x150 ... 0x179:
- case 0x17c ... 0x17f:
+ case 0x138 ... 0x13f:
+ case 0x150 ... 0x17f:
case 0x1c2:
case 0x1c4 ... 0x1c6:
case 0x1d0 ... 0x1fe:
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread* [PATCH v2 19/25] target/i386: move remaining conditional operations to new decoder
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (17 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 18/25] target/i386: merge and enlarge a few ranges for call to disas_insn_new Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 20/25] target/i386: move BSWAP " Paolo Bonzini
` (5 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Move long-displacement Jcc, SETcc and CMOVcc to the new decoder.
While filling in the tables makes the code seem longer, the new
emitters are all just one line of code.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 1 +
target/i386/tcg/translate.c | 2 +-
target/i386/tcg/decode-new.c.inc | 56 ++++++++++++++++++++++++++++++++
target/i386/tcg/emit.c.inc | 10 ++++++
4 files changed, 68 insertions(+), 1 deletion(-)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 77bb31eb143..cd7ceca21e8 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -106,6 +106,7 @@ typedef enum X86CPUIDFeature {
X86_FEAT_AVX2,
X86_FEAT_BMI1,
X86_FEAT_BMI2,
+ X86_FEAT_CMOV,
X86_FEAT_CMPCCXADD,
X86_FEAT_F16C,
X86_FEAT_FMA,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index b94d9504090..a80021930bf 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3206,7 +3206,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#ifndef CONFIG_USER_ONLY
use_new &= b <= limit;
#endif
- if (use_new && 0) {
+ if (use_new && (b >= 0x138 && b <= 0x19f)) {
disas_insn_new(s, cpu, b);
return true;
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index a47ecab6dd4..7528e9e4f07 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -993,6 +993,15 @@ static const X86OpEntry opcodes_0F[256] = {
/* Incorrectly listed as Mq,Vq in the manual */
[0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex5 p_00_66),
+ [0x40] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x41] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x42] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x43] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x44] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x45] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x46] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x47] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+
[0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66),
[0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
[0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
@@ -1020,6 +1029,24 @@ static const X86OpEntry opcodes_0F[256] = {
[0x76] = X86_OP_ENTRY3(PCMPEQD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
[0x77] = X86_OP_GROUP0(0F77),
+ [0x80] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x81] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x82] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x83] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x84] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x85] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x86] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x87] = X86_OP_ENTRYr(Jcc, J,z_f64),
+
+ [0x90] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x91] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x92] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x93] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x94] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x95] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x96] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x97] = X86_OP_ENTRYw(SETcc, E,b),
+
[0x28] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1 p_00_66), /* MOVAPS */
[0x29] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 p_00_66), /* MOVAPS */
[0x2A] = X86_OP_GROUP0(0F2A),
@@ -1032,6 +1059,15 @@ static const X86OpEntry opcodes_0F[256] = {
[0x38] = X86_OP_GROUP0(0F38),
[0x3a] = X86_OP_GROUP0(0F3A),
+ [0x48] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x49] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x4a] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x4b] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x4c] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x4d] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x4e] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+ [0x4f] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)),
+
[0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x5a] = X86_OP_GROUP0(0F5A),
@@ -1057,6 +1093,24 @@ static const X86OpEntry opcodes_0F[256] = {
[0x7e] = X86_OP_GROUP0(0F7E),
[0x7f] = X86_OP_GROUP0(0F7F),
+ [0x88] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x89] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x8a] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x8b] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x8c] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x8d] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x8e] = X86_OP_ENTRYr(Jcc, J,z_f64),
+ [0x8f] = X86_OP_ENTRYr(Jcc, J,z_f64),
+
+ [0x98] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x99] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x9a] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x9b] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x9c] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x9d] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x9e] = X86_OP_ENTRYw(SETcc, E,b),
+ [0x9f] = X86_OP_ENTRYw(SETcc, E,b),
+
[0xae] = X86_OP_GROUP0(group15),
[0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
@@ -1918,6 +1972,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
switch (cpuid) {
case X86_FEAT_None:
return true;
+ case X86_FEAT_CMOV:
+ return (s->cpuid_features & CPUID_CMOV);
case X86_FEAT_F16C:
return (s->cpuid_ext_features & CPUID_EXT_F16C);
case X86_FEAT_FMA:
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index ffe458b80f9..a48ff1536a4 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1386,6 +1386,11 @@ static void gen_CMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
}
+static void gen_CMOVcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_cmovcc1(s, decode->b & 0xf, s->T0, s->T1);
+}
+
static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGLabel *label_top = gen_new_label();
@@ -3298,6 +3303,11 @@ static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_SETcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_setcc1(s, decode->b & 0xf, s->T0);
+}
+
static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 20/25] target/i386: move BSWAP to new decoder
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (18 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 19/25] target/i386: move remaining conditional operations to new decoder Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 21/25] target/i386: port extensions of one-byte opcodes " Paolo Bonzini
` (4 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 4 +++-
target/i386/tcg/decode-new.c.inc | 9 +++++++++
target/i386/tcg/emit.c.inc | 11 +++++++++++
3 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a80021930bf..87ecf082316 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3206,7 +3206,9 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#ifndef CONFIG_USER_ONLY
use_new &= b <= limit;
#endif
- if (use_new && (b >= 0x138 && b <= 0x19f)) {
+ if (use_new &&
+ ((b >= 0x138 && b <= 0x19f) ||
+ (b >= 0x1c8 && b <= 0x1cf))) {
disas_insn_new(s, cpu, b);
return true;
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 7528e9e4f07..e65fa208a43 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1118,6 +1118,15 @@ static const X86OpEntry opcodes_0F[256] = {
[0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
[0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
+ [0xc8] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+ [0xc9] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+ [0xca] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+ [0xcb] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+ [0xcc] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+ [0xcd] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+ [0xce] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+ [0xcf] = X86_OP_ENTRY1(BSWAP, LoBits,y),
+
[0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2),
[0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
[0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66),
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index a48ff1536a4..c826adbbbb8 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1310,6 +1310,17 @@ static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
}
}
+static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+#ifdef TARGET_X86_64
+ if (s->dflag == MO_64) {
+ tcg_gen_bswap64_i64(s->T0, s->T0);
+ return;
+ }
+#endif
+ tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ);
+}
+
static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[0].ot;
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 21/25] target/i386: port extensions of one-byte opcodes to new decoder
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (19 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 20/25] target/i386: move BSWAP " Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 22/25] target/i386: remove now-converted opcodes from old decoder Paolo Bonzini
` (3 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
A few two-byte opcodes are simple extensions of existing one-byte opcodes;
they are easy to decode and need only trivial additions to emit.c.inc
(gen_LFS, gen_LGS and gen_LSS, which reuse gen_lxx_seg). Port them to
the new decoder.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.h | 1 +
target/i386/tcg/translate.c | 4 ++++
target/i386/tcg/decode-new.c.inc | 31 +++++++++++++++++++++++++++++++
target/i386/tcg/emit.c.inc | 15 +++++++++++++++
4 files changed, 51 insertions(+)
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index cd7ceca21e8..2ea06b44787 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -47,6 +47,7 @@ typedef enum X86OpType {
X86_TYPE_Y, /* string destination */
/* Custom */
+ X86_TYPE_EM, /* modrm byte selects an ALU memory operand */
X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */
X86_TYPE_I_unsigned, /* Immediate, zero-extended */
X86_TYPE_2op, /* 2-operand RMW instruction */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 87ecf082316..14417b961ce 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3208,6 +3208,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
#endif
if (use_new &&
((b >= 0x138 && b <= 0x19f) ||
+ (b & ~9) == 0x1a0 ||
+ b == 0x1af || b == 0x1b2 ||
+ (b >= 0x1b4 && b <= 0x1b7) ||
+ b == 0x1be || b == 0x1bf || b == 0x1c3 ||
(b >= 0x1c8 && b <= 0x1cf))) {
disas_insn_new(s, cpu, b);
return true;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index e65fa208a43..8311b479846 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -43,6 +43,12 @@
* Operand types
* -------------
*
+ * For memory-only operands, if the emitter function wants to rely on
+ * generic load and writeback, the decoder needs to know the type of the
+ * operand. Therefore, M is often replaced by the more specific EM and WM
+ * (respectively selecting an ALU operand, like the operand type E, or a
+ * vector operand like the operand type W).
+ *
* Immediates are almost always signed or masked away in helpers. Two
* common exceptions are IN/OUT and absolute jumps. For these, there is
* an additional custom operand type "I_unsigned". Alternatively, the
@@ -1047,6 +1053,9 @@ static const X86OpEntry opcodes_0F[256] = {
[0x96] = X86_OP_ENTRYw(SETcc, E,b),
[0x97] = X86_OP_ENTRYw(SETcc, E,b),
+ [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
+ [0xa1] = X86_OP_ENTRYw(POP, FS, w),
+
[0x28] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1 p_00_66), /* MOVAPS */
[0x29] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 p_00_66), /* MOVAPS */
[0x2A] = X86_OP_GROUP0(0F2A),
@@ -1111,9 +1120,26 @@ static const X86OpEntry opcodes_0F[256] = {
[0x9e] = X86_OP_ENTRYw(SETcc, E,b),
[0x9f] = X86_OP_ENTRYw(SETcc, E,b),
+ [0xa8] = X86_OP_ENTRYr(PUSH, GS, w),
+ [0xa9] = X86_OP_ENTRYw(POP, GS, w),
[0xae] = X86_OP_GROUP0(group15),
+ /*
+ * It's slightly more efficient to put the Ev operand in T0 and allow
+ * gen_IMUL3 to assume sextT0. Multiplication is commutative anyway.
+ */
+ [0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0),
+
+ [0xb2] = X86_OP_ENTRY3(LSS, G,v, M,p, None, None),
+ [0xb4] = X86_OP_ENTRY3(LFS, G,v, M,p, None, None),
+ [0xb5] = X86_OP_ENTRY3(LGS, G,v, M,p, None, None),
+ [0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */
+ [0xb7] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, zextT0), /* MOVZX */
+
+ [0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
+ [0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
[0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
+ [0xc3] = X86_OP_ENTRY3(MOV, EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
[0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66),
[0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66),
[0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66),
@@ -1815,8 +1841,13 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
case X86_TYPE_WM: /* modrm byte selects an XMM/YMM memory operand */
op->unit = X86_OP_SSE;
+ goto get_modrm_mem;
+
+ case X86_TYPE_EM: /* modrm byte selects an ALU memory operand */
+ op->unit = X86_OP_INT;
/* fall through */
case X86_TYPE_M: /* modrm byte selects a memory operand */
+ get_modrm_mem:
modrm = get_modrm(s, env);
if ((modrm >> 6) == 3) {
return false;
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index c826adbbbb8..01aed001075 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1979,6 +1979,16 @@ static void gen_LES(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
gen_lxx_seg(s, env, decode, R_ES);
}
+static void gen_LFS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_lxx_seg(s, env, decode, R_FS);
+}
+
+static void gen_LGS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_lxx_seg(s, env, decode, R_GS);
+}
+
static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
@@ -2023,6 +2033,11 @@ static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
}
+static void gen_LSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_lxx_seg(s, env, decode, R_SS);
+}
+
static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
/* nothing to do! */
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 22/25] target/i386: remove now-converted opcodes from old decoder
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (20 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 21/25] target/i386: port extensions of one-byte opcodes " Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 23/25] target/i386: decode x87 instructions in a separate function Paolo Bonzini
` (2 subsequent siblings)
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Send all converted opcodes to disas_insn_new() directly from the big
decoding switch statement; once more, the debugging/bisecting logic
disappears.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/helper.h | 11 -
target/i386/tcg/shift_helper_template.h.inc | 108 -
target/i386/tcg/int_helper.c | 34 -
target/i386/tcg/translate.c | 2175 +------------------
target/i386/tcg/decode-new.c.inc | 3 -
5 files changed, 11 insertions(+), 2320 deletions(-)
delete mode 100644 target/i386/tcg/shift_helper_template.h.inc
diff --git a/target/i386/helper.h b/target/i386/helper.h
index ac2b04abd63..3c207ac62d6 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -207,15 +207,4 @@ DEF_HELPER_1(emms, void, env)
#define SHIFT 2
#include "tcg/ops_sse_header.h.inc"
-DEF_HELPER_3(rclb, tl, env, tl, tl)
-DEF_HELPER_3(rclw, tl, env, tl, tl)
-DEF_HELPER_3(rcll, tl, env, tl, tl)
-DEF_HELPER_3(rcrb, tl, env, tl, tl)
-DEF_HELPER_3(rcrw, tl, env, tl, tl)
-DEF_HELPER_3(rcrl, tl, env, tl, tl)
-#ifdef TARGET_X86_64
-DEF_HELPER_3(rclq, tl, env, tl, tl)
-DEF_HELPER_3(rcrq, tl, env, tl, tl)
-#endif
-
DEF_HELPER_1(rdrand, tl, env)
diff --git a/target/i386/tcg/shift_helper_template.h.inc b/target/i386/tcg/shift_helper_template.h.inc
deleted file mode 100644
index 54f15d6e05c..00000000000
--- a/target/i386/tcg/shift_helper_template.h.inc
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * x86 shift helpers
- *
- * Copyright (c) 2008 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DATA_BITS (1 << (3 + SHIFT))
-#define SHIFT_MASK (DATA_BITS - 1)
-#if DATA_BITS <= 32
-#define SHIFT1_MASK 0x1f
-#else
-#define SHIFT1_MASK 0x3f
-#endif
-
-#if DATA_BITS == 8
-#define SUFFIX b
-#define DATA_MASK 0xff
-#elif DATA_BITS == 16
-#define SUFFIX w
-#define DATA_MASK 0xffff
-#elif DATA_BITS == 32
-#define SUFFIX l
-#define DATA_MASK 0xffffffff
-#elif DATA_BITS == 64
-#define SUFFIX q
-#define DATA_MASK 0xffffffffffffffffULL
-#else
-#error unhandled operand size
-#endif
-
-target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0,
- target_ulong t1)
-{
- int count, eflags;
- target_ulong src;
- target_long res;
-
- count = t1 & SHIFT1_MASK;
-#if DATA_BITS == 16
- count = rclw_table[count];
-#elif DATA_BITS == 8
- count = rclb_table[count];
-#endif
- if (count) {
- eflags = env->cc_src;
- t0 &= DATA_MASK;
- src = t0;
- res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1));
- if (count > 1) {
- res |= t0 >> (DATA_BITS + 1 - count);
- }
- t0 = res;
- env->cc_src = (eflags & ~(CC_C | CC_O)) |
- (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
- ((src >> (DATA_BITS - count)) & CC_C);
- }
- return t0;
-}
-
-target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0,
- target_ulong t1)
-{
- int count, eflags;
- target_ulong src;
- target_long res;
-
- count = t1 & SHIFT1_MASK;
-#if DATA_BITS == 16
- count = rclw_table[count];
-#elif DATA_BITS == 8
- count = rclb_table[count];
-#endif
- if (count) {
- eflags = env->cc_src;
- t0 &= DATA_MASK;
- src = t0;
- res = (t0 >> count) |
- ((target_ulong)(eflags & CC_C) << (DATA_BITS - count));
- if (count > 1) {
- res |= t0 << (DATA_BITS + 1 - count);
- }
- t0 = res;
- env->cc_src = (eflags & ~(CC_C | CC_O)) |
- (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) |
- ((src >> (count - 1)) & CC_C);
- }
- return t0;
-}
-
-#undef DATA_BITS
-#undef SHIFT_MASK
-#undef SHIFT1_MASK
-#undef DATA_TYPE
-#undef DATA_MASK
-#undef SUFFIX
diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c
index ab85dc55400..df16130f5df 100644
--- a/target/i386/tcg/int_helper.c
+++ b/target/i386/tcg/int_helper.c
@@ -29,22 +29,6 @@
//#define DEBUG_MULDIV
-/* modulo 9 table */
-static const uint8_t rclb_table[32] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 0, 1, 2, 3, 4, 5, 6,
- 7, 8, 0, 1, 2, 3, 4, 5,
- 6, 7, 8, 0, 1, 2, 3, 4,
-};
-
-/* modulo 17 table */
-static const uint8_t rclw_table[32] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 0, 1, 2, 3, 4, 5, 6,
- 7, 8, 9, 10, 11, 12, 13, 14,
-};
-
/* division, flags are undefined */
void helper_divb_AL(CPUX86State *env, target_ulong t0)
@@ -447,24 +431,6 @@ target_ulong helper_pext(target_ulong src, target_ulong mask)
return dest;
}
-#define SHIFT 0
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-
-#define SHIFT 1
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-
-#define SHIFT 2
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-
-#ifdef TARGET_X86_64
-#define SHIFT 3
-#include "shift_helper_template.h.inc"
-#undef SHIFT
-#endif
-
/* Test that BIT is enabled in CR4. If not, raise an illegal opcode
exception. This reduces the requirements for rare CR4 bits being
mapped into HFLAGS. */
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 14417b961ce..634b162ae97 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -215,7 +215,6 @@ typedef struct DisasContext {
#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
-STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
@@ -242,21 +241,8 @@ static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s);
static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num);
static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num);
-static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);
-/* i386 arith/logic operations */
-enum {
- OP_ADDL,
- OP_ORL,
- OP_ADCL,
- OP_SBBL,
- OP_ANDL,
- OP_SUBL,
- OP_XORL,
- OP_CMPL,
-};
-
/* i386 shift ops */
enum {
OP_ROL,
@@ -442,13 +428,6 @@ static inline MemOp mo_b_d(int b, MemOp ot)
return b & 1 ? ot : MO_8;
}
-/* Select size 8 if lsb of B is clear, else OT capped at 32.
- Used for decoding operand size of port opcodes. */
-static inline MemOp mo_b_d32(int b, MemOp ot)
-{
- return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
-}
-
/* Compute the result of writing t0 to the OT-sized register REG.
*
* If DEST is NULL, store the result into the register and return the
@@ -851,25 +830,6 @@ static void gen_op_update2_cc(DisasContext *s)
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}
-static void gen_op_update3_cc(DisasContext *s, TCGv reg)
-{
- tcg_gen_mov_tl(cpu_cc_src2, reg);
- tcg_gen_mov_tl(cpu_cc_src, s->T1);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-}
-
-static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
-{
- tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
-}
-
-static void gen_op_update_neg_cc(DisasContext *s)
-{
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- tcg_gen_neg_tl(cpu_cc_src, s->T0);
- tcg_gen_movi_tl(s->cc_srcT, 0);
-}
-
/* compute all eflags to reg */
static void gen_mov_eflags(DisasContext *s, TCGv reg)
{
@@ -1483,165 +1443,6 @@ static bool check_cpl0(DisasContext *s)
return false;
}
-/* If vm86, check for iopl == 3; if not, raise #GP and return false. */
-static bool check_vm86_iopl(DisasContext *s)
-{
- if (!VM86(s) || IOPL(s) == 3) {
- return true;
- }
- gen_exception_gpf(s);
- return false;
-}
-
-/* Check for iopl allowing access; if not, raise #GP and return false. */
-static bool check_iopl(DisasContext *s)
-{
- if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
- return true;
- }
- gen_exception_gpf(s);
- return false;
-}
-
-/* if d == OR_TMP0, it means memory operand (address in A0) */
-static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
-{
- /* Invalid lock prefix when destination is not memory or OP_CMPL. */
- if ((d != OR_TMP0 || op == OP_CMPL) && s1->prefix & PREFIX_LOCK) {
- gen_illegal_opcode(s1);
- return;
- }
-
- if (d != OR_TMP0) {
- gen_op_mov_v_reg(s1, ot, s1->T0, d);
- } else if (!(s1->prefix & PREFIX_LOCK)) {
- gen_op_ld_v(s1, ot, s1->T0, s1->A0);
- }
- switch(op) {
- case OP_ADCL:
- gen_compute_eflags_c(s1, s1->tmp4);
- if (s1->prefix & PREFIX_LOCK) {
- tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
- tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
- s1->mem_index, ot | MO_LE);
- } else {
- tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
- tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
- gen_op_update3_cc(s1, s1->tmp4);
- set_cc_op(s1, CC_OP_ADCB + ot);
- break;
- case OP_SBBL:
- gen_compute_eflags_c(s1, s1->tmp4);
- if (s1->prefix & PREFIX_LOCK) {
- tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
- tcg_gen_neg_tl(s1->T0, s1->T0);
- tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
- s1->mem_index, ot | MO_LE);
- } else {
- tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
- tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
- gen_op_update3_cc(s1, s1->tmp4);
- set_cc_op(s1, CC_OP_SBBB + ot);
- break;
- case OP_ADDL:
- if (s1->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
- s1->mem_index, ot | MO_LE);
- } else {
- tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
- gen_op_update2_cc(s1);
- set_cc_op(s1, CC_OP_ADDB + ot);
- break;
- case OP_SUBL:
- if (s1->prefix & PREFIX_LOCK) {
- tcg_gen_neg_tl(s1->T0, s1->T1);
- tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
- s1->mem_index, ot | MO_LE);
- tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
- } else {
- tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
- tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
- gen_op_update2_cc(s1);
- set_cc_op(s1, CC_OP_SUBB + ot);
- break;
- default:
- case OP_ANDL:
- if (s1->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
- s1->mem_index, ot | MO_LE);
- } else {
- tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
- gen_op_update1_cc(s1);
- set_cc_op(s1, CC_OP_LOGICB + ot);
- break;
- case OP_ORL:
- if (s1->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
- s1->mem_index, ot | MO_LE);
- } else {
- tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
- gen_op_update1_cc(s1);
- set_cc_op(s1, CC_OP_LOGICB + ot);
- break;
- case OP_XORL:
- if (s1->prefix & PREFIX_LOCK) {
- tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
- s1->mem_index, ot | MO_LE);
- } else {
- tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
- gen_op_update1_cc(s1);
- set_cc_op(s1, CC_OP_LOGICB + ot);
- break;
- case OP_CMPL:
- tcg_gen_mov_tl(cpu_cc_src, s1->T1);
- tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
- tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
- set_cc_op(s1, CC_OP_SUBB + ot);
- break;
- }
-}
-
-/* if d == OR_TMP0, it means memory operand (address in A0) */
-static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
-{
- if (s1->prefix & PREFIX_LOCK) {
- if (d != OR_TMP0) {
- /* Lock prefix when destination is not memory */
- gen_illegal_opcode(s1);
- return;
- }
- tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
- tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
- s1->mem_index, ot | MO_LE);
- } else {
- if (d != OR_TMP0) {
- gen_op_mov_v_reg(s1, ot, s1->T0, d);
- } else {
- gen_op_ld_v(s1, ot, s1->T0, s1->A0);
- }
- tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
- gen_op_st_rm_T0_A0(s1, ot, d);
- }
-
- gen_compute_eflags_c(s1, cpu_cc_src);
- tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
- set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
-}
-
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
TCGv shm1, TCGv count, bool is_right)
{
@@ -1684,298 +1485,6 @@ static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
set_cc_op(s, CC_OP_DYNAMIC);
}
-static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
- int is_right, int is_arith)
-{
- target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
-
- /* load */
- if (op1 == OR_TMP0) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, op1);
- }
-
- tcg_gen_andi_tl(s->T1, s->T1, mask);
- tcg_gen_subi_tl(s->tmp0, s->T1, 1);
-
- if (is_right) {
- if (is_arith) {
- gen_exts(ot, s->T0);
- tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
- tcg_gen_sar_tl(s->T0, s->T0, s->T1);
- } else {
- gen_extu(ot, s->T0);
- tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
- tcg_gen_shr_tl(s->T0, s->T0, s->T1);
- }
- } else {
- tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
- tcg_gen_shl_tl(s->T0, s->T0, s->T1);
- }
-
- /* store */
- gen_op_st_rm_T0_A0(s, ot, op1);
-
- gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
-}
-
-static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
- int is_right, int is_arith)
-{
- int mask = (ot == MO_64 ? 0x3f : 0x1f);
-
- /* load */
- if (op1 == OR_TMP0)
- gen_op_ld_v(s, ot, s->T0, s->A0);
- else
- gen_op_mov_v_reg(s, ot, s->T0, op1);
-
- op2 &= mask;
- if (op2 != 0) {
- if (is_right) {
- if (is_arith) {
- gen_exts(ot, s->T0);
- tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
- tcg_gen_sari_tl(s->T0, s->T0, op2);
- } else {
- gen_extu(ot, s->T0);
- tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
- tcg_gen_shri_tl(s->T0, s->T0, op2);
- }
- } else {
- tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
- tcg_gen_shli_tl(s->T0, s->T0, op2);
- }
- }
-
- /* store */
- gen_op_st_rm_T0_A0(s, ot, op1);
-
- /* update eflags if non zero shift */
- if (op2 != 0) {
- tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
- }
-}
-
-static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
-{
- target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
- TCGv_i32 t0, t1;
-
- /* load */
- if (op1 == OR_TMP0) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, op1);
- }
-
- tcg_gen_andi_tl(s->T1, s->T1, mask);
-
- switch (ot) {
- case MO_8:
- /* Replicate the 8-bit input so that a 32-bit rotate works. */
- tcg_gen_ext8u_tl(s->T0, s->T0);
- tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
- goto do_long;
- case MO_16:
- /* Replicate the 16-bit input so that a 32-bit rotate works. */
- tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
- goto do_long;
- do_long:
-#ifdef TARGET_X86_64
- case MO_32:
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
- if (is_right) {
- tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
- } else {
- tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
- }
- tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
- break;
-#endif
- default:
- if (is_right) {
- tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
- } else {
- tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
- }
- break;
- }
-
- /* store */
- gen_op_st_rm_T0_A0(s, ot, op1);
-
- /* We'll need the flags computed into CC_SRC. */
- gen_compute_eflags(s);
-
- /* The value that was "rotated out" is now present at the other end
- of the word. Compute C into CC_DST and O into CC_SRC2. Note that
- since we've computed the flags into CC_SRC, these variables are
- currently dead. */
- if (is_right) {
- tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
- tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
- tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
- } else {
- tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
- tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
- }
- tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
- tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
-
- /* Now conditionally store the new CC_OP value. If the shift count
- is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
- Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
- exactly as we computed above. */
- t0 = tcg_constant_i32(0);
- t1 = tcg_temp_new_i32();
- tcg_gen_trunc_tl_i32(t1, s->T1);
- tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
- tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
- tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
- s->tmp2_i32, s->tmp3_i32);
-
- /* The CC_OP value is no longer predictable. */
- set_cc_op(s, CC_OP_DYNAMIC);
-}
-
-static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
- int is_right)
-{
- int mask = (ot == MO_64 ? 0x3f : 0x1f);
- int shift;
-
- /* load */
- if (op1 == OR_TMP0) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, op1);
- }
-
- op2 &= mask;
- if (op2 != 0) {
- switch (ot) {
-#ifdef TARGET_X86_64
- case MO_32:
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- if (is_right) {
- tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
- } else {
- tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
- }
- tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
- break;
-#endif
- default:
- if (is_right) {
- tcg_gen_rotri_tl(s->T0, s->T0, op2);
- } else {
- tcg_gen_rotli_tl(s->T0, s->T0, op2);
- }
- break;
- case MO_8:
- mask = 7;
- goto do_shifts;
- case MO_16:
- mask = 15;
- do_shifts:
- shift = op2 & mask;
- if (is_right) {
- shift = mask + 1 - shift;
- }
- gen_extu(ot, s->T0);
- tcg_gen_shli_tl(s->tmp0, s->T0, shift);
- tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
- tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
- break;
- }
- }
-
- /* store */
- gen_op_st_rm_T0_A0(s, ot, op1);
-
- if (op2 != 0) {
- /* Compute the flags into CC_SRC. */
- gen_compute_eflags(s);
-
- /* The value that was "rotated out" is now present at the other end
- of the word. Compute C into CC_DST and O into CC_SRC2. Note that
- since we've computed the flags into CC_SRC, these variables are
- currently dead. */
- if (is_right) {
- tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
- tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
- tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
- } else {
- tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
- tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
- }
- tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
- tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
- set_cc_op(s, CC_OP_ADCOX);
- }
-}
-
-/* XXX: add faster immediate = 1 case */
-static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
- int is_right)
-{
- gen_compute_eflags(s);
- assert(s->cc_op == CC_OP_EFLAGS);
-
- /* load */
- if (op1 == OR_TMP0)
- gen_op_ld_v(s, ot, s->T0, s->A0);
- else
- gen_op_mov_v_reg(s, ot, s->T0, op1);
-
- if (is_right) {
- switch (ot) {
- case MO_8:
- gen_helper_rcrb(s->T0, tcg_env, s->T0, s->T1);
- break;
- case MO_16:
- gen_helper_rcrw(s->T0, tcg_env, s->T0, s->T1);
- break;
- case MO_32:
- gen_helper_rcrl(s->T0, tcg_env, s->T0, s->T1);
- break;
-#ifdef TARGET_X86_64
- case MO_64:
- gen_helper_rcrq(s->T0, tcg_env, s->T0, s->T1);
- break;
-#endif
- default:
- g_assert_not_reached();
- }
- } else {
- switch (ot) {
- case MO_8:
- gen_helper_rclb(s->T0, tcg_env, s->T0, s->T1);
- break;
- case MO_16:
- gen_helper_rclw(s->T0, tcg_env, s->T0, s->T1);
- break;
- case MO_32:
- gen_helper_rcll(s->T0, tcg_env, s->T0, s->T1);
- break;
-#ifdef TARGET_X86_64
- case MO_64:
- gen_helper_rclq(s->T0, tcg_env, s->T0, s->T1);
- break;
-#endif
- default:
- g_assert_not_reached();
- }
- }
- /* store */
- gen_op_st_rm_T0_A0(s, ot, op1);
-}
-
/* XXX: add faster immediate case */
static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
bool is_right, TCGv count_in)
@@ -2060,63 +1569,6 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
}
-static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
-{
- if (s != OR_TMP1)
- gen_op_mov_v_reg(s1, ot, s1->T1, s);
- switch(op) {
- case OP_ROL:
- gen_rot_rm_T1(s1, ot, d, 0);
- break;
- case OP_ROR:
- gen_rot_rm_T1(s1, ot, d, 1);
- break;
- case OP_SHL:
- case OP_SHL1:
- gen_shift_rm_T1(s1, ot, d, 0, 0);
- break;
- case OP_SHR:
- gen_shift_rm_T1(s1, ot, d, 1, 0);
- break;
- case OP_SAR:
- gen_shift_rm_T1(s1, ot, d, 1, 1);
- break;
- case OP_RCL:
- gen_rotc_rm_T1(s1, ot, d, 0);
- break;
- case OP_RCR:
- gen_rotc_rm_T1(s1, ot, d, 1);
- break;
- }
-}
-
-static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
-{
- switch(op) {
- case OP_ROL:
- gen_rot_rm_im(s1, ot, d, c, 0);
- break;
- case OP_ROR:
- gen_rot_rm_im(s1, ot, d, c, 1);
- break;
- case OP_SHL:
- case OP_SHL1:
- gen_shift_rm_im(s1, ot, d, c, 0, 0);
- break;
- case OP_SHR:
- gen_shift_rm_im(s1, ot, d, c, 1, 0);
- break;
- case OP_SAR:
- gen_shift_rm_im(s1, ot, d, c, 1, 1);
- break;
- default:
- /* currently not optimized */
- tcg_gen_movi_tl(s1->T1, c);
- gen_shift(s1, op, ot, d, OR_TMP1);
- break;
- }
-}
-
#define X86_MAX_INSN_LENGTH 15
static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
@@ -2152,11 +1604,6 @@ static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
return translator_ldub(env, &s->base, advance_pc(env, s, 1));
}
-static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
-{
- return translator_lduw(env, &s->base, advance_pc(env, s, 2));
-}
-
static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
{
return translator_lduw(env, &s->base, advance_pc(env, s, 2));
@@ -2482,15 +1929,6 @@ static target_long insn_get_signed(CPUX86State *env, DisasContext *s, MemOp ot)
return ret;
}
-static inline int insn_const_size(MemOp ot)
-{
- if (ot <= MO_32) {
- return 1 << ot;
- } else {
- return 4;
- }
-}
-
static void gen_conditional_jump_labels(DisasContext *s, target_long diff,
TCGLabel *not_taken, TCGLabel *taken)
{
@@ -2522,12 +1960,6 @@ static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src)
tcg_gen_movcond_tl(cc.cond, dest, cc.reg, cc.reg2, src, dest);
}
-static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
-{
- tcg_gen_ld32u_tl(s->T0, tcg_env,
- offsetof(CPUX86State,segs[seg_reg].selector));
-}
-
static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg)
{
TCGv selector = tcg_temp_new();
@@ -3018,9 +2450,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}
-static bool first = true;
-static unsigned long limit;
-
#include "decode-new.h"
#include "emit.c.inc"
#include "decode-new.c.inc"
@@ -3177,45 +2606,13 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
prefixes = 0;
- if (first) {
- const char *limit_str = getenv("QEMU_I386_LIMIT");
- limit = limit_str ? atol(limit_str) : -1;
- first = false;
- }
- bool use_new = true;
-#ifdef CONFIG_USER_ONLY
- use_new &= limit > 0;
-#endif
-
next_byte:
s->prefix = prefixes;
b = x86_ldub_code(env, s);
/* Collect prefixes. */
switch (b) {
- default:
-#ifndef CONFIG_USER_ONLY
- use_new &= b <= limit;
-#endif
- if (use_new && (b < 0xd8 || b >= 0xe0)) {
- disas_insn_new(s, cpu, b);
- return true;
- }
- break;
case 0x0f:
b = x86_ldub_code(env, s) + 0x100;
-#ifndef CONFIG_USER_ONLY
- use_new &= b <= limit;
-#endif
- if (use_new &&
- ((b >= 0x138 && b <= 0x19f) ||
- (b & ~9) == 0x1a0 ||
- b == 0x1af || b == 0x1b2 ||
- (b >= 0x1b4 && b <= 0x1b7) ||
- b == 0x1be || b == 0x1bf || b == 0x1c3 ||
- (b >= 0x1c8 && b <= 0x1cf))) {
- disas_insn_new(s, cpu, b);
- return true;
- }
break;
case 0xf3:
prefixes |= PREFIX_REPZ;
@@ -3313,558 +2710,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
switch (b) {
/**************************/
/* arith & logic */
- case 0x00 ... 0x05:
- case 0x08 ... 0x0d:
- case 0x10 ... 0x15:
- case 0x18 ... 0x1d:
- case 0x20 ... 0x25:
- case 0x28 ... 0x2d:
- case 0x30 ... 0x35:
- case 0x38 ... 0x3d:
- {
- int f;
- op = (b >> 3) & 7;
- f = (b >> 1) & 3;
-
- ot = mo_b_d(b, dflag);
-
- switch(f) {
- case 0: /* OP Ev, Gv */
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
- if (mod != 3) {
- gen_lea_modrm(env, s, modrm);
- opreg = OR_TMP0;
- } else if (op == OP_XORL && rm == reg) {
- xor_zero:
- /* xor reg, reg optimisation */
- set_cc_op(s, CC_OP_CLR);
- tcg_gen_movi_tl(s->T0, 0);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
- } else {
- opreg = rm;
- }
- gen_op_mov_v_reg(s, ot, s->T1, reg);
- gen_op(s, op, ot, opreg);
- break;
- case 1: /* OP Gv, Ev */
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- reg = ((modrm >> 3) & 7) | REX_R(s);
- rm = (modrm & 7) | REX_B(s);
- if (mod != 3) {
- gen_lea_modrm(env, s, modrm);
- gen_op_ld_v(s, ot, s->T1, s->A0);
- } else if (op == OP_XORL && rm == reg) {
- goto xor_zero;
- } else {
- gen_op_mov_v_reg(s, ot, s->T1, rm);
- }
- gen_op(s, op, ot, reg);
- break;
- case 2: /* OP A, Iv */
- val = insn_get(env, s, ot);
- tcg_gen_movi_tl(s->T1, val);
- gen_op(s, op, ot, OR_EAX);
- break;
- }
- }
- break;
-
- case 0x82:
- if (CODE64(s))
- goto illegal_op;
- /* fall through */
- case 0x80: /* GRP1 */
- case 0x81:
- case 0x83:
- {
- ot = mo_b_d(b, dflag);
-
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
- op = (modrm >> 3) & 7;
-
- if (mod != 3) {
- if (b == 0x83)
- s->rip_offset = 1;
- else
- s->rip_offset = insn_const_size(ot);
- gen_lea_modrm(env, s, modrm);
- opreg = OR_TMP0;
- } else {
- opreg = rm;
- }
-
- switch(b) {
- default:
- case 0x80:
- case 0x81:
- case 0x82:
- val = insn_get(env, s, ot);
- break;
- case 0x83:
- val = (int8_t)insn_get(env, s, MO_8);
- break;
- }
- tcg_gen_movi_tl(s->T1, val);
- gen_op(s, op, ot, opreg);
- }
- break;
-
- /**************************/
- /* inc, dec, and other misc arith */
- case 0x40 ... 0x47: /* inc Gv */
- ot = dflag;
- gen_inc(s, ot, OR_EAX + (b & 7), 1);
- break;
- case 0x48 ... 0x4f: /* dec Gv */
- ot = dflag;
- gen_inc(s, ot, OR_EAX + (b & 7), -1);
- break;
- case 0xf6: /* GRP3 */
- case 0xf7:
- ot = mo_b_d(b, dflag);
-
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
- op = (modrm >> 3) & 7;
- if (mod != 3) {
- if (op == 0) {
- s->rip_offset = insn_const_size(ot);
- }
- gen_lea_modrm(env, s, modrm);
- /* For those below that handle locked memory, don't load here. */
- if (!(s->prefix & PREFIX_LOCK)
- || op != 2) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- }
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, rm);
- }
-
- switch(op) {
- case 0: /* test */
- val = insn_get(env, s, ot);
- tcg_gen_movi_tl(s->T1, val);
- gen_op_testl_T0_T1_cc(s);
- set_cc_op(s, CC_OP_LOGICB + ot);
- break;
- case 2: /* not */
- if (s->prefix & PREFIX_LOCK) {
- if (mod == 3) {
- goto illegal_op;
- }
- tcg_gen_movi_tl(s->T0, ~0);
- tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
- s->mem_index, ot | MO_LE);
- } else {
- tcg_gen_not_tl(s->T0, s->T0);
- if (mod != 3) {
- gen_op_st_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- }
- }
- break;
- case 3: /* neg */
- if (s->prefix & PREFIX_LOCK) {
- TCGLabel *label1;
- TCGv a0, t0, t1, t2;
-
- if (mod == 3) {
- goto illegal_op;
- }
- a0 = s->A0;
- t0 = s->T0;
- label1 = gen_new_label();
-
- gen_set_label(label1);
- t1 = tcg_temp_new();
- t2 = tcg_temp_new();
- tcg_gen_mov_tl(t2, t0);
- tcg_gen_neg_tl(t1, t0);
- tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
- s->mem_index, ot | MO_LE);
- tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
-
- tcg_gen_neg_tl(s->T0, t0);
- } else {
- tcg_gen_neg_tl(s->T0, s->T0);
- if (mod != 3) {
- gen_op_st_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- }
- }
- gen_op_update_neg_cc(s);
- set_cc_op(s, CC_OP_SUBB + ot);
- break;
- case 4: /* mul */
- switch(ot) {
- case MO_8:
- gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
- tcg_gen_ext8u_tl(s->T0, s->T0);
- tcg_gen_ext8u_tl(s->T1, s->T1);
- /* XXX: use 32 bit mul which could be faster */
- tcg_gen_mul_tl(s->T0, s->T0, s->T1);
- gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
- set_cc_op(s, CC_OP_MULB);
- break;
- case MO_16:
- gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
- tcg_gen_ext16u_tl(s->T0, s->T0);
- tcg_gen_ext16u_tl(s->T1, s->T1);
- /* XXX: use 32 bit mul which could be faster */
- tcg_gen_mul_tl(s->T0, s->T0, s->T1);
- gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- tcg_gen_shri_tl(s->T0, s->T0, 16);
- gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
- tcg_gen_mov_tl(cpu_cc_src, s->T0);
- set_cc_op(s, CC_OP_MULW);
- break;
- default:
- case MO_32:
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
- tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
- s->tmp2_i32, s->tmp3_i32);
- tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
- tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
- tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
- set_cc_op(s, CC_OP_MULL);
- break;
-#ifdef TARGET_X86_64
- case MO_64:
- tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
- s->T0, cpu_regs[R_EAX]);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
- tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
- set_cc_op(s, CC_OP_MULQ);
- break;
-#endif
- }
- break;
- case 5: /* imul */
- switch(ot) {
- case MO_8:
- gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
- tcg_gen_ext8s_tl(s->T0, s->T0);
- tcg_gen_ext8s_tl(s->T1, s->T1);
- /* XXX: use 32 bit mul which could be faster */
- tcg_gen_mul_tl(s->T0, s->T0, s->T1);
- gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- tcg_gen_ext8s_tl(s->tmp0, s->T0);
- tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
- set_cc_op(s, CC_OP_MULB);
- break;
- case MO_16:
- gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
- tcg_gen_ext16s_tl(s->T0, s->T0);
- tcg_gen_ext16s_tl(s->T1, s->T1);
- /* XXX: use 32 bit mul which could be faster */
- tcg_gen_mul_tl(s->T0, s->T0, s->T1);
- gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- tcg_gen_ext16s_tl(s->tmp0, s->T0);
- tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
- tcg_gen_shri_tl(s->T0, s->T0, 16);
- gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
- set_cc_op(s, CC_OP_MULW);
- break;
- default:
- case MO_32:
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
- tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
- s->tmp2_i32, s->tmp3_i32);
- tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
- tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
- tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
- tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
- tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
- set_cc_op(s, CC_OP_MULL);
- break;
-#ifdef TARGET_X86_64
- case MO_64:
- tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
- s->T0, cpu_regs[R_EAX]);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
- tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
- tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
- set_cc_op(s, CC_OP_MULQ);
- break;
-#endif
- }
- break;
- case 6: /* div */
- switch(ot) {
- case MO_8:
- gen_helper_divb_AL(tcg_env, s->T0);
- break;
- case MO_16:
- gen_helper_divw_AX(tcg_env, s->T0);
- break;
- default:
- case MO_32:
- gen_helper_divl_EAX(tcg_env, s->T0);
- break;
-#ifdef TARGET_X86_64
- case MO_64:
- gen_helper_divq_EAX(tcg_env, s->T0);
- break;
-#endif
- }
- break;
- case 7: /* idiv */
- switch(ot) {
- case MO_8:
- gen_helper_idivb_AL(tcg_env, s->T0);
- break;
- case MO_16:
- gen_helper_idivw_AX(tcg_env, s->T0);
- break;
- default:
- case MO_32:
- gen_helper_idivl_EAX(tcg_env, s->T0);
- break;
-#ifdef TARGET_X86_64
- case MO_64:
- gen_helper_idivq_EAX(tcg_env, s->T0);
- break;
-#endif
- }
- break;
- default:
- goto unknown_op;
- }
- break;
-
- case 0xfe: /* GRP4 */
- case 0xff: /* GRP5 */
- ot = mo_b_d(b, dflag);
-
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
- op = (modrm >> 3) & 7;
- if (op >= 2 && b == 0xfe) {
- goto unknown_op;
- }
- if (CODE64(s)) {
- if (op == 2 || op == 4) {
- /* operand size for jumps is 64 bit */
- ot = MO_64;
- } else if (op == 3 || op == 5) {
- ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
- } else if (op == 6) {
- /* default push size is 64 bit */
- ot = mo_pushpop(s, dflag);
- }
- }
- if (mod != 3) {
- gen_lea_modrm(env, s, modrm);
- if (op >= 2 && op != 3 && op != 5)
- gen_op_ld_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, rm);
- }
-
- switch(op) {
- case 0: /* inc Ev */
- if (mod != 3)
- opreg = OR_TMP0;
- else
- opreg = rm;
- gen_inc(s, ot, opreg, 1);
- break;
- case 1: /* dec Ev */
- if (mod != 3)
- opreg = OR_TMP0;
- else
- opreg = rm;
- gen_inc(s, ot, opreg, -1);
- break;
- case 2: /* call Ev */
- /* XXX: optimize if memory (no 'and' is necessary) */
- if (dflag == MO_16) {
- tcg_gen_ext16u_tl(s->T0, s->T0);
- }
- gen_push_v(s, eip_next_tl(s));
- gen_op_jmp_v(s, s->T0);
- gen_bnd_jmp(s);
- s->base.is_jmp = DISAS_JUMP;
- break;
- case 3: /* lcall Ev */
- if (mod == 3) {
- goto illegal_op;
- }
- gen_op_ld_v(s, ot, s->T0, s->A0);
- gen_add_A0_im(s, 1 << ot);
- gen_op_ld_v(s, MO_16, s->T1, s->A0);
- gen_far_call(s);
- break;
- case 4: /* jmp Ev */
- if (dflag == MO_16) {
- tcg_gen_ext16u_tl(s->T0, s->T0);
- }
- gen_op_jmp_v(s, s->T0);
- gen_bnd_jmp(s);
- s->base.is_jmp = DISAS_JUMP;
- break;
- case 5: /* ljmp Ev */
- if (mod == 3) {
- goto illegal_op;
- }
- gen_op_ld_v(s, ot, s->T0, s->A0);
- gen_add_A0_im(s, 1 << ot);
- gen_op_ld_v(s, MO_16, s->T1, s->A0);
- gen_far_jmp(s);
- break;
- case 6: /* push Ev */
- gen_push_v(s, s->T0);
- break;
- default:
- goto unknown_op;
- }
- break;
-
- case 0x84: /* test Ev, Gv */
- case 0x85:
- ot = mo_b_d(b, dflag);
-
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
-
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- gen_op_mov_v_reg(s, ot, s->T1, reg);
- gen_op_testl_T0_T1_cc(s);
- set_cc_op(s, CC_OP_LOGICB + ot);
- break;
-
- case 0xa8: /* test eAX, Iv */
- case 0xa9:
- ot = mo_b_d(b, dflag);
- val = insn_get(env, s, ot);
-
- gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
- tcg_gen_movi_tl(s->T1, val);
- gen_op_testl_T0_T1_cc(s);
- set_cc_op(s, CC_OP_LOGICB + ot);
- break;
-
- case 0x98: /* CWDE/CBW */
- switch (dflag) {
-#ifdef TARGET_X86_64
- case MO_64:
- gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
- tcg_gen_ext32s_tl(s->T0, s->T0);
- gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
- break;
-#endif
- case MO_32:
- gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
- tcg_gen_ext16s_tl(s->T0, s->T0);
- gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
- break;
- case MO_16:
- gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
- tcg_gen_ext8s_tl(s->T0, s->T0);
- gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
- break;
- default:
- g_assert_not_reached();
- }
- break;
- case 0x99: /* CDQ/CWD */
- switch (dflag) {
-#ifdef TARGET_X86_64
- case MO_64:
- gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
- tcg_gen_sari_tl(s->T0, s->T0, 63);
- gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
- break;
-#endif
- case MO_32:
- gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
- tcg_gen_ext32s_tl(s->T0, s->T0);
- tcg_gen_sari_tl(s->T0, s->T0, 31);
- gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
- break;
- case MO_16:
- gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
- tcg_gen_ext16s_tl(s->T0, s->T0);
- tcg_gen_sari_tl(s->T0, s->T0, 15);
- gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
- break;
- default:
- g_assert_not_reached();
- }
- break;
- case 0x1af: /* imul Gv, Ev */
- case 0x69: /* imul Gv, Ev, I */
- case 0x6b:
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- if (b == 0x69)
- s->rip_offset = insn_const_size(ot);
- else if (b == 0x6b)
- s->rip_offset = 1;
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- if (b == 0x69) {
- val = insn_get(env, s, ot);
- tcg_gen_movi_tl(s->T1, val);
- } else if (b == 0x6b) {
- val = (int8_t)insn_get(env, s, MO_8);
- tcg_gen_movi_tl(s->T1, val);
- } else {
- gen_op_mov_v_reg(s, ot, s->T1, reg);
- }
- switch (ot) {
-#ifdef TARGET_X86_64
- case MO_64:
- tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
- tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
- tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
- break;
-#endif
- case MO_32:
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
- tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
- s->tmp2_i32, s->tmp3_i32);
- tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
- tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
- tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
- tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
- break;
- default:
- tcg_gen_ext16s_tl(s->T0, s->T0);
- tcg_gen_ext16s_tl(s->T1, s->T1);
- /* XXX: use 32 bit mul which could be faster */
- tcg_gen_mul_tl(s->T0, s->T0, s->T1);
- tcg_gen_mov_tl(cpu_cc_dst, s->T0);
- tcg_gen_ext16s_tl(s->tmp0, s->T0);
- tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
- }
- set_cc_op(s, CC_OP_MULB + ot);
- break;
case 0x1c0:
case 0x1c1: /* xadd Ev, Gv */
ot = mo_b_d(b, dflag);
@@ -4022,375 +2867,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
break;
/**************************/
- /* push/pop */
- case 0x50 ... 0x57: /* push */
- gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
- gen_push_v(s, s->T0);
- break;
- case 0x58 ... 0x5f: /* pop */
- ot = gen_pop_T0(s);
- /* NOTE: order is important for pop %sp */
- gen_pop_update(s, ot);
- gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
- break;
- case 0x60: /* pusha */
- if (CODE64(s))
- goto illegal_op;
- gen_pusha(s);
- break;
- case 0x61: /* popa */
- if (CODE64(s))
- goto illegal_op;
- gen_popa(s);
- break;
- case 0x68: /* push Iv */
- case 0x6a:
- ot = mo_pushpop(s, dflag);
- if (b == 0x68)
- val = insn_get(env, s, ot);
- else
- val = (int8_t)insn_get(env, s, MO_8);
- tcg_gen_movi_tl(s->T0, val);
- gen_push_v(s, s->T0);
- break;
- case 0x8f: /* pop Ev */
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- ot = gen_pop_T0(s);
- if (mod == 3) {
- /* NOTE: order is important for pop %sp */
- gen_pop_update(s, ot);
- rm = (modrm & 7) | REX_B(s);
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- } else {
- /* NOTE: order is important too for MMU exceptions */
- s->popl_esp_hack = 1 << ot;
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
- s->popl_esp_hack = 0;
- gen_pop_update(s, ot);
- }
- break;
- case 0xc8: /* enter */
- {
- int level;
- val = x86_lduw_code(env, s);
- level = x86_ldub_code(env, s);
- gen_enter(s, val, level);
- }
- break;
- case 0xc9: /* leave */
- gen_leave(s);
- break;
- case 0x06: /* push es */
- case 0x0e: /* push cs */
- case 0x16: /* push ss */
- case 0x1e: /* push ds */
- if (CODE64(s))
- goto illegal_op;
- gen_op_movl_T0_seg(s, b >> 3);
- gen_push_v(s, s->T0);
- break;
- case 0x1a0: /* push fs */
- case 0x1a8: /* push gs */
- gen_op_movl_T0_seg(s, (b >> 3) & 7);
- gen_push_v(s, s->T0);
- break;
- case 0x07: /* pop es */
- case 0x17: /* pop ss */
- case 0x1f: /* pop ds */
- if (CODE64(s))
- goto illegal_op;
- reg = b >> 3;
- ot = gen_pop_T0(s);
- gen_movl_seg(s, reg, s->T0);
- gen_pop_update(s, ot);
- break;
- case 0x1a1: /* pop fs */
- case 0x1a9: /* pop gs */
- ot = gen_pop_T0(s);
- gen_movl_seg(s, (b >> 3) & 7, s->T0);
- gen_pop_update(s, ot);
- break;
-
- /**************************/
- /* mov */
- case 0x88:
- case 0x89: /* mov Gv, Ev */
- ot = mo_b_d(b, dflag);
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
-
- /* generate a generic store */
- gen_ldst_modrm(env, s, modrm, ot, reg, 1);
- break;
- case 0xc6:
- case 0xc7: /* mov Ev, Iv */
- ot = mo_b_d(b, dflag);
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- if (mod != 3) {
- s->rip_offset = insn_const_size(ot);
- gen_lea_modrm(env, s, modrm);
- }
- val = insn_get(env, s, ot);
- tcg_gen_movi_tl(s->T0, val);
- if (mod != 3) {
- gen_op_st_v(s, ot, s->T0, s->A0);
- } else {
- gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
- }
- break;
- case 0x8a:
- case 0x8b: /* mov Ev, Gv */
- ot = mo_b_d(b, dflag);
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
-
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
- case 0x8e: /* mov seg, Gv */
- modrm = x86_ldub_code(env, s);
- reg = (modrm >> 3) & 7;
- if (reg >= 6 || reg == R_CS)
- goto illegal_op;
- gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
- gen_movl_seg(s, reg, s->T0);
- break;
- case 0x8c: /* mov Gv, seg */
- modrm = x86_ldub_code(env, s);
- reg = (modrm >> 3) & 7;
- mod = (modrm >> 6) & 3;
- if (reg >= 6)
- goto illegal_op;
- gen_op_movl_T0_seg(s, reg);
- ot = mod == 3 ? dflag : MO_16;
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
- break;
-
- case 0x1b6: /* movzbS Gv, Eb */
- case 0x1b7: /* movzwS Gv, Eb */
- case 0x1be: /* movsbS Gv, Eb */
- case 0x1bf: /* movswS Gv, Eb */
- {
- MemOp d_ot;
- MemOp s_ot;
-
- /* d_ot is the size of destination */
- d_ot = dflag;
- /* ot is the size of source */
- ot = (b & 1) + MO_8;
- /* s_ot is the sign+size of source */
- s_ot = b & 8 ? MO_SIGN | ot : ot;
-
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
-
- if (mod == 3) {
- if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
- tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, rm);
- switch (s_ot) {
- case MO_UB:
- tcg_gen_ext8u_tl(s->T0, s->T0);
- break;
- case MO_SB:
- tcg_gen_ext8s_tl(s->T0, s->T0);
- break;
- case MO_UW:
- tcg_gen_ext16u_tl(s->T0, s->T0);
- break;
- default:
- case MO_SW:
- tcg_gen_ext16s_tl(s->T0, s->T0);
- break;
- }
- }
- gen_op_mov_reg_v(s, d_ot, reg, s->T0);
- } else {
- gen_lea_modrm(env, s, modrm);
- gen_op_ld_v(s, s_ot, s->T0, s->A0);
- gen_op_mov_reg_v(s, d_ot, reg, s->T0);
- }
- }
- break;
-
- case 0x8d: /* lea */
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- if (mod == 3)
- goto illegal_op;
- reg = ((modrm >> 3) & 7) | REX_R(s);
- {
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
- TCGv ea = gen_lea_modrm_1(s, a, false);
- gen_lea_v_seg(s, s->aflag, ea, -1, -1);
- gen_op_mov_reg_v(s, dflag, reg, s->A0);
- }
- break;
-
- case 0xa0: /* mov EAX, Ov */
- case 0xa1:
- case 0xa2: /* mov Ov, EAX */
- case 0xa3:
- {
- target_ulong offset_addr;
-
- ot = mo_b_d(b, dflag);
- offset_addr = insn_get_addr(env, s, s->aflag);
- tcg_gen_movi_tl(s->A0, offset_addr);
- gen_add_A0_ds_seg(s);
- if ((b & 2) == 0) {
- gen_op_ld_v(s, ot, s->T0, s->A0);
- gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
- } else {
- gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
- gen_op_st_v(s, ot, s->T0, s->A0);
- }
- }
- break;
- case 0xd7: /* xlat */
- tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
- tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
- tcg_gen_add_tl(s->A0, s->A0, s->T0);
- gen_add_A0_ds_seg(s);
- gen_op_ld_v(s, MO_8, s->T0, s->A0);
- gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
- break;
- case 0xb0 ... 0xb7: /* mov R, Ib */
- val = insn_get(env, s, MO_8);
- tcg_gen_movi_tl(s->T0, val);
- gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
- break;
- case 0xb8 ... 0xbf: /* mov R, Iv */
-#ifdef TARGET_X86_64
- if (dflag == MO_64) {
- uint64_t tmp;
- /* 64 bit case */
- tmp = x86_ldq_code(env, s);
- reg = (b & 7) | REX_B(s);
- tcg_gen_movi_tl(s->T0, tmp);
- gen_op_mov_reg_v(s, MO_64, reg, s->T0);
- } else
-#endif
- {
- ot = dflag;
- val = insn_get(env, s, ot);
- reg = (b & 7) | REX_B(s);
- tcg_gen_movi_tl(s->T0, val);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- }
- break;
-
- case 0x91 ... 0x97: /* xchg R, EAX */
- do_xchg_reg_eax:
- ot = dflag;
- reg = (b & 7) | REX_B(s);
- rm = R_EAX;
- goto do_xchg_reg;
- case 0x86:
- case 0x87: /* xchg Ev, Gv */
- ot = mo_b_d(b, dflag);
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- if (mod == 3) {
- rm = (modrm & 7) | REX_B(s);
- do_xchg_reg:
- gen_op_mov_v_reg(s, ot, s->T0, reg);
- gen_op_mov_v_reg(s, ot, s->T1, rm);
- gen_op_mov_reg_v(s, ot, rm, s->T0);
- gen_op_mov_reg_v(s, ot, reg, s->T1);
- } else {
- gen_lea_modrm(env, s, modrm);
- gen_op_mov_v_reg(s, ot, s->T0, reg);
- /* for xchg, lock is implicit */
- tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
- s->mem_index, ot | MO_LE);
- gen_op_mov_reg_v(s, ot, reg, s->T1);
- }
- break;
- case 0xc4: /* les Gv */
- /* In CODE64 this is VEX3; see above. */
- op = R_ES;
- goto do_lxx;
- case 0xc5: /* lds Gv */
- /* In CODE64 this is VEX2; see above. */
- op = R_DS;
- goto do_lxx;
- case 0x1b2: /* lss Gv */
- op = R_SS;
- goto do_lxx;
- case 0x1b4: /* lfs Gv */
- op = R_FS;
- goto do_lxx;
- case 0x1b5: /* lgs Gv */
- op = R_GS;
- do_lxx:
- ot = dflag != MO_16 ? MO_32 : MO_16;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- if (mod == 3)
- goto illegal_op;
- gen_lea_modrm(env, s, modrm);
- gen_op_ld_v(s, ot, s->T1, s->A0);
- gen_add_A0_im(s, 1 << ot);
- /* load the segment first to handle exceptions properly */
- gen_op_ld_v(s, MO_16, s->T0, s->A0);
- gen_movl_seg(s, op, s->T0);
- /* then put the data */
- gen_op_mov_reg_v(s, ot, reg, s->T1);
- break;
-
- /************************/
/* shifts */
- case 0xc0:
- case 0xc1:
- /* shift Ev,Ib */
- shift = 2;
- grp2:
- {
- ot = mo_b_d(b, dflag);
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- op = (modrm >> 3) & 7;
-
- if (mod != 3) {
- if (shift == 2) {
- s->rip_offset = 1;
- }
- gen_lea_modrm(env, s, modrm);
- opreg = OR_TMP0;
- } else {
- opreg = (modrm & 7) | REX_B(s);
- }
-
- /* simpler op */
- if (shift == 0) {
- gen_shift(s, op, ot, opreg, OR_ECX);
- } else {
- if (shift == 2) {
- shift = x86_ldub_code(env, s);
- }
- gen_shifti(s, op, ot, opreg, shift);
- }
- }
- break;
- case 0xd0:
- case 0xd1:
- /* shift Ev,1 */
- shift = 1;
- goto grp2;
- case 0xd2:
- case 0xd3:
- /* shift Ev,cl */
- shift = 0;
- goto grp2;
-
case 0x1a4: /* shld imm */
op = 0;
shift = 1;
@@ -4987,374 +3464,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
}
}
break;
- /************************/
- /* string ops */
-
- case 0xa4: /* movsS */
- case 0xa5:
- ot = mo_b_d(b, dflag);
- if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
- gen_repz_movs(s, ot);
- } else {
- gen_movs(s, ot);
- }
- break;
-
- case 0xaa: /* stosS */
- case 0xab:
- ot = mo_b_d(b, dflag);
- gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
- if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
- gen_repz_stos(s, ot);
- } else {
- gen_stos(s, ot);
- }
- break;
- case 0xac: /* lodsS */
- case 0xad:
- ot = mo_b_d(b, dflag);
- if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
- gen_repz_lods(s, ot);
- } else {
- gen_lods(s, ot);
- }
- break;
- case 0xae: /* scasS */
- case 0xaf:
- ot = mo_b_d(b, dflag);
- gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
- if (prefixes & PREFIX_REPNZ) {
- gen_repz_scas(s, ot, 1);
- } else if (prefixes & PREFIX_REPZ) {
- gen_repz_scas(s, ot, 0);
- } else {
- gen_scas(s, ot);
- }
- break;
-
- case 0xa6: /* cmpsS */
- case 0xa7:
- ot = mo_b_d(b, dflag);
- if (prefixes & PREFIX_REPNZ) {
- gen_repz_cmps(s, ot, 1);
- } else if (prefixes & PREFIX_REPZ) {
- gen_repz_cmps(s, ot, 0);
- } else {
- gen_cmps(s, ot);
- }
- break;
- case 0x6c: /* insS */
- case 0x6d:
- ot = mo_b_d32(b, dflag);
- tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
- tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
- if (!gen_check_io(s, ot, s->tmp2_i32,
- SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
- break;
- }
- translator_io_start(&s->base);
- if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
- gen_repz_ins(s, ot);
- } else {
- gen_ins(s, ot);
- }
- break;
- case 0x6e: /* outsS */
- case 0x6f:
- ot = mo_b_d32(b, dflag);
- tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
- tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
- if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
- break;
- }
- translator_io_start(&s->base);
- if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
- gen_repz_outs(s, ot);
- } else {
- gen_outs(s, ot);
- }
- break;
-
- /************************/
- /* port I/O */
-
- case 0xe4:
- case 0xe5:
- ot = mo_b_d32(b, dflag);
- val = x86_ldub_code(env, s);
- tcg_gen_movi_i32(s->tmp2_i32, val);
- if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
- break;
- }
- translator_io_start(&s->base);
- gen_helper_in_func(ot, s->T1, s->tmp2_i32);
- gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
- gen_bpt_io(s, s->tmp2_i32, ot);
- break;
- case 0xe6:
- case 0xe7:
- ot = mo_b_d32(b, dflag);
- val = x86_ldub_code(env, s);
- tcg_gen_movi_i32(s->tmp2_i32, val);
- if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
- break;
- }
- translator_io_start(&s->base);
- gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
- gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
- gen_bpt_io(s, s->tmp2_i32, ot);
- break;
- case 0xec:
- case 0xed:
- ot = mo_b_d32(b, dflag);
- tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
- tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
- if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
- break;
- }
- translator_io_start(&s->base);
- gen_helper_in_func(ot, s->T1, s->tmp2_i32);
- gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
- gen_bpt_io(s, s->tmp2_i32, ot);
- break;
- case 0xee:
- case 0xef:
- ot = mo_b_d32(b, dflag);
- tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
- tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
- if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
- break;
- }
- translator_io_start(&s->base);
- gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
- tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
- gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
- gen_bpt_io(s, s->tmp2_i32, ot);
- break;
-
- /************************/
- /* control */
- case 0xc2: /* ret im */
- val = x86_ldsw_code(env, s);
- ot = gen_pop_T0(s);
- gen_stack_update(s, val + (1 << ot));
- /* Note that gen_pop_T0 uses a zero-extending load. */
- gen_op_jmp_v(s, s->T0);
- gen_bnd_jmp(s);
- s->base.is_jmp = DISAS_JUMP;
- break;
- case 0xc3: /* ret */
- ot = gen_pop_T0(s);
- gen_pop_update(s, ot);
- /* Note that gen_pop_T0 uses a zero-extending load. */
- gen_op_jmp_v(s, s->T0);
- gen_bnd_jmp(s);
- s->base.is_jmp = DISAS_JUMP;
- break;
- case 0xca: /* lret im */
- val = x86_ldsw_code(env, s);
- do_lret:
- if (PE(s) && !VM86(s)) {
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_lret_protected(tcg_env, tcg_constant_i32(dflag - 1),
- tcg_constant_i32(val));
- } else {
- gen_stack_A0(s);
- /* pop offset */
- gen_op_ld_v(s, dflag, s->T0, s->A0);
- /* NOTE: keeping EIP updated is not a problem in case of
- exception */
- gen_op_jmp_v(s, s->T0);
- /* pop selector */
- gen_add_A0_im(s, 1 << dflag);
- gen_op_ld_v(s, dflag, s->T0, s->A0);
- gen_op_movl_seg_real(s, R_CS, s->T0);
- /* add stack offset */
- gen_stack_update(s, val + (2 << dflag));
- }
- s->base.is_jmp = DISAS_EOB_ONLY;
- break;
- case 0xcb: /* lret */
- val = 0;
- goto do_lret;
- case 0xcf: /* iret */
- gen_svm_check_intercept(s, SVM_EXIT_IRET);
- if (!PE(s) || VM86(s)) {
- /* real mode or vm86 mode */
- if (!check_vm86_iopl(s)) {
- break;
- }
- gen_helper_iret_real(tcg_env, tcg_constant_i32(dflag - 1));
- } else {
- gen_helper_iret_protected(tcg_env, tcg_constant_i32(dflag - 1),
- eip_next_i32(s));
- }
- set_cc_op(s, CC_OP_EFLAGS);
- s->base.is_jmp = DISAS_EOB_ONLY;
- break;
- case 0xe8: /* call im */
- {
- int diff = (dflag != MO_16
- ? (int32_t)insn_get(env, s, MO_32)
- : (int16_t)insn_get(env, s, MO_16));
- gen_push_v(s, eip_next_tl(s));
- gen_bnd_jmp(s);
- gen_update_cc_op(s);
- gen_jmp_rel(s, dflag, diff, 0);
- }
- break;
- case 0x9a: /* lcall im */
- {
- unsigned int selector, offset;
-
- if (CODE64(s))
- goto illegal_op;
- ot = dflag;
- offset = insn_get(env, s, ot);
- selector = insn_get(env, s, MO_16);
-
- tcg_gen_movi_tl(s->T0, offset);
- tcg_gen_movi_tl(s->T1, selector);
- }
- gen_far_call(s);
- break;
- case 0xe9: /* jmp im */
- {
- int diff = (dflag != MO_16
- ? (int32_t)insn_get(env, s, MO_32)
- : (int16_t)insn_get(env, s, MO_16));
- gen_bnd_jmp(s);
- gen_update_cc_op(s);
- gen_jmp_rel(s, dflag, diff, 0);
- }
- break;
- case 0xea: /* ljmp im */
- {
- unsigned int selector, offset;
-
- if (CODE64(s))
- goto illegal_op;
- ot = dflag;
- offset = insn_get(env, s, ot);
- selector = insn_get(env, s, MO_16);
-
- tcg_gen_movi_tl(s->T0, offset);
- tcg_gen_movi_tl(s->T1, selector);
- }
- gen_far_jmp(s);
- break;
- case 0xeb: /* jmp Jb */
- {
- int diff = (int8_t)insn_get(env, s, MO_8);
- gen_update_cc_op(s);
- gen_jmp_rel(s, dflag, diff, 0);
- }
- break;
- case 0x70 ... 0x7f: /* jcc Jb */
- {
- int diff = (int8_t)insn_get(env, s, MO_8);
- gen_bnd_jmp(s);
- gen_jcc(s, b, diff);
- }
- break;
- case 0x180 ... 0x18f: /* jcc Jv */
- {
- int diff = (dflag != MO_16
- ? (int32_t)insn_get(env, s, MO_32)
- : (int16_t)insn_get(env, s, MO_16));
- gen_bnd_jmp(s);
- gen_jcc(s, b, diff);
- }
- break;
-
- case 0x190 ... 0x19f: /* setcc Gv */
- modrm = x86_ldub_code(env, s);
- gen_setcc1(s, b, s->T0);
- gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
- break;
- case 0x140 ... 0x14f: /* cmov Gv, Ev */
- if (!(s->cpuid_features & CPUID_CMOV)) {
- goto illegal_op;
- }
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
- gen_cmovcc1(s, b ^ 1, s->T0, cpu_regs[reg]);
- gen_op_mov_reg_v(s, ot, reg, s->T0);
- break;
-
- /************************/
- /* flags */
- case 0x9c: /* pushf */
- gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
- if (check_vm86_iopl(s)) {
- gen_update_cc_op(s);
- gen_helper_read_eflags(s->T0, tcg_env);
- gen_push_v(s, s->T0);
- }
- break;
- case 0x9d: /* popf */
- gen_svm_check_intercept(s, SVM_EXIT_POPF);
- if (check_vm86_iopl(s)) {
- int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
-
- if (CPL(s) == 0) {
- mask |= IF_MASK | IOPL_MASK;
- } else if (CPL(s) <= IOPL(s)) {
- mask |= IF_MASK;
- }
- if (dflag == MO_16) {
- mask &= 0xffff;
- }
-
- ot = gen_pop_T0(s);
- gen_helper_write_eflags(tcg_env, s->T0, tcg_constant_i32(mask));
- gen_pop_update(s, ot);
- set_cc_op(s, CC_OP_EFLAGS);
- /* abort translation because TF/AC flag may change */
- s->base.is_jmp = DISAS_EOB_NEXT;
- }
- break;
- case 0x9e: /* sahf */
- if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
- goto illegal_op;
- tcg_gen_shri_tl(s->T0, cpu_regs[R_EAX], 8);
- gen_compute_eflags(s);
- tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
- tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
- break;
- case 0x9f: /* lahf */
- if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
- goto illegal_op;
- gen_compute_eflags(s);
- /* Note: gen_compute_eflags() only gives the condition codes */
- tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
- tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8);
- break;
- case 0xf5: /* cmc */
- gen_compute_eflags(s);
- tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
- break;
- case 0xf8: /* clc */
- gen_compute_eflags(s);
- tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
- break;
- case 0xf9: /* stc */
- gen_compute_eflags(s);
- tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
- break;
- case 0xfc: /* cld */
- tcg_gen_movi_i32(s->tmp2_i32, 1);
- tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df));
- break;
- case 0xfd: /* std */
- tcg_gen_movi_i32(s->tmp2_i32, -1);
- tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df));
- break;
/************************/
/* bit operations */
@@ -5545,188 +3654,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
}
gen_op_mov_reg_v(s, ot, reg, s->T0);
break;
- /************************/
- /* bcd */
- case 0x27: /* daa */
- if (CODE64(s))
- goto illegal_op;
- gen_update_cc_op(s);
- gen_helper_daa(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0x2f: /* das */
- if (CODE64(s))
- goto illegal_op;
- gen_update_cc_op(s);
- gen_helper_das(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0x37: /* aaa */
- if (CODE64(s))
- goto illegal_op;
- gen_update_cc_op(s);
- gen_helper_aaa(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0x3f: /* aas */
- if (CODE64(s))
- goto illegal_op;
- gen_update_cc_op(s);
- gen_helper_aas(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0xd4: /* aam */
- if (CODE64(s))
- goto illegal_op;
- val = x86_ldub_code(env, s);
- if (val == 0) {
- gen_exception(s, EXCP00_DIVZ);
- } else {
- gen_helper_aam(tcg_env, tcg_constant_i32(val));
- set_cc_op(s, CC_OP_LOGICB);
- }
- break;
- case 0xd5: /* aad */
- if (CODE64(s))
- goto illegal_op;
- val = x86_ldub_code(env, s);
- gen_helper_aad(tcg_env, tcg_constant_i32(val));
- set_cc_op(s, CC_OP_LOGICB);
- break;
- /************************/
- /* misc */
- case 0x90: /* nop */
- /* XXX: correct lock test for all insn */
- if (prefixes & PREFIX_LOCK) {
- goto illegal_op;
- }
- /* If REX_B is set, then this is xchg eax, r8d, not a nop. */
- if (REX_B(s)) {
- goto do_xchg_reg_eax;
- }
- if (prefixes & PREFIX_REPZ) {
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_pause(tcg_env, cur_insn_len_i32(s));
- s->base.is_jmp = DISAS_NORETURN;
- }
- break;
- case 0x9b: /* fwait */
- if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
- (HF_MP_MASK | HF_TS_MASK)) {
- gen_exception(s, EXCP07_PREX);
- } else {
- /* needs to be treated as I/O because of ferr_irq */
- translator_io_start(&s->base);
- gen_helper_fwait(tcg_env);
- }
- break;
- case 0xcc: /* int3 */
- gen_interrupt(s, EXCP03_INT3);
- break;
- case 0xcd: /* int N */
- val = x86_ldub_code(env, s);
- if (check_vm86_iopl(s)) {
- gen_interrupt(s, val);
- }
- break;
- case 0xce: /* into */
- if (CODE64(s))
- goto illegal_op;
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_into(tcg_env, cur_insn_len_i32(s));
- break;
-#ifdef WANT_ICEBP
- case 0xf1: /* icebp (undocumented, exits to external debugger) */
- gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
- gen_debug(s);
- break;
-#endif
- case 0xfa: /* cli */
- if (check_iopl(s)) {
- gen_reset_eflags(s, IF_MASK);
- }
- break;
- case 0xfb: /* sti */
- if (check_iopl(s)) {
- gen_set_eflags(s, IF_MASK);
- /* interruptions are enabled only the first insn after sti */
- gen_update_eip_next(s);
- gen_eob_inhibit_irq(s);
- }
- break;
- case 0x62: /* bound */
- if (CODE64(s))
- goto illegal_op;
- ot = dflag;
- modrm = x86_ldub_code(env, s);
- reg = (modrm >> 3) & 7;
- mod = (modrm >> 6) & 3;
- if (mod == 3)
- goto illegal_op;
- gen_op_mov_v_reg(s, ot, s->T0, reg);
- gen_lea_modrm(env, s, modrm);
- tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
- if (ot == MO_16) {
- gen_helper_boundw(tcg_env, s->A0, s->tmp2_i32);
- } else {
- gen_helper_boundl(tcg_env, s->A0, s->tmp2_i32);
- }
- break;
- case 0x1c8 ... 0x1cf: /* bswap reg */
- reg = (b & 7) | REX_B(s);
-#ifdef TARGET_X86_64
- if (dflag == MO_64) {
- tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
- break;
- }
-#endif
- tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
- break;
- case 0xd6: /* salc */
- if (CODE64(s))
- goto illegal_op;
- gen_compute_eflags_c(s, s->T0);
- tcg_gen_neg_tl(s->T0, s->T0);
- gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
- break;
- case 0xe0: /* loopnz */
- case 0xe1: /* loopz */
- case 0xe2: /* loop */
- case 0xe3: /* jecxz */
- {
- TCGLabel *l1, *l2;
- int diff = (int8_t)insn_get(env, s, MO_8);
-
- l1 = gen_new_label();
- l2 = gen_new_label();
- gen_update_cc_op(s);
- b &= 3;
- switch(b) {
- case 0: /* loopnz */
- case 1: /* loopz */
- gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
- gen_op_jz_ecx(s, l2);
- gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
- break;
- case 2: /* loop */
- gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
- gen_op_jnz_ecx(s, l1);
- break;
- default:
- case 3: /* jcxz */
- gen_op_jz_ecx(s, l1);
- break;
- }
-
- gen_set_label(l2);
- gen_jmp_rel_csize(s, 0, 1);
-
- gen_set_label(l1);
- gen_jmp_rel(s, dflag, diff, 0);
- }
- break;
case 0x130: /* wrmsr */
case 0x132: /* rdmsr */
if (check_cpl0(s)) {
@@ -5814,14 +3741,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
gen_update_eip_cur(s);
gen_helper_cpuid(tcg_env);
break;
- case 0xf4: /* hlt */
- if (check_cpl0(s)) {
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_hlt(tcg_env, cur_insn_len_i32(s));
- s->base.is_jmp = DISAS_NORETURN;
- }
- break;
case 0x100:
modrm = x86_ldub_code(env, s);
mod = (modrm >> 6) & 3;
@@ -6226,72 +4145,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
/* nothing to do */
}
break;
- case 0x63: /* arpl or movslS (x86_64) */
-#ifdef TARGET_X86_64
- if (CODE64(s)) {
- int d_ot;
- /* d_ot is the size of destination */
- d_ot = dflag;
-
- modrm = x86_ldub_code(env, s);
- reg = ((modrm >> 3) & 7) | REX_R(s);
- mod = (modrm >> 6) & 3;
- rm = (modrm & 7) | REX_B(s);
-
- if (mod == 3) {
- gen_op_mov_v_reg(s, MO_32, s->T0, rm);
- /* sign extend */
- if (d_ot == MO_64) {
- tcg_gen_ext32s_tl(s->T0, s->T0);
- }
- gen_op_mov_reg_v(s, d_ot, reg, s->T0);
- } else {
- gen_lea_modrm(env, s, modrm);
- gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
- gen_op_mov_reg_v(s, d_ot, reg, s->T0);
- }
- } else
-#endif
- {
- TCGLabel *label1;
- TCGv t0, t1, t2;
-
- if (!PE(s) || VM86(s))
- goto illegal_op;
- t0 = tcg_temp_new();
- t1 = tcg_temp_new();
- t2 = tcg_temp_new();
- ot = MO_16;
- modrm = x86_ldub_code(env, s);
- reg = (modrm >> 3) & 7;
- mod = (modrm >> 6) & 3;
- rm = modrm & 7;
- if (mod != 3) {
- gen_lea_modrm(env, s, modrm);
- gen_op_ld_v(s, ot, t0, s->A0);
- } else {
- gen_op_mov_v_reg(s, ot, t0, rm);
- }
- gen_op_mov_v_reg(s, ot, t1, reg);
- tcg_gen_andi_tl(s->tmp0, t0, 3);
- tcg_gen_andi_tl(t1, t1, 3);
- tcg_gen_movi_tl(t2, 0);
- label1 = gen_new_label();
- tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
- tcg_gen_andi_tl(t0, t0, ~3);
- tcg_gen_or_tl(t0, t0, t1);
- tcg_gen_movi_tl(t2, CC_Z);
- gen_set_label(label1);
- if (mod != 3) {
- gen_op_st_v(s, ot, t0, s->A0);
- } else {
- gen_op_mov_reg_v(s, ot, rm, t0);
- }
- gen_compute_eflags(s);
- tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
- }
- break;
case 0x102: /* lar */
case 0x103: /* lsl */
{
@@ -6618,18 +4471,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
}
break;
/* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
- case 0x1c3: /* MOVNTI reg, mem */
- if (!(s->cpuid_features & CPUID_SSE2))
- goto illegal_op;
- ot = mo_64_32(dflag);
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- if (mod == 3)
- goto illegal_op;
- reg = ((modrm >> 3) & 7) | REX_R(s);
- /* generate a generic store */
- gen_ldst_modrm(env, s, modrm, ot, reg, 1);
- break;
case 0x1ae:
modrm = x86_ldub_code(env, s);
switch (modrm) {
@@ -6872,13 +4713,19 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
set_cc_op(s, CC_OP_POPCNT);
break;
+ case 0 ... 0xd7:
+ case 0xe0 ... 0xff:
case 0x10e ... 0x117:
case 0x128 ... 0x12f:
- case 0x138 ... 0x13f:
- case 0x150 ... 0x17f:
- case 0x1c2:
- case 0x1c4 ... 0x1c6:
- case 0x1d0 ... 0x1fe:
+ case 0x138 ... 0x19f:
+ case 0x1a0 ... 0x1a1:
+ case 0x1a8 ... 0x1a9:
+ case 0x1af:
+ case 0x1b2:
+ case 0x1b4 ... 0x1b7:
+ case 0x1be ... 0x1bf:
+ case 0x1c2 ... 0x1c6:
+ case 0x1c8 ... 0x1ff:
disas_insn_new(s, cpu, b);
break;
default:
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 8311b479846..14218882681 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -2227,9 +2227,6 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
X86DecodeFunc decode_func = decode_root;
uint8_t cc_live;
-#ifdef CONFIG_USER_ONLY
- if (limit) { --limit; }
-#endif
s->has_modrm = false;
next_byte:
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 23/25] target/i386: decode x87 instructions in a separate function
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (21 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 22/25] target/i386: remove now-converted opcodes from old decoder Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 24/25] target/i386: split legacy decoder into " Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 25/25] target/i386: remove duplicate prefix decoding Paolo Bonzini
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
The x87 instructions (opcodes 0xd8-0xdf) are unlikely to be converted to
table-based decoding soon: decode-new.c.inc could perhaps gain generic ESC
decoding for the Mod/RM byte, but not operand decoding.  Move them to a
separate function, disas_insn_x87(), so that they are kept apart from the
remaining legacy-decoded instructions.
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 1120 ++++++++++++++++++-----------------
1 file changed, 566 insertions(+), 554 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 634b162ae97..e077fdd8c71 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2552,6 +2552,570 @@ static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
}
#endif
+static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
+{
+ CPUX86State *env = cpu_env(cpu);
+ bool update_fip = true;
+ int modrm, mod, rm, op;
+
+ if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
+ /* if CR0.EM or CR0.TS are set, generate an FPU exception */
+ /* XXX: what to do if illegal op ? */
+ gen_exception(s, EXCP07_PREX);
+ return true;
+ }
+ modrm = x86_ldub_code(env, s);
+ mod = (modrm >> 6) & 3;
+ rm = modrm & 7;
+ op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+ if (mod != 3) {
+ /* memory op */
+ AddressParts a = gen_lea_modrm_0(env, s, modrm);
+ TCGv ea = gen_lea_modrm_1(s, a, false);
+ TCGv last_addr = tcg_temp_new();
+ bool update_fdp = true;
+
+ tcg_gen_mov_tl(last_addr, ea);
+ gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
+
+ switch (op) {
+ case 0x00 ... 0x07: /* fxxxs */
+ case 0x10 ... 0x17: /* fixxxl */
+ case 0x20 ... 0x27: /* fxxxl */
+ case 0x30 ... 0x37: /* fixxx */
+ {
+ int op1;
+ op1 = op & 7;
+
+ switch (op >> 4) {
+ case 0:
+ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUL);
+ gen_helper_flds_FT0(tcg_env, s->tmp2_i32);
+ break;
+ case 1:
+ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUL);
+ gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
+ break;
+ case 2:
+ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
+ s->mem_index, MO_LEUQ);
+ gen_helper_fldl_FT0(tcg_env, s->tmp1_i64);
+ break;
+ case 3:
+ default:
+ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LESW);
+ gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
+ break;
+ }
+
+ gen_helper_fp_arith_ST0_FT0(op1);
+ if (op1 == 3) {
+ /* fcomp needs pop */
+ gen_helper_fpop(tcg_env);
+ }
+ }
+ break;
+ case 0x08: /* flds */
+ case 0x0a: /* fsts */
+ case 0x0b: /* fstps */
+ case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+ case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+ case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
+ switch (op & 7) {
+ case 0:
+ switch (op >> 4) {
+ case 0:
+ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUL);
+ gen_helper_flds_ST0(tcg_env, s->tmp2_i32);
+ break;
+ case 1:
+ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUL);
+ gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
+ break;
+ case 2:
+ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
+ s->mem_index, MO_LEUQ);
+ gen_helper_fldl_ST0(tcg_env, s->tmp1_i64);
+ break;
+ case 3:
+ default:
+ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LESW);
+ gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
+ break;
+ }
+ break;
+ case 1:
+ /* XXX: the corresponding CPUID bit must be tested ! */
+ switch (op >> 4) {
+ case 1:
+ gen_helper_fisttl_ST0(s->tmp2_i32, tcg_env);
+ tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUL);
+ break;
+ case 2:
+ gen_helper_fisttll_ST0(s->tmp1_i64, tcg_env);
+ tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
+ s->mem_index, MO_LEUQ);
+ break;
+ case 3:
+ default:
+ gen_helper_fistt_ST0(s->tmp2_i32, tcg_env);
+ tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUW);
+ break;
+ }
+ gen_helper_fpop(tcg_env);
+ break;
+ default:
+ switch (op >> 4) {
+ case 0:
+ gen_helper_fsts_ST0(s->tmp2_i32, tcg_env);
+ tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUL);
+ break;
+ case 1:
+ gen_helper_fistl_ST0(s->tmp2_i32, tcg_env);
+ tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUL);
+ break;
+ case 2:
+ gen_helper_fstl_ST0(s->tmp1_i64, tcg_env);
+ tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
+ s->mem_index, MO_LEUQ);
+ break;
+ case 3:
+ default:
+ gen_helper_fist_ST0(s->tmp2_i32, tcg_env);
+ tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUW);
+ break;
+ }
+ if ((op & 7) == 3) {
+ gen_helper_fpop(tcg_env);
+ }
+ break;
+ }
+ break;
+ case 0x0c: /* fldenv mem */
+ gen_helper_fldenv(tcg_env, s->A0,
+ tcg_constant_i32(s->dflag - 1));
+ update_fip = update_fdp = false;
+ break;
+ case 0x0d: /* fldcw mem */
+ tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUW);
+ gen_helper_fldcw(tcg_env, s->tmp2_i32);
+ update_fip = update_fdp = false;
+ break;
+ case 0x0e: /* fnstenv mem */
+ gen_helper_fstenv(tcg_env, s->A0,
+ tcg_constant_i32(s->dflag - 1));
+ update_fip = update_fdp = false;
+ break;
+ case 0x0f: /* fnstcw mem */
+ gen_helper_fnstcw(s->tmp2_i32, tcg_env);
+ tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUW);
+ update_fip = update_fdp = false;
+ break;
+ case 0x1d: /* fldt mem */
+ gen_helper_fldt_ST0(tcg_env, s->A0);
+ break;
+ case 0x1f: /* fstpt mem */
+ gen_helper_fstt_ST0(tcg_env, s->A0);
+ gen_helper_fpop(tcg_env);
+ break;
+ case 0x2c: /* frstor mem */
+ gen_helper_frstor(tcg_env, s->A0,
+ tcg_constant_i32(s->dflag - 1));
+ update_fip = update_fdp = false;
+ break;
+ case 0x2e: /* fnsave mem */
+ gen_helper_fsave(tcg_env, s->A0,
+ tcg_constant_i32(s->dflag - 1));
+ update_fip = update_fdp = false;
+ break;
+ case 0x2f: /* fnstsw mem */
+ gen_helper_fnstsw(s->tmp2_i32, tcg_env);
+ tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
+ s->mem_index, MO_LEUW);
+ update_fip = update_fdp = false;
+ break;
+ case 0x3c: /* fbld */
+ gen_helper_fbld_ST0(tcg_env, s->A0);
+ break;
+ case 0x3e: /* fbstp */
+ gen_helper_fbst_ST0(tcg_env, s->A0);
+ gen_helper_fpop(tcg_env);
+ break;
+ case 0x3d: /* fildll */
+ tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
+ s->mem_index, MO_LEUQ);
+ gen_helper_fildll_ST0(tcg_env, s->tmp1_i64);
+ break;
+ case 0x3f: /* fistpll */
+ gen_helper_fistll_ST0(s->tmp1_i64, tcg_env);
+ tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
+ s->mem_index, MO_LEUQ);
+ gen_helper_fpop(tcg_env);
+ break;
+ default:
+ return false;
+ }
+
+ if (update_fdp) {
+ int last_seg = s->override >= 0 ? s->override : a.def_seg;
+
+ tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
+ offsetof(CPUX86State,
+ segs[last_seg].selector));
+ tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
+ offsetof(CPUX86State, fpds));
+ tcg_gen_st_tl(last_addr, tcg_env,
+ offsetof(CPUX86State, fpdp));
+ }
+ } else {
+ /* register float ops */
+ int opreg = rm;
+
+ switch (op) {
+ case 0x08: /* fld sti */
+ gen_helper_fpush(tcg_env);
+ gen_helper_fmov_ST0_STN(tcg_env,
+ tcg_constant_i32((opreg + 1) & 7));
+ break;
+ case 0x09: /* fxchg sti */
+ case 0x29: /* fxchg4 sti, undocumented op */
+ case 0x39: /* fxchg7 sti, undocumented op */
+ gen_helper_fxchg_ST0_STN(tcg_env, tcg_constant_i32(opreg));
+ break;
+ case 0x0a: /* grp d9/2 */
+ switch (rm) {
+ case 0: /* fnop */
+ /*
+ * check exceptions (FreeBSD FPU probe)
+ * needs to be treated as I/O because of ferr_irq
+ */
+ translator_io_start(&s->base);
+ gen_helper_fwait(tcg_env);
+ update_fip = false;
+ break;
+ default:
+ return false;
+ }
+ break;
+ case 0x0c: /* grp d9/4 */
+ switch (rm) {
+ case 0: /* fchs */
+ gen_helper_fchs_ST0(tcg_env);
+ break;
+ case 1: /* fabs */
+ gen_helper_fabs_ST0(tcg_env);
+ break;
+ case 4: /* ftst */
+ gen_helper_fldz_FT0(tcg_env);
+ gen_helper_fcom_ST0_FT0(tcg_env);
+ break;
+ case 5: /* fxam */
+ gen_helper_fxam_ST0(tcg_env);
+ break;
+ default:
+ return false;
+ }
+ break;
+ case 0x0d: /* grp d9/5 */
+ {
+ switch (rm) {
+ case 0:
+ gen_helper_fpush(tcg_env);
+ gen_helper_fld1_ST0(tcg_env);
+ break;
+ case 1:
+ gen_helper_fpush(tcg_env);
+ gen_helper_fldl2t_ST0(tcg_env);
+ break;
+ case 2:
+ gen_helper_fpush(tcg_env);
+ gen_helper_fldl2e_ST0(tcg_env);
+ break;
+ case 3:
+ gen_helper_fpush(tcg_env);
+ gen_helper_fldpi_ST0(tcg_env);
+ break;
+ case 4:
+ gen_helper_fpush(tcg_env);
+ gen_helper_fldlg2_ST0(tcg_env);
+ break;
+ case 5:
+ gen_helper_fpush(tcg_env);
+ gen_helper_fldln2_ST0(tcg_env);
+ break;
+ case 6:
+ gen_helper_fpush(tcg_env);
+ gen_helper_fldz_ST0(tcg_env);
+ break;
+ default:
+ return false;
+ }
+ }
+ break;
+ case 0x0e: /* grp d9/6 */
+ switch (rm) {
+ case 0: /* f2xm1 */
+ gen_helper_f2xm1(tcg_env);
+ break;
+ case 1: /* fyl2x */
+ gen_helper_fyl2x(tcg_env);
+ break;
+ case 2: /* fptan */
+ gen_helper_fptan(tcg_env);
+ break;
+ case 3: /* fpatan */
+ gen_helper_fpatan(tcg_env);
+ break;
+ case 4: /* fxtract */
+ gen_helper_fxtract(tcg_env);
+ break;
+ case 5: /* fprem1 */
+ gen_helper_fprem1(tcg_env);
+ break;
+ case 6: /* fdecstp */
+ gen_helper_fdecstp(tcg_env);
+ break;
+ default:
+ case 7: /* fincstp */
+ gen_helper_fincstp(tcg_env);
+ break;
+ }
+ break;
+ case 0x0f: /* grp d9/7 */
+ switch (rm) {
+ case 0: /* fprem */
+ gen_helper_fprem(tcg_env);
+ break;
+ case 1: /* fyl2xp1 */
+ gen_helper_fyl2xp1(tcg_env);
+ break;
+ case 2: /* fsqrt */
+ gen_helper_fsqrt(tcg_env);
+ break;
+ case 3: /* fsincos */
+ gen_helper_fsincos(tcg_env);
+ break;
+ case 5: /* fscale */
+ gen_helper_fscale(tcg_env);
+ break;
+ case 4: /* frndint */
+ gen_helper_frndint(tcg_env);
+ break;
+ case 6: /* fsin */
+ gen_helper_fsin(tcg_env);
+ break;
+ default:
+ case 7: /* fcos */
+ gen_helper_fcos(tcg_env);
+ break;
+ }
+ break;
+ case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+ case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+ case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+ {
+ int op1;
+
+ op1 = op & 7;
+ if (op >= 0x20) {
+ gen_helper_fp_arith_STN_ST0(op1, opreg);
+ if (op >= 0x30) {
+ gen_helper_fpop(tcg_env);
+ }
+ } else {
+ gen_helper_fmov_FT0_STN(tcg_env,
+ tcg_constant_i32(opreg));
+ gen_helper_fp_arith_ST0_FT0(op1);
+ }
+ }
+ break;
+ case 0x02: /* fcom */
+ case 0x22: /* fcom2, undocumented op */
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fcom_ST0_FT0(tcg_env);
+ break;
+ case 0x03: /* fcomp */
+ case 0x23: /* fcomp3, undocumented op */
+ case 0x32: /* fcomp5, undocumented op */
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fcom_ST0_FT0(tcg_env);
+ gen_helper_fpop(tcg_env);
+ break;
+ case 0x15: /* da/5 */
+ switch (rm) {
+ case 1: /* fucompp */
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
+ gen_helper_fucom_ST0_FT0(tcg_env);
+ gen_helper_fpop(tcg_env);
+ gen_helper_fpop(tcg_env);
+ break;
+ default:
+ return false;
+ }
+ break;
+ case 0x1c:
+ switch (rm) {
+ case 0: /* feni (287 only, just do nop here) */
+ break;
+ case 1: /* fdisi (287 only, just do nop here) */
+ break;
+ case 2: /* fclex */
+ gen_helper_fclex(tcg_env);
+ update_fip = false;
+ break;
+ case 3: /* fninit */
+ gen_helper_fninit(tcg_env);
+ update_fip = false;
+ break;
+ case 4: /* fsetpm (287 only, just do nop here) */
+ break;
+ default:
+ return false;
+ }
+ break;
+ case 0x1d: /* fucomi */
+ if (!(s->cpuid_features & CPUID_CMOV)) {
+ goto illegal_op;
+ }
+ gen_update_cc_op(s);
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fucomi_ST0_FT0(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ case 0x1e: /* fcomi */
+ if (!(s->cpuid_features & CPUID_CMOV)) {
+ goto illegal_op;
+ }
+ gen_update_cc_op(s);
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fcomi_ST0_FT0(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ case 0x28: /* ffree sti */
+ gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
+ break;
+ case 0x2a: /* fst sti */
+ gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
+ break;
+ case 0x2b: /* fstp sti */
+ case 0x0b: /* fstp1 sti, undocumented op */
+ case 0x3a: /* fstp8 sti, undocumented op */
+ case 0x3b: /* fstp9 sti, undocumented op */
+ gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fpop(tcg_env);
+ break;
+ case 0x2c: /* fucom st(i) */
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fucom_ST0_FT0(tcg_env);
+ break;
+ case 0x2d: /* fucomp st(i) */
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fucom_ST0_FT0(tcg_env);
+ gen_helper_fpop(tcg_env);
+ break;
+ case 0x33: /* de/3 */
+ switch (rm) {
+ case 1: /* fcompp */
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
+ gen_helper_fcom_ST0_FT0(tcg_env);
+ gen_helper_fpop(tcg_env);
+ gen_helper_fpop(tcg_env);
+ break;
+ default:
+ return false;
+ }
+ break;
+ case 0x38: /* ffreep sti, undocumented op */
+ gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fpop(tcg_env);
+ break;
+ case 0x3c: /* df/4 */
+ switch (rm) {
+ case 0:
+ gen_helper_fnstsw(s->tmp2_i32, tcg_env);
+ tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
+ gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
+ break;
+ default:
+ return false;
+ }
+ break;
+ case 0x3d: /* fucomip */
+ if (!(s->cpuid_features & CPUID_CMOV)) {
+ goto illegal_op;
+ }
+ gen_update_cc_op(s);
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fucomi_ST0_FT0(tcg_env);
+ gen_helper_fpop(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ case 0x3e: /* fcomip */
+ if (!(s->cpuid_features & CPUID_CMOV)) {
+ goto illegal_op;
+ }
+ gen_update_cc_op(s);
+ gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
+ gen_helper_fcomi_ST0_FT0(tcg_env);
+ gen_helper_fpop(tcg_env);
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ case 0x10 ... 0x13: /* fcmovxx */
+ case 0x18 ... 0x1b:
+ {
+ int op1;
+ TCGLabel *l1;
+ static const uint8_t fcmov_cc[8] = {
+ (JCC_B << 1),
+ (JCC_Z << 1),
+ (JCC_BE << 1),
+ (JCC_P << 1),
+ };
+
+ if (!(s->cpuid_features & CPUID_CMOV)) {
+ goto illegal_op;
+ }
+ op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
+ l1 = gen_new_label();
+ gen_jcc1_noeob(s, op1, l1);
+ gen_helper_fmov_ST0_STN(tcg_env,
+ tcg_constant_i32(opreg));
+ gen_set_label(l1);
+ }
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (update_fip) {
+ tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
+ offsetof(CPUX86State, segs[R_CS].selector));
+ tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
+ offsetof(CPUX86State, fpcs));
+ tcg_gen_st_tl(eip_cur_tl(s),
+ tcg_env, offsetof(CPUX86State, fpip));
+ }
+ return true;
+
+ illegal_op:
+ gen_illegal_opcode(s);
+ return true;
+}
+
/* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
static bool disas_insn(DisasContext *s, CPUState *cpu)
@@ -2908,560 +3472,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
/************************/
/* floats */
case 0xd8 ... 0xdf:
- {
- bool update_fip = true;
-
- if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
- /* if CR0.EM or CR0.TS are set, generate an FPU exception */
- /* XXX: what to do if illegal op ? */
- gen_exception(s, EXCP07_PREX);
- break;
- }
- modrm = x86_ldub_code(env, s);
- mod = (modrm >> 6) & 3;
- rm = modrm & 7;
- op = ((b & 7) << 3) | ((modrm >> 3) & 7);
- if (mod != 3) {
- /* memory op */
- AddressParts a = gen_lea_modrm_0(env, s, modrm);
- TCGv ea = gen_lea_modrm_1(s, a, false);
- TCGv last_addr = tcg_temp_new();
- bool update_fdp = true;
-
- tcg_gen_mov_tl(last_addr, ea);
- gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
-
- switch (op) {
- case 0x00 ... 0x07: /* fxxxs */
- case 0x10 ... 0x17: /* fixxxl */
- case 0x20 ... 0x27: /* fxxxl */
- case 0x30 ... 0x37: /* fixxx */
- {
- int op1;
- op1 = op & 7;
-
- switch (op >> 4) {
- case 0:
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUL);
- gen_helper_flds_FT0(tcg_env, s->tmp2_i32);
- break;
- case 1:
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUL);
- gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
- break;
- case 2:
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
- s->mem_index, MO_LEUQ);
- gen_helper_fldl_FT0(tcg_env, s->tmp1_i64);
- break;
- case 3:
- default:
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LESW);
- gen_helper_fildl_FT0(tcg_env, s->tmp2_i32);
- break;
- }
-
- gen_helper_fp_arith_ST0_FT0(op1);
- if (op1 == 3) {
- /* fcomp needs pop */
- gen_helper_fpop(tcg_env);
- }
- }
- break;
- case 0x08: /* flds */
- case 0x0a: /* fsts */
- case 0x0b: /* fstps */
- case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
- case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
- case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
- switch (op & 7) {
- case 0:
- switch (op >> 4) {
- case 0:
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUL);
- gen_helper_flds_ST0(tcg_env, s->tmp2_i32);
- break;
- case 1:
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUL);
- gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
- break;
- case 2:
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
- s->mem_index, MO_LEUQ);
- gen_helper_fldl_ST0(tcg_env, s->tmp1_i64);
- break;
- case 3:
- default:
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LESW);
- gen_helper_fildl_ST0(tcg_env, s->tmp2_i32);
- break;
- }
- break;
- case 1:
- /* XXX: the corresponding CPUID bit must be tested ! */
- switch (op >> 4) {
- case 1:
- gen_helper_fisttl_ST0(s->tmp2_i32, tcg_env);
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUL);
- break;
- case 2:
- gen_helper_fisttll_ST0(s->tmp1_i64, tcg_env);
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
- s->mem_index, MO_LEUQ);
- break;
- case 3:
- default:
- gen_helper_fistt_ST0(s->tmp2_i32, tcg_env);
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUW);
- break;
- }
- gen_helper_fpop(tcg_env);
- break;
- default:
- switch (op >> 4) {
- case 0:
- gen_helper_fsts_ST0(s->tmp2_i32, tcg_env);
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUL);
- break;
- case 1:
- gen_helper_fistl_ST0(s->tmp2_i32, tcg_env);
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUL);
- break;
- case 2:
- gen_helper_fstl_ST0(s->tmp1_i64, tcg_env);
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
- s->mem_index, MO_LEUQ);
- break;
- case 3:
- default:
- gen_helper_fist_ST0(s->tmp2_i32, tcg_env);
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUW);
- break;
- }
- if ((op & 7) == 3) {
- gen_helper_fpop(tcg_env);
- }
- break;
- }
- break;
- case 0x0c: /* fldenv mem */
- gen_helper_fldenv(tcg_env, s->A0,
- tcg_constant_i32(dflag - 1));
- update_fip = update_fdp = false;
- break;
- case 0x0d: /* fldcw mem */
- tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUW);
- gen_helper_fldcw(tcg_env, s->tmp2_i32);
- update_fip = update_fdp = false;
- break;
- case 0x0e: /* fnstenv mem */
- gen_helper_fstenv(tcg_env, s->A0,
- tcg_constant_i32(dflag - 1));
- update_fip = update_fdp = false;
- break;
- case 0x0f: /* fnstcw mem */
- gen_helper_fnstcw(s->tmp2_i32, tcg_env);
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUW);
- update_fip = update_fdp = false;
- break;
- case 0x1d: /* fldt mem */
- gen_helper_fldt_ST0(tcg_env, s->A0);
- break;
- case 0x1f: /* fstpt mem */
- gen_helper_fstt_ST0(tcg_env, s->A0);
- gen_helper_fpop(tcg_env);
- break;
- case 0x2c: /* frstor mem */
- gen_helper_frstor(tcg_env, s->A0,
- tcg_constant_i32(dflag - 1));
- update_fip = update_fdp = false;
- break;
- case 0x2e: /* fnsave mem */
- gen_helper_fsave(tcg_env, s->A0,
- tcg_constant_i32(dflag - 1));
- update_fip = update_fdp = false;
- break;
- case 0x2f: /* fnstsw mem */
- gen_helper_fnstsw(s->tmp2_i32, tcg_env);
- tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
- s->mem_index, MO_LEUW);
- update_fip = update_fdp = false;
- break;
- case 0x3c: /* fbld */
- gen_helper_fbld_ST0(tcg_env, s->A0);
- break;
- case 0x3e: /* fbstp */
- gen_helper_fbst_ST0(tcg_env, s->A0);
- gen_helper_fpop(tcg_env);
- break;
- case 0x3d: /* fildll */
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
- s->mem_index, MO_LEUQ);
- gen_helper_fildll_ST0(tcg_env, s->tmp1_i64);
- break;
- case 0x3f: /* fistpll */
- gen_helper_fistll_ST0(s->tmp1_i64, tcg_env);
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
- s->mem_index, MO_LEUQ);
- gen_helper_fpop(tcg_env);
- break;
- default:
- goto unknown_op;
- }
-
- if (update_fdp) {
- int last_seg = s->override >= 0 ? s->override : a.def_seg;
-
- tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
- offsetof(CPUX86State,
- segs[last_seg].selector));
- tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
- offsetof(CPUX86State, fpds));
- tcg_gen_st_tl(last_addr, tcg_env,
- offsetof(CPUX86State, fpdp));
- }
- } else {
- /* register float ops */
- opreg = rm;
-
- switch (op) {
- case 0x08: /* fld sti */
- gen_helper_fpush(tcg_env);
- gen_helper_fmov_ST0_STN(tcg_env,
- tcg_constant_i32((opreg + 1) & 7));
- break;
- case 0x09: /* fxchg sti */
- case 0x29: /* fxchg4 sti, undocumented op */
- case 0x39: /* fxchg7 sti, undocumented op */
- gen_helper_fxchg_ST0_STN(tcg_env, tcg_constant_i32(opreg));
- break;
- case 0x0a: /* grp d9/2 */
- switch (rm) {
- case 0: /* fnop */
- /*
- * check exceptions (FreeBSD FPU probe)
- * needs to be treated as I/O because of ferr_irq
- */
- translator_io_start(&s->base);
- gen_helper_fwait(tcg_env);
- update_fip = false;
- break;
- default:
- goto unknown_op;
- }
- break;
- case 0x0c: /* grp d9/4 */
- switch (rm) {
- case 0: /* fchs */
- gen_helper_fchs_ST0(tcg_env);
- break;
- case 1: /* fabs */
- gen_helper_fabs_ST0(tcg_env);
- break;
- case 4: /* ftst */
- gen_helper_fldz_FT0(tcg_env);
- gen_helper_fcom_ST0_FT0(tcg_env);
- break;
- case 5: /* fxam */
- gen_helper_fxam_ST0(tcg_env);
- break;
- default:
- goto unknown_op;
- }
- break;
- case 0x0d: /* grp d9/5 */
- {
- switch (rm) {
- case 0:
- gen_helper_fpush(tcg_env);
- gen_helper_fld1_ST0(tcg_env);
- break;
- case 1:
- gen_helper_fpush(tcg_env);
- gen_helper_fldl2t_ST0(tcg_env);
- break;
- case 2:
- gen_helper_fpush(tcg_env);
- gen_helper_fldl2e_ST0(tcg_env);
- break;
- case 3:
- gen_helper_fpush(tcg_env);
- gen_helper_fldpi_ST0(tcg_env);
- break;
- case 4:
- gen_helper_fpush(tcg_env);
- gen_helper_fldlg2_ST0(tcg_env);
- break;
- case 5:
- gen_helper_fpush(tcg_env);
- gen_helper_fldln2_ST0(tcg_env);
- break;
- case 6:
- gen_helper_fpush(tcg_env);
- gen_helper_fldz_ST0(tcg_env);
- break;
- default:
- goto unknown_op;
- }
- }
- break;
- case 0x0e: /* grp d9/6 */
- switch (rm) {
- case 0: /* f2xm1 */
- gen_helper_f2xm1(tcg_env);
- break;
- case 1: /* fyl2x */
- gen_helper_fyl2x(tcg_env);
- break;
- case 2: /* fptan */
- gen_helper_fptan(tcg_env);
- break;
- case 3: /* fpatan */
- gen_helper_fpatan(tcg_env);
- break;
- case 4: /* fxtract */
- gen_helper_fxtract(tcg_env);
- break;
- case 5: /* fprem1 */
- gen_helper_fprem1(tcg_env);
- break;
- case 6: /* fdecstp */
- gen_helper_fdecstp(tcg_env);
- break;
- default:
- case 7: /* fincstp */
- gen_helper_fincstp(tcg_env);
- break;
- }
- break;
- case 0x0f: /* grp d9/7 */
- switch (rm) {
- case 0: /* fprem */
- gen_helper_fprem(tcg_env);
- break;
- case 1: /* fyl2xp1 */
- gen_helper_fyl2xp1(tcg_env);
- break;
- case 2: /* fsqrt */
- gen_helper_fsqrt(tcg_env);
- break;
- case 3: /* fsincos */
- gen_helper_fsincos(tcg_env);
- break;
- case 5: /* fscale */
- gen_helper_fscale(tcg_env);
- break;
- case 4: /* frndint */
- gen_helper_frndint(tcg_env);
- break;
- case 6: /* fsin */
- gen_helper_fsin(tcg_env);
- break;
- default:
- case 7: /* fcos */
- gen_helper_fcos(tcg_env);
- break;
- }
- break;
- case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
- case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
- case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
- {
- int op1;
-
- op1 = op & 7;
- if (op >= 0x20) {
- gen_helper_fp_arith_STN_ST0(op1, opreg);
- if (op >= 0x30) {
- gen_helper_fpop(tcg_env);
- }
- } else {
- gen_helper_fmov_FT0_STN(tcg_env,
- tcg_constant_i32(opreg));
- gen_helper_fp_arith_ST0_FT0(op1);
- }
- }
- break;
- case 0x02: /* fcom */
- case 0x22: /* fcom2, undocumented op */
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fcom_ST0_FT0(tcg_env);
- break;
- case 0x03: /* fcomp */
- case 0x23: /* fcomp3, undocumented op */
- case 0x32: /* fcomp5, undocumented op */
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fcom_ST0_FT0(tcg_env);
- gen_helper_fpop(tcg_env);
- break;
- case 0x15: /* da/5 */
- switch (rm) {
- case 1: /* fucompp */
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
- gen_helper_fucom_ST0_FT0(tcg_env);
- gen_helper_fpop(tcg_env);
- gen_helper_fpop(tcg_env);
- break;
- default:
- goto unknown_op;
- }
- break;
- case 0x1c:
- switch (rm) {
- case 0: /* feni (287 only, just do nop here) */
- break;
- case 1: /* fdisi (287 only, just do nop here) */
- break;
- case 2: /* fclex */
- gen_helper_fclex(tcg_env);
- update_fip = false;
- break;
- case 3: /* fninit */
- gen_helper_fninit(tcg_env);
- update_fip = false;
- break;
- case 4: /* fsetpm (287 only, just do nop here) */
- break;
- default:
- goto unknown_op;
- }
- break;
- case 0x1d: /* fucomi */
- if (!(s->cpuid_features & CPUID_CMOV)) {
- goto illegal_op;
- }
- gen_update_cc_op(s);
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fucomi_ST0_FT0(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0x1e: /* fcomi */
- if (!(s->cpuid_features & CPUID_CMOV)) {
- goto illegal_op;
- }
- gen_update_cc_op(s);
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fcomi_ST0_FT0(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0x28: /* ffree sti */
- gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
- break;
- case 0x2a: /* fst sti */
- gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
- break;
- case 0x2b: /* fstp sti */
- case 0x0b: /* fstp1 sti, undocumented op */
- case 0x3a: /* fstp8 sti, undocumented op */
- case 0x3b: /* fstp9 sti, undocumented op */
- gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fpop(tcg_env);
- break;
- case 0x2c: /* fucom st(i) */
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fucom_ST0_FT0(tcg_env);
- break;
- case 0x2d: /* fucomp st(i) */
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fucom_ST0_FT0(tcg_env);
- gen_helper_fpop(tcg_env);
- break;
- case 0x33: /* de/3 */
- switch (rm) {
- case 1: /* fcompp */
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1));
- gen_helper_fcom_ST0_FT0(tcg_env);
- gen_helper_fpop(tcg_env);
- gen_helper_fpop(tcg_env);
- break;
- default:
- goto unknown_op;
- }
- break;
- case 0x38: /* ffreep sti, undocumented op */
- gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fpop(tcg_env);
- break;
- case 0x3c: /* df/4 */
- switch (rm) {
- case 0:
- gen_helper_fnstsw(s->tmp2_i32, tcg_env);
- tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
- gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
- break;
- default:
- goto unknown_op;
- }
- break;
- case 0x3d: /* fucomip */
- if (!(s->cpuid_features & CPUID_CMOV)) {
- goto illegal_op;
- }
- gen_update_cc_op(s);
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fucomi_ST0_FT0(tcg_env);
- gen_helper_fpop(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0x3e: /* fcomip */
- if (!(s->cpuid_features & CPUID_CMOV)) {
- goto illegal_op;
- }
- gen_update_cc_op(s);
- gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg));
- gen_helper_fcomi_ST0_FT0(tcg_env);
- gen_helper_fpop(tcg_env);
- set_cc_op(s, CC_OP_EFLAGS);
- break;
- case 0x10 ... 0x13: /* fcmovxx */
- case 0x18 ... 0x1b:
- {
- int op1;
- TCGLabel *l1;
- static const uint8_t fcmov_cc[8] = {
- (JCC_B << 1),
- (JCC_Z << 1),
- (JCC_BE << 1),
- (JCC_P << 1),
- };
-
- if (!(s->cpuid_features & CPUID_CMOV)) {
- goto illegal_op;
- }
- op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
- l1 = gen_new_label();
- gen_jcc1_noeob(s, op1, l1);
- gen_helper_fmov_ST0_STN(tcg_env,
- tcg_constant_i32(opreg));
- gen_set_label(l1);
- }
- break;
- default:
- goto unknown_op;
- }
- }
-
- if (update_fip) {
- tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
- offsetof(CPUX86State, segs[R_CS].selector));
- tcg_gen_st16_i32(s->tmp2_i32, tcg_env,
- offsetof(CPUX86State, fpcs));
- tcg_gen_st_tl(eip_cur_tl(s),
- tcg_env, offsetof(CPUX86State, fpip));
- }
+ if (!disas_insn_x87(s, cpu, b)) {
+ goto unknown_op;
}
break;
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 24/25] target/i386: split legacy decoder into a separate function
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (22 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 23/25] target/i386: decode x87 instructions in a separate function Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
2024-05-06 8:09 ` [PATCH v2 25/25] target/i386: remove duplicate prefix decoding Paolo Bonzini
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Split the bits that have some duplication with disas_insn_new from
those that should be the main topic of the conversion. This is the
first step towards removing the duplicate decoding of prefixes between
disas_insn and disas_insn_new.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 58 +++++++++++++++++++++++--------------
1 file changed, 37 insertions(+), 21 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e077fdd8c71..8c1062c8e13 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3116,15 +3116,15 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
return true;
}
+static void disas_insn_old(DisasContext *s, CPUState *cpu, int b);
+
/* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
static bool disas_insn(DisasContext *s, CPUState *cpu)
{
CPUX86State *env = cpu_env(cpu);
int b, prefixes;
- int shift;
- MemOp ot, aflag, dflag;
- int modrm, reg, rm, mod, op, opreg, val;
+ MemOp aflag, dflag;
bool orig_cc_op_dirty = s->cc_op_dirty;
CCOp orig_cc_op = s->cc_op;
target_ulong orig_pc_save = s->pc_save;
@@ -3270,6 +3270,38 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
s->aflag = aflag;
s->dflag = dflag;
+ switch (b) {
+ case 0 ... 0xd7:
+ case 0xe0 ... 0xff:
+ case 0x10e ... 0x117:
+ case 0x128 ... 0x12f:
+ case 0x138 ... 0x19f:
+ case 0x1a0 ... 0x1a1:
+ case 0x1a8 ... 0x1a9:
+ case 0x1af:
+ case 0x1b2:
+ case 0x1b4 ... 0x1b7:
+ case 0x1be ... 0x1bf:
+ case 0x1c2 ... 0x1c6:
+ case 0x1c8 ... 0x1ff:
+ disas_insn_new(s, cpu, b);
+ break;
+ default:
+ disas_insn_old(s, cpu, b);
+ break;
+ }
+ return true;
+}
+
+static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
+{
+ CPUX86State *env = cpu_env(cpu);
+ int prefixes = s->prefix;
+ MemOp dflag = s->dflag;
+ int shift;
+ MemOp ot;
+ int modrm, reg, rm, mod, op, opreg, val;
+
/* now check op code */
switch (b) {
/**************************/
@@ -4725,31 +4757,15 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
set_cc_op(s, CC_OP_POPCNT);
break;
- case 0 ... 0xd7:
- case 0xe0 ... 0xff:
- case 0x10e ... 0x117:
- case 0x128 ... 0x12f:
- case 0x138 ... 0x19f:
- case 0x1a0 ... 0x1a1:
- case 0x1a8 ... 0x1a9:
- case 0x1af:
- case 0x1b2:
- case 0x1b4 ... 0x1b7:
- case 0x1be ... 0x1bf:
- case 0x1c2 ... 0x1c6:
- case 0x1c8 ... 0x1ff:
- disas_insn_new(s, cpu, b);
- break;
default:
goto unknown_op;
}
- return true;
+ return;
illegal_op:
gen_illegal_opcode(s);
- return true;
+ return;
unknown_op:
gen_unknown_opcode(env, s);
- return true;
}
void tcg_x86_init(void)
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 25/25] target/i386: remove duplicate prefix decoding
2024-05-06 8:09 [PATCH v2 00/25] target/i386: convert 1-byte opcodes to new decoder Paolo Bonzini
` (23 preceding siblings ...)
2024-05-06 8:09 ` [PATCH v2 24/25] target/i386: split legacy decoder into " Paolo Bonzini
@ 2024-05-06 8:09 ` Paolo Bonzini
24 siblings, 0 replies; 38+ messages in thread
From: Paolo Bonzini @ 2024-05-06 8:09 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson, zhao1.liu
Now that the bulk of the opcodes go through the new decoder, it is
sensible to do some cleanup. Go immediately through disas_insn_new and
only jump back to the old decoder after parsing the prefixes.
disas_insn() now only contains the three sigsetjmp cases, and they
are more easily managed if they are inlined into i386_tr_translate_insn.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 259 +++++++------------------------
target/i386/tcg/decode-new.c.inc | 60 +++++--
2 files changed, 100 insertions(+), 219 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 8c1062c8e13..df6e046d0c3 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2450,10 +2450,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}
-#include "decode-new.h"
-#include "emit.c.inc"
-#include "decode-new.c.inc"
-
static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
{
TCGv_i64 cmp, val, old;
@@ -3116,183 +3112,6 @@ static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b)
return true;
}
-static void disas_insn_old(DisasContext *s, CPUState *cpu, int b);
-
-/* convert one instruction. s->base.is_jmp is set if the translation must
- be stopped. Return the next pc value */
-static bool disas_insn(DisasContext *s, CPUState *cpu)
-{
- CPUX86State *env = cpu_env(cpu);
- int b, prefixes;
- MemOp aflag, dflag;
- bool orig_cc_op_dirty = s->cc_op_dirty;
- CCOp orig_cc_op = s->cc_op;
- target_ulong orig_pc_save = s->pc_save;
-
- s->pc = s->base.pc_next;
- s->override = -1;
- s->popl_esp_hack = 0;
-#ifdef TARGET_X86_64
- s->rex_r = 0;
- s->rex_x = 0;
- s->rex_b = 0;
-#endif
- s->rip_offset = 0; /* for relative ip address */
- s->vex_l = 0;
- s->vex_v = 0;
- s->vex_w = false;
- switch (sigsetjmp(s->jmpbuf, 0)) {
- case 0:
- break;
- case 1:
- gen_exception_gpf(s);
- return true;
- case 2:
- /* Restore state that may affect the next instruction. */
- s->pc = s->base.pc_next;
- /*
- * TODO: These save/restore can be removed after the table-based
- * decoder is complete; we will be decoding the insn completely
- * before any code generation that might affect these variables.
- */
- s->cc_op_dirty = orig_cc_op_dirty;
- s->cc_op = orig_cc_op;
- s->pc_save = orig_pc_save;
- /* END TODO */
- s->base.num_insns--;
- tcg_remove_ops_after(s->prev_insn_end);
- s->base.insn_start = s->prev_insn_start;
- s->base.is_jmp = DISAS_TOO_MANY;
- return false;
- default:
- g_assert_not_reached();
- }
-
- prefixes = 0;
-
- next_byte:
- s->prefix = prefixes;
- b = x86_ldub_code(env, s);
- /* Collect prefixes. */
- switch (b) {
- case 0x0f:
- b = x86_ldub_code(env, s) + 0x100;
- break;
- case 0xf3:
- prefixes |= PREFIX_REPZ;
- prefixes &= ~PREFIX_REPNZ;
- goto next_byte;
- case 0xf2:
- prefixes |= PREFIX_REPNZ;
- prefixes &= ~PREFIX_REPZ;
- goto next_byte;
- case 0xf0:
- prefixes |= PREFIX_LOCK;
- goto next_byte;
- case 0x2e:
- s->override = R_CS;
- goto next_byte;
- case 0x36:
- s->override = R_SS;
- goto next_byte;
- case 0x3e:
- s->override = R_DS;
- goto next_byte;
- case 0x26:
- s->override = R_ES;
- goto next_byte;
- case 0x64:
- s->override = R_FS;
- goto next_byte;
- case 0x65:
- s->override = R_GS;
- goto next_byte;
- case 0x66:
- prefixes |= PREFIX_DATA;
- goto next_byte;
- case 0x67:
- prefixes |= PREFIX_ADR;
- goto next_byte;
-#ifdef TARGET_X86_64
- case 0x40 ... 0x4f:
- if (CODE64(s)) {
- /* REX prefix */
- prefixes |= PREFIX_REX;
- s->vex_w = (b >> 3) & 1;
- s->rex_r = (b & 0x4) << 1;
- s->rex_x = (b & 0x2) << 2;
- s->rex_b = (b & 0x1) << 3;
- goto next_byte;
- }
- break;
-#endif
- case 0xc5: /* 2-byte VEX */
- case 0xc4: /* 3-byte VEX */
- if (CODE32(s) && !VM86(s)) {
- int vex2 = x86_ldub_code(env, s);
- s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
-
- if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
- /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
- otherwise the instruction is LES or LDS. */
- break;
- }
- disas_insn_new(s, cpu, b);
- return s->pc;
- }
- break;
- }
-
- /* Post-process prefixes. */
- if (CODE64(s)) {
- /* In 64-bit mode, the default data size is 32-bit. Select 64-bit
- data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
- over 0x66 if both are present. */
- dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
- /* In 64-bit mode, 0x67 selects 32-bit addressing. */
- aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
- } else {
- /* In 16/32-bit mode, 0x66 selects the opposite data size. */
- if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
- dflag = MO_32;
- } else {
- dflag = MO_16;
- }
- /* In 16/32-bit mode, 0x67 selects the opposite addressing. */
- if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
- aflag = MO_32;
- } else {
- aflag = MO_16;
- }
- }
-
- s->prefix = prefixes;
- s->aflag = aflag;
- s->dflag = dflag;
-
- switch (b) {
- case 0 ... 0xd7:
- case 0xe0 ... 0xff:
- case 0x10e ... 0x117:
- case 0x128 ... 0x12f:
- case 0x138 ... 0x19f:
- case 0x1a0 ... 0x1a1:
- case 0x1a8 ... 0x1a9:
- case 0x1af:
- case 0x1b2:
- case 0x1b4 ... 0x1b7:
- case 0x1be ... 0x1bf:
- case 0x1c2 ... 0x1c6:
- case 0x1c8 ... 0x1ff:
- disas_insn_new(s, cpu, b);
- break;
- default:
- disas_insn_old(s, cpu, b);
- break;
- }
- return true;
-}
-
static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
{
CPUX86State *env = cpu_env(cpu);
@@ -3501,14 +3320,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
}
break;
- /************************/
- /* floats */
- case 0xd8 ... 0xdf:
- if (!disas_insn_x87(s, cpu, b)) {
- goto unknown_op;
- }
- break;
-
/************************/
/* bit operations */
case 0x1ba: /* bt/bts/btr/btc Gv, im */
@@ -4758,7 +4569,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
set_cc_op(s, CC_OP_POPCNT);
break;
default:
- goto unknown_op;
+ g_assert_not_reached();
}
return;
illegal_op:
@@ -4768,6 +4579,10 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
gen_unknown_opcode(env, s);
}
+#include "decode-new.h"
+#include "emit.c.inc"
+#include "decode-new.c.inc"
+
void tcg_x86_init(void)
{
static const char reg_names[CPU_NB_REGS][4] = {
@@ -4889,7 +4704,6 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
dc->cc_op = CC_OP_DYNAMIC;
dc->cc_op_dirty = false;
- dc->popl_esp_hack = 0;
/* select memory access functions */
dc->mem_index = cpu_mmu_index(cpu, false);
dc->cpuid_features = env->features[FEAT_1_EDX];
@@ -4941,6 +4755,9 @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
+ bool orig_cc_op_dirty = dc->cc_op_dirty;
+ CCOp orig_cc_op = dc->cc_op;
+ target_ulong orig_pc_save = dc->pc_save;
#ifdef TARGET_VSYSCALL_PAGE
/*
@@ -4953,23 +4770,51 @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
}
#endif
- if (disas_insn(dc, cpu)) {
- target_ulong pc_next = dc->pc;
- dc->base.pc_next = pc_next;
+ switch (sigsetjmp(dc->jmpbuf, 0)) {
+ case 0:
+ disas_insn(dc, cpu);
+ break;
+ case 1:
+ gen_exception_gpf(dc);
+ break;
+ case 2:
+ /* Restore state that may affect the next instruction. */
+ dc->pc = dc->base.pc_next;
+ /*
+ * TODO: These save/restore can be removed after the table-based
+ * decoder is complete; we will be decoding the insn completely
+ * before any code generation that might affect these variables.
+ */
+ dc->cc_op_dirty = orig_cc_op_dirty;
+ dc->cc_op = orig_cc_op;
+ dc->pc_save = orig_pc_save;
+ /* END TODO */
+ dc->base.num_insns--;
+ tcg_remove_ops_after(dc->prev_insn_end);
+ dc->base.insn_start = dc->prev_insn_start;
+ dc->base.is_jmp = DISAS_TOO_MANY;
+ return;
+ default:
+ g_assert_not_reached();
+ }
- if (dc->base.is_jmp == DISAS_NEXT) {
- if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
- /*
- * If single step mode, we generate only one instruction and
- * generate an exception.
- * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
- * the flag and abort the translation to give the irqs a
- * chance to happen.
- */
- dc->base.is_jmp = DISAS_EOB_NEXT;
- } else if (!is_same_page(&dc->base, pc_next)) {
- dc->base.is_jmp = DISAS_TOO_MANY;
- }
+ /*
+ * Instruction decoding completed (possibly with #GP if the
+ * 15-byte boundary was exceeded).
+ */
+ dc->base.pc_next = dc->pc;
+ if (dc->base.is_jmp == DISAS_NEXT) {
+ if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
+ /*
+ * If single step mode, we generate only one instruction and
+ * generate an exception.
+ * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
+ * the flag and abort the translation to give the irqs a
+ * chance to happen.
+ */
+ dc->base.is_jmp = DISAS_EOB_NEXT;
+ } else if (!is_same_page(&dc->base, dc->base.pc_next)) {
+ dc->base.is_jmp = DISAS_TOO_MANY;
}
}
}
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 14218882681..46682cfe070 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -2219,22 +2219,31 @@ illegal:
* Convert one instruction. s->base.is_jmp is set if the translation must
* be stopped.
*/
-static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
+static void disas_insn(DisasContext *s, CPUState *cpu)
{
CPUX86State *env = cpu_env(cpu);
- bool first = true;
X86DecodedInsn decode;
X86DecodeFunc decode_func = decode_root;
- uint8_t cc_live;
+ uint8_t cc_live, b;
+ s->pc = s->base.pc_next;
+ s->override = -1;
+ s->popl_esp_hack = 0;
+#ifdef TARGET_X86_64
+ s->rex_r = 0;
+ s->rex_x = 0;
+ s->rex_b = 0;
+#endif
+ s->rip_offset = 0; /* for relative ip address */
+ s->vex_l = 0;
+ s->vex_v = 0;
+ s->vex_w = false;
s->has_modrm = false;
+ s->prefix = 0;
next_byte:
- if (first) {
- first = false;
- } else {
- b = x86_ldub_code(env, s);
- }
+ b = x86_ldub_code(env, s);
+
/* Collect prefixes. */
switch (b) {
case 0xf3:
@@ -2346,10 +2355,6 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
}
break;
default:
- if (b >= 0x100) {
- b -= 0x100;
- decode_func = do_decode_0F;
- }
break;
}
@@ -2378,6 +2383,37 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
}
}
+ /* Go back to old decoder for unconverted opcodes. */
+ if (!(s->prefix & PREFIX_VEX)) {
+ if ((b & ~7) == 0xd8) {
+ if (!disas_insn_x87(s, cpu, b)) {
+ goto unknown_op;
+ }
+ return;
+ }
+
+ if (b == 0x0f) {
+ b = x86_ldub_code(env, s);
+ switch (b) {
+ case 0x00 ... 0x0d: /* mostly privileged instructions */
+ case 0x18 ... 0x27: /* prefetch, MPX, mov from/to CR and DR */
+ case 0x30 ... 0x37: /* more privileged instructions */
+ case 0xa2 ... 0xa7: /* CPUID, BT, SHLD */
+ case 0xaa ... 0xae: /* RSM, SHRD, grp15 */
+ case 0xb0 ... 0xb1: /* cmpxchg */
+ case 0xb3: /* btr */
+ case 0xb8 ... 0xbd: /* integer ops */
+ case 0xc0 ... 0xc1: /* xadd */
+ case 0xc7: /* grp9 */
+ disas_insn_old(s, cpu, b + 0x100);
+ return;
+ default:
+ decode_func = do_decode_0F;
+ break;
+ }
+ }
+ }
+
memset(&decode, 0, sizeof(decode));
decode.cc_op = -1;
decode.b = b;
--
2.45.0
^ permalink raw reply related [flat|nested] 38+ messages in thread