* [PATCH 1/5] target/i386: Tidy cc_op_str usage
2024-07-01 2:51 [PATCH 0/5] target/i386: CCOp cleanups Richard Henderson
@ 2024-07-01 2:51 ` Richard Henderson
2024-07-01 2:51 ` [PATCH 2/5] target/i386: Convert cc_op_live to a function Richard Henderson
` (4 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2024-07-01 2:51 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini
Make const. Use the read-only strings directly; do not copy
them into an on-stack buffer with snprintf. Allow for holes
in the cc_op_str array, now present with CC_OP_POPCNT.
Fixes: 460231ad369 ("target/i386: give CC_OP_POPCNT low bits corresponding to MO_TL")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu-dump.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
index 3bb8e44091..dc6723aede 100644
--- a/target/i386/cpu-dump.c
+++ b/target/i386/cpu-dump.c
@@ -27,7 +27,7 @@
/***********************************************************/
/* x86 debug */
-static const char *cc_op_str[CC_OP_NB] = {
+static const char * const cc_op_str[] = {
[CC_OP_DYNAMIC] = "DYNAMIC",
[CC_OP_EFLAGS] = "EFLAGS",
@@ -347,7 +347,6 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags)
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
int eflags, i, nb;
- char cc_op_name[32];
static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
eflags = cpu_compute_eflags(env);
@@ -456,10 +455,16 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags)
env->dr[6], env->dr[7]);
}
if (flags & CPU_DUMP_CCOP) {
- if ((unsigned)env->cc_op < CC_OP_NB)
- snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
- else
- snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op);
+ const char *cc_op_name = NULL;
+ char cc_op_buf[32];
+
+ if ((unsigned)env->cc_op < ARRAY_SIZE(cc_op_str)) {
+ cc_op_name = cc_op_str[env->cc_op];
+ }
+ if (cc_op_name == NULL) {
+ snprintf(cc_op_buf, sizeof(cc_op_buf), "[%d]", env->cc_op);
+ cc_op_name = cc_op_buf;
+ }
#ifdef TARGET_X86_64
if (env->hflags & HF_CS64_MASK) {
qemu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%s\n",
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 2/5] target/i386: Convert cc_op_live to a function
2024-07-01 2:51 [PATCH 0/5] target/i386: CCOp cleanups Richard Henderson
2024-07-01 2:51 ` [PATCH 1/5] target/i386: Tidy cc_op_str usage Richard Henderson
@ 2024-07-01 2:51 ` Richard Henderson
2024-07-01 2:51 ` [PATCH 3/5] target/i386: Rearrange CCOp Richard Henderson
` (3 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2024-07-01 2:51 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini
Assert that op is known.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 56 +++++++++++++++++++-------------
target/i386/tcg/decode-new.c.inc | 2 +-
target/i386/tcg/emit.c.inc | 6 ++--
3 files changed, 39 insertions(+), 25 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 95bad55bf4..e5a8aaf793 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -290,26 +290,38 @@ enum {
};
/* Bit set if the global variable is live after setting CC_OP to X. */
-static const uint8_t cc_op_live[CC_OP_NB] = {
- [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
- [CC_OP_EFLAGS] = USES_CC_SRC,
- [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
- [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
- [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
- [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
- [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
- [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
- [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
- [CC_OP_CLR] = 0,
- [CC_OP_POPCNT] = USES_CC_DST,
-};
+static uint8_t cc_op_live(CCOp op)
+{
+ switch (op) {
+ case CC_OP_CLR:
+ return 0;
+ case CC_OP_EFLAGS:
+ return USES_CC_SRC;
+ case CC_OP_POPCNT:
+ case CC_OP_LOGICB ... CC_OP_LOGICQ:
+ return USES_CC_DST;
+ case CC_OP_MULB ... CC_OP_MULQ:
+ case CC_OP_ADDB ... CC_OP_ADDQ:
+ case CC_OP_INCB ... CC_OP_INCQ:
+ case CC_OP_DECB ... CC_OP_DECQ:
+ case CC_OP_SHLB ... CC_OP_SHLQ:
+ case CC_OP_SARB ... CC_OP_SARQ:
+ case CC_OP_BMILGB ... CC_OP_BMILGQ:
+ case CC_OP_ADCX:
+ return USES_CC_DST | USES_CC_SRC;
+ case CC_OP_ADOX:
+ return USES_CC_SRC | USES_CC_SRC2;
+ case CC_OP_SUBB ... CC_OP_SUBQ:
+ return USES_CC_DST | USES_CC_SRC | USES_CC_SRCT;
+ case CC_OP_DYNAMIC:
+ case CC_OP_ADCB ... CC_OP_ADCQ:
+ case CC_OP_SBBB ... CC_OP_SBBQ:
+ case CC_OP_ADCOX:
+ return USES_CC_DST | USES_CC_SRC | USES_CC_SRC2;
+ default:
+ g_assert_not_reached();
+ }
+}
static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty)
{
@@ -320,7 +332,7 @@ static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty)
}
/* Discard CC computation that will no longer be used. */
- dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
+ dead = cc_op_live(s->cc_op) & ~cc_op_live(op);
if (dead & USES_CC_DST) {
tcg_gen_discard_tl(cpu_cc_dst);
}
@@ -816,7 +828,7 @@ static void gen_mov_eflags(DisasContext *s, TCGv reg)
src2 = cpu_cc_src2;
/* Take care to not read values that are not live. */
- live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
+ live = cc_op_live(s->cc_op) & ~USES_CC_SRCT;
dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
if (dead) {
TCGv zero = tcg_constant_tl(0);
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 0d846c32c2..ba4b8d16f9 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -2792,7 +2792,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
}
set_cc_op(s, decode.cc_op);
- cc_live = cc_op_live[decode.cc_op];
+ cc_live = cc_op_live(decode.cc_op);
} else {
cc_live = 0;
}
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index fc7477833b..38b399783e 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -3531,18 +3531,20 @@ static void gen_shift_dynamic_flags(DisasContext *s, X86DecodedInsn *decode, TCG
{
TCGv_i32 count32 = tcg_temp_new_i32();
TCGv_i32 old_cc_op;
+ uint8_t live;
decode->cc_op = CC_OP_DYNAMIC;
decode->cc_op_dynamic = tcg_temp_new_i32();
assert(decode->cc_dst == s->T0);
- if (cc_op_live[s->cc_op] & USES_CC_DST) {
+ live = cc_op_live(s->cc_op);
+ if (live & USES_CC_DST) {
decode->cc_dst = tcg_temp_new();
tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0),
cpu_cc_dst, s->T0);
}
- if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+ if (live & USES_CC_SRC) {
tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src, count, tcg_constant_tl(0),
cpu_cc_src, decode->cc_src);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 3/5] target/i386: Rearrange CCOp
2024-07-01 2:51 [PATCH 0/5] target/i386: CCOp cleanups Richard Henderson
2024-07-01 2:51 ` [PATCH 1/5] target/i386: Tidy cc_op_str usage Richard Henderson
2024-07-01 2:51 ` [PATCH 2/5] target/i386: Convert cc_op_live to a function Richard Henderson
@ 2024-07-01 2:51 ` Richard Henderson
2024-07-01 2:51 ` [PATCH 4/5] target/i386: Remove default in cc_op_live Richard Henderson
` (2 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2024-07-01 2:51 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini
Define CC_OP_{FIRST,LAST}_BWLQ. Remove CC_OP_NB.
Give the first few enumerators explicit integer constants.
Move CC_OP_POPCNT up in the enumeration; remove unused
CC_OP_POPCNT*__ placeholders. Align the BWLQ enumerators.
This will be used to simplify ((op - CC_OP_*B) & 3).
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu.h | 44 ++++++++++++++++++++++++--------------------
1 file changed, 24 insertions(+), 20 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 29daf37048..df4272fdae 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1270,14 +1270,27 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
* are only needed for conditional branches.
*/
typedef enum {
- CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
- CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */
- CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */
- CC_OP_ADOX, /* CC_SRC2 = O, CC_SRC = rest. */
- CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
- CC_OP_CLR, /* Z and P set, all other flags clear. */
+ CC_OP_DYNAMIC = 0, /* must use dynamic code to get cc_op */
+ CC_OP_EFLAGS = 1, /* all cc are explicitly computed, CC_SRC = flags */
+ CC_OP_ADCX = 2, /* CC_DST = C, CC_SRC = rest. */
+ CC_OP_ADOX = 3, /* CC_SRC2 = O, CC_SRC = rest. */
+ CC_OP_ADCOX = 4, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
+ CC_OP_CLR = 5, /* Z and P set, all other flags clear. */
- CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
+ /*
+ * Z via CC_DST, all other flags clear.
+ * Treat CC_OP_POPCNT like the other BWLQ ops in making the low bits
+ * equal MO_TL; this gives a value of either 6 or 7.
+ */
+#ifdef TARGET_X86_64
+ CC_OP_POPCNT = 7,
+#else
+ CC_OP_POPCNT = 6,
+#endif
+
+#define CC_OP_FIRST_BWLQ CC_OP_POPCNT
+
+ CC_OP_MULB = 8, /* modify all flags, C, O = (CC_SRC != 0) */
CC_OP_MULW,
CC_OP_MULL,
CC_OP_MULQ,
@@ -1332,20 +1345,11 @@ typedef enum {
CC_OP_BMILGL,
CC_OP_BMILGQ,
- /*
- * Note that only CC_OP_POPCNT (i.e. the one with MO_TL size)
- * is used or implemented, because the translation needs
- * to zero-extend CC_DST anyway.
- */
- CC_OP_POPCNTB__, /* Z via CC_DST, all other flags clear. */
- CC_OP_POPCNTW__,
- CC_OP_POPCNTL__,
- CC_OP_POPCNTQ__,
- CC_OP_POPCNT = sizeof(target_ulong) == 8 ? CC_OP_POPCNTQ__ : CC_OP_POPCNTL__,
-
- CC_OP_NB,
+#define CC_OP_LAST_BWLQ CC_OP_BMILGQ
} CCOp;
-QEMU_BUILD_BUG_ON(CC_OP_NB >= 128);
+
+/* See X86DecodedInsn.cc_op, using int8_t. */
+QEMU_BUILD_BUG_ON(CC_OP_LAST_BWLQ > INT8_MAX);
typedef struct SegmentCache {
uint32_t selector;
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 4/5] target/i386: Remove default in cc_op_live
2024-07-01 2:51 [PATCH 0/5] target/i386: CCOp cleanups Richard Henderson
` (2 preceding siblings ...)
2024-07-01 2:51 ` [PATCH 3/5] target/i386: Rearrange CCOp Richard Henderson
@ 2024-07-01 2:51 ` Richard Henderson
2024-07-01 2:51 ` [PATCH 5/5] target/i386: Introduce cc_op_size Richard Henderson
2024-07-01 17:48 ` [PATCH 0/5] target/i386: CCOp cleanups Paolo Bonzini
5 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2024-07-01 2:51 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini
Now that CC_OP_NB is gone, push the assert after the switch.
This will allow -Wswitch to diagnose missing entries.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e5a8aaf793..e675afca47 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -318,9 +318,8 @@ static uint8_t cc_op_live(CCOp op)
case CC_OP_SBBB ... CC_OP_SBBQ:
case CC_OP_ADCOX:
return USES_CC_DST | USES_CC_SRC | USES_CC_SRC2;
- default:
- g_assert_not_reached();
}
+ g_assert_not_reached();
}
static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty)
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH 5/5] target/i386: Introduce cc_op_size
2024-07-01 2:51 [PATCH 0/5] target/i386: CCOp cleanups Richard Henderson
` (3 preceding siblings ...)
2024-07-01 2:51 ` [PATCH 4/5] target/i386: Remove default in cc_op_live Richard Henderson
@ 2024-07-01 2:51 ` Richard Henderson
2024-07-01 17:48 ` [PATCH 0/5] target/i386: CCOp cleanups Paolo Bonzini
5 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2024-07-01 2:51 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini
Replace arithmetic on cc_op with a helper function.
Assert that the op has a size and that it is valid
for the configuration.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 29 ++++++++++++++++++-----------
target/i386/tcg/emit.c.inc | 3 ++-
2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index e675afca47..e98bed0805 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -322,6 +322,16 @@ static uint8_t cc_op_live(CCOp op)
g_assert_not_reached();
}
+static MemOp cc_op_size(CCOp op)
+{
+ MemOp size = op & 3;
+
+ assert(op >= CC_OP_FIRST_BWLQ && op <= CC_OP_LAST_BWLQ);
+ assert(size <= MO_TL);
+
+ return size;
+}
+
static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty)
{
int dead;
@@ -884,7 +894,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
switch (s->cc_op) {
case CC_OP_SUBB ... CC_OP_SUBQ:
/* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
- size = s->cc_op - CC_OP_SUBB;
+ size = cc_op_size(s->cc_op);
gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
return (CCPrepare) { .cond = TCG_COND_LTU, .reg = s->cc_srcT,
@@ -892,7 +902,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_ADDB ... CC_OP_ADDQ:
/* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
- size = s->cc_op - CC_OP_ADDB;
+ size = cc_op_size(s->cc_op);
gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size, false);
gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
return (CCPrepare) { .cond = TCG_COND_LTU, .reg = cpu_cc_dst,
@@ -910,7 +920,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
case CC_OP_SHLB ... CC_OP_SHLQ:
/* (CC_SRC >> (DATA_BITS - 1)) & 1 */
- size = s->cc_op - CC_OP_SHLB;
+ size = cc_op_size(s->cc_op);
return gen_prepare_sign_nz(cpu_cc_src, size);
case CC_OP_MULB ... CC_OP_MULQ:
@@ -918,7 +928,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
.reg = cpu_cc_src };
case CC_OP_BMILGB ... CC_OP_BMILGQ:
- size = s->cc_op - CC_OP_BMILGB;
+ size = cc_op_size(s->cc_op);
gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false);
return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src };
@@ -972,10 +982,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
case CC_OP_POPCNT:
return (CCPrepare) { .cond = TCG_COND_NEVER };
default:
- {
- MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
- return gen_prepare_sign_nz(cpu_cc_dst, size);
- }
+ return gen_prepare_sign_nz(cpu_cc_dst, cc_op_size(s->cc_op));
}
}
@@ -1016,7 +1023,7 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
return (CCPrepare) { .cond = TCG_COND_ALWAYS };
default:
{
- MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
+ MemOp size = cc_op_size(s->cc_op);
if (size == MO_TL) {
return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst };
} else {
@@ -1042,7 +1049,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
switch (s->cc_op) {
case CC_OP_SUBB ... CC_OP_SUBQ:
/* We optimize relational operators for the cmp/jcc case. */
- size = s->cc_op - CC_OP_SUBB;
+ size = cc_op_size(s->cc_op);
switch (jcc_op) {
case JCC_BE:
gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false);
@@ -3176,7 +3183,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
same width. */
tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
- set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
+ set_cc_op(s, CC_OP_SARB + cc_op_size(s->cc_op));
break;
default:
/* Otherwise, generate EFLAGS and replace the C bit. */
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 38b399783e..e9d5d196ce 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -3106,7 +3106,8 @@ static bool gen_eflags_adcox(DisasContext *s, X86DecodedInsn *decode, bool want_
* bit, we might as well fish CF out of EFLAGS and save a shift.
*/
if (want_carry && (!need_flags || s->cc_op == CC_OP_SHLB + MO_TL)) {
- tcg_gen_shri_tl(decode->cc_dst, cpu_cc_src, (8 << (s->cc_op - CC_OP_SHLB)) - 1);
+ MemOp size = cc_op_size(s->cc_op);
+ tcg_gen_shri_tl(decode->cc_dst, cpu_cc_src, (8 << size) - 1);
got_cf = true;
}
gen_mov_eflags(s, decode->cc_src);
--
2.34.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH 0/5] target/i386: CCOp cleanups
2024-07-01 2:51 [PATCH 0/5] target/i386: CCOp cleanups Richard Henderson
` (4 preceding siblings ...)
2024-07-01 2:51 ` [PATCH 5/5] target/i386: Introduce cc_op_size Richard Henderson
@ 2024-07-01 17:48 ` Paolo Bonzini
2024-07-01 19:05 ` Richard Henderson
5 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2024-07-01 17:48 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Mon, Jul 1, 2024 at 4:51 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
> While debugging #2413, I spent quite a bit of time trying to work
> out if the CCOp value was incorrect. I think the following is a
> worthwhile cleanup, isolating potential problems to asserts.
Hi Richard,
no objections at all to introducing more asserts. I think keeping the
array is a better underlying implementation for cc_op_live() however.
I'm also not very fond of mixing "sized" and "unsized" CCOps in the
4..7 range, there's no real reason why CC_OP_DYNAMIC and CC_OP_CLR
must be close to CC_OP_EFLAGS and the ADCOX CCOps. I also think it's
clearer to keep CC_OP_POPCNT[BWLQ] (even though in practice only one
will be used because popcnt needs zero extension anyway).
As an aside, I'm wondering if CC_OP_CLR is particularly important; I
expect "xor reg, reg" to be followed by more ALU operations most of
the time and to not be followed by a jump, so it only saves a spill if
xor reg, reg is followed by a lot or store. If gen_XOR used either
CC_OP_LOGICn or CC_OP_EFLAGS for "xor reg, reg", the values in
decode->cc_op_* (CC_OP_DST=0 for CC_OP_LOGICn; CC_OP_SRC=CC_Z|CC_P for
CC_OP_EFLAGS) would be constant and wouldn't add to register pressure.
Paolo
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH 0/5] target/i386: CCOp cleanups
2024-07-01 17:48 ` [PATCH 0/5] target/i386: CCOp cleanups Paolo Bonzini
@ 2024-07-01 19:05 ` Richard Henderson
2024-07-01 19:30 ` Paolo Bonzini
0 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2024-07-01 19:05 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 7/1/24 10:48, Paolo Bonzini wrote:
> On Mon, Jul 1, 2024 at 4:51 AM Richard Henderson
> <richard.henderson@linaro.org> wrote:
>> While debugging #2413, I spent quite a bit of time trying to work
>> out if the CCOp value was incorrect. I think the following is a
>> worthwhile cleanup, isolating potential problems to asserts.
>
> Hi Richard,
>
> no objections at all to introducing more asserts. I think keeping the
> array is a better underlying implementation for cc_op_live() however.
Hmm. I had an implementation that would detect missing entries at runtime, but this one
detects missing entries at compile time.
>
> I'm also not very fond of mixing "sized" and "unsized" CCOps in the
> 4..7 range, there's no real reason why CC_OP_DYNAMIC and CC_OP_CLR
> must be close to CC_OP_EFLAGS and the ADCOX CCOps. I also think it's
> clearer to keep CC_OP_POPCNT[BWLQ] (even though in practice only one
> will be used because popcnt needs zero extension anyway).
My objection to keeping the unused POPCNT* enumerators is that it interferes with proper
cooperation with -Wswitch, to diagnose missing enumerators. This is also why I removed
CC_OP_NB.
If those extra enumerators are present, you either have to include them explicitly in the
switch, which is less than desirable, or add a default case, which disables -Wswitch entirely.
If you don't like abusing 4..7, then we can use 8..11, placing POPCNT at 10 or 11.
> As an aside, I'm wondering if CC_OP_CLR is particularly important; I
> expect "xor reg, reg" to be followed by more ALU operations most of
> the time and to not be followed by a jump, so it only saves a spill if
> xor reg, reg is followed by a lot or store. If gen_XOR used either
> CC_OP_LOGICn or CC_OP_EFLAGS for "xor reg, reg", the values in
> decode->cc_op_* (CC_OP_DST=0 for CC_OP_LOGICn; CC_OP_SRC=CC_Z|CC_P for
> CC_OP_EFLAGS) would be constant and wouldn't add to register pressure.
You could easily be right. Improvements to tcg in the last 11 years may have made it
redundant, or it might have been wishful thinking even at the time.
r~
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 0/5] target/i386: CCOp cleanups
2024-07-01 19:05 ` Richard Henderson
@ 2024-07-01 19:30 ` Paolo Bonzini
2024-07-01 19:51 ` Richard Henderson
0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2024-07-01 19:30 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Mon, Jul 1, 2024 at 9:05 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
> > no objections at all to introducing more asserts. I think keeping the
> > array is a better underlying implementation for cc_op_live() however.
>
> Hmm. I had an implementation that would detect missing entries at runtime, but this one
> detects missing entries at compile time.
But how common is it to add new CCOps? I find the array more readable
(and it can also be printed quickly from gdb), while the switch
statement optimizes for a really rare case.
> > I'm also not very fond of mixing "sized" and "unsized" CCOps in the
> > 4..7 range, there's no real reason why CC_OP_DYNAMIC and CC_OP_CLR
> > must be close to CC_OP_EFLAGS and the ADCOX CCOps. I also think it's
> > clearer to keep CC_OP_POPCNT[BWLQ] (even though in practice only one
> > will be used because popcnt needs zero extension anyway).
>
> My objection to keeping the unused POPCNT* enumerators is that it interferes with proper
> cooperation with -Wswitch, to diagnose missing enumerators. This is also why I removed
> CC_OP_NB.
Yes, I agree with removing CC_OP_NB. However the unused POPCNT[BWLQ]
can be implemented trivially (they're all the same).
> > As an aside, I'm wondering if CC_OP_CLR is particularly important; I
> > expect "xor reg, reg" to be followed by more ALU operations most of
> > the time and to not be followed by a jump, so it only saves a spill if
> > xor reg, reg is followed by a lot or store. If gen_XOR used either
> > CC_OP_LOGICn or CC_OP_EFLAGS for "xor reg, reg", the values in
> > decode->cc_op_* (CC_OP_DST=0 for CC_OP_LOGICn; CC_OP_SRC=CC_Z|CC_P for
> > CC_OP_EFLAGS) would be constant and wouldn't add to register pressure.
>
> You could easily be right. Improvements to tcg in the last 11 years may have made it
> redundant, or it might have been wishful thinking even at the time.
Maybe. Just looking at the last couple years, with PCREL the cost of
translation has decreased substantially, and with the new decoder the
number of tcg ops has increased a bit. In both cases that means that
counting the tcg ops becomes less important.
BTW I found an easy way to implement X86_SPECIAL_BitTest without
crashes (just use cpu_regs[op->n] when computing the displacement
since you cannot have ah/bh/ch/dh). But I think it will be for 9.2.
Maybe these patches can wait too?
Paolo
^ permalink raw reply [flat|nested] 10+ messages in thread