* [PATCH for-8.0 1/3] target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b
2022-11-12 7:56 [PATCH for-8.0 0/3] target/i386: cmpxchg8b and cmpxchg16b cleanup Richard Henderson
@ 2022-11-12 7:56 ` Richard Henderson
2022-11-12 7:56 ` [PATCH for-8.0 2/3] target/i386: Inline cmpxchg8b Richard Henderson
2022-11-12 7:56 ` [PATCH for-8.0 3/3] target/i386: Inline cmpxchg16b Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2022-11-12 7:56 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 48 ++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 17 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 28a4e6dc1d..1175540a2c 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2974,6 +2974,34 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
#include "emit.c.inc"
#include "decode-new.c.inc"
+static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
+{
+ gen_lea_modrm(env, s, modrm);
+
+ if ((s->prefix & PREFIX_LOCK) &&
+ (tb_cflags(s->base.tb) & CF_PARALLEL)) {
+ gen_helper_cmpxchg8b(cpu_env, s->A0);
+ } else {
+ gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
+ }
+ set_cc_op(s, CC_OP_EFLAGS);
+}
+
+#ifdef TARGET_X86_64
+static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
+{
+ gen_lea_modrm(env, s, modrm);
+
+ if ((s->prefix & PREFIX_LOCK) &&
+ (tb_cflags(s->base.tb) & CF_PARALLEL)) {
+ gen_helper_cmpxchg16b(cpu_env, s->A0);
+ } else {
+ gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
+ }
+ set_cc_op(s, CC_OP_EFLAGS);
+}
+#endif
+
/* convert one instruction. s->base.is_jmp is set if the translation must
be stopped. Return the next pc value */
static bool disas_insn(DisasContext *s, CPUState *cpu)
@@ -3814,28 +3842,14 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
goto illegal_op;
}
- gen_lea_modrm(env, s, modrm);
- if ((s->prefix & PREFIX_LOCK) &&
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
- gen_helper_cmpxchg16b(cpu_env, s->A0);
- } else {
- gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
- }
- set_cc_op(s, CC_OP_EFLAGS);
+ gen_cmpxchg16b(s, env, modrm);
break;
}
-#endif
+#endif
if (!(s->cpuid_features & CPUID_CX8)) {
goto illegal_op;
}
- gen_lea_modrm(env, s, modrm);
- if ((s->prefix & PREFIX_LOCK) &&
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
- gen_helper_cmpxchg8b(cpu_env, s->A0);
- } else {
- gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
- }
- set_cc_op(s, CC_OP_EFLAGS);
+ gen_cmpxchg8b(s, env, modrm);
break;
case 7: /* RDSEED */
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH for-8.0 2/3] target/i386: Inline cmpxchg8b
2022-11-12 7:56 [PATCH for-8.0 0/3] target/i386: cmpxchg8b and cmpxchg16b cleanup Richard Henderson
2022-11-12 7:56 ` [PATCH for-8.0 1/3] target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b Richard Henderson
@ 2022-11-12 7:56 ` Richard Henderson
2022-11-12 7:56 ` [PATCH for-8.0 3/3] target/i386: Inline cmpxchg16b Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2022-11-12 7:56 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
Use tcg_gen_atomic_cmpxchg_i64 for the atomic case,
and tcg_gen_nonatomic_cmpxchg_i64 otherwise.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/helper.h | 2 --
target/i386/tcg/mem_helper.c | 57 ------------------------------------
target/i386/tcg/translate.c | 54 ++++++++++++++++++++++++++++++----
3 files changed, 49 insertions(+), 64 deletions(-)
diff --git a/target/i386/helper.h b/target/i386/helper.h
index b7de5429ef..2df8049f91 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -66,8 +66,6 @@ DEF_HELPER_1(rsm, void, env)
#endif /* !CONFIG_USER_ONLY */
DEF_HELPER_2(into, void, env, int)
-DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
-DEF_HELPER_2(cmpxchg8b, void, env, tl)
#ifdef TARGET_X86_64
DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
DEF_HELPER_2(cmpxchg16b, void, env, tl)
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
index e3cdafd2d4..814786bb87 100644
--- a/target/i386/tcg/mem_helper.c
+++ b/target/i386/tcg/mem_helper.c
@@ -27,63 +27,6 @@
#include "tcg/tcg.h"
#include "helper-tcg.h"
-void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
-{
- uintptr_t ra = GETPC();
- uint64_t oldv, cmpv, newv;
- int eflags;
-
- eflags = cpu_cc_compute_all(env, CC_OP);
-
- cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
- newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
-
- oldv = cpu_ldq_data_ra(env, a0, ra);
- newv = (cmpv == oldv ? newv : oldv);
- /* always do the store */
- cpu_stq_data_ra(env, a0, newv, ra);
-
- if (oldv == cmpv) {
- eflags |= CC_Z;
- } else {
- env->regs[R_EAX] = (uint32_t)oldv;
- env->regs[R_EDX] = (uint32_t)(oldv >> 32);
- eflags &= ~CC_Z;
- }
- CC_SRC = eflags;
-}
-
-void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
-{
-#ifdef CONFIG_ATOMIC64
- uint64_t oldv, cmpv, newv;
- int eflags;
-
- eflags = cpu_cc_compute_all(env, CC_OP);
-
- cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
- newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
-
- {
- uintptr_t ra = GETPC();
- int mem_idx = cpu_mmu_index(env, false);
- MemOpIdx oi = make_memop_idx(MO_TEUQ, mem_idx);
- oldv = cpu_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
- }
-
- if (oldv == cmpv) {
- eflags |= CC_Z;
- } else {
- env->regs[R_EAX] = (uint32_t)oldv;
- env->regs[R_EDX] = (uint32_t)(oldv >> 32);
- eflags &= ~CC_Z;
- }
- CC_SRC = eflags;
-#else
- cpu_loop_exit_atomic(env_cpu(env), GETPC());
-#endif /* CONFIG_ATOMIC64 */
-}
-
#ifdef TARGET_X86_64
void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
{
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1175540a2c..a134d63946 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2976,15 +2976,59 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
{
+ TCGv_i64 cmp, val, old;
+ TCGv Z;
+
gen_lea_modrm(env, s, modrm);
- if ((s->prefix & PREFIX_LOCK) &&
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
- gen_helper_cmpxchg8b(cpu_env, s->A0);
+ cmp = tcg_temp_new_i64();
+ val = tcg_temp_new_i64();
+ old = tcg_temp_new_i64();
+
+ /* Construct the comparison values from the register pair. */
+ tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+ tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+ /* Only require atomic with LOCK; non-parallel handled in generator. */
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
} else {
- gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
+ tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
+ s->mem_index, MO_TEUQ);
}
- set_cc_op(s, CC_OP_EFLAGS);
+ tcg_temp_free_i64(val);
+
+ /* Set Z to the required value of the Z flag: 1 if old == cmp, else 0. */
+ tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
+ Z = tcg_temp_new();
+ tcg_gen_trunc_i64_tl(Z, cmp);
+ tcg_temp_free_i64(cmp);
+
+ /*
+ * Extract the result values for the register pair.
+ * For 32-bit, we may do this unconditionally, because on success (Z=1),
+ * the old value matches the previous value in EDX:EAX. For x86_64,
+ * the store must be conditional, because we must leave the source
+ * registers unchanged on success, and zero-extend the writeback
+ * on failure (Z=0).
+ */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
+ } else {
+ TCGv zero = tcg_constant_tl(0);
+
+ tcg_gen_extr_i64_tl(s->T0, s->T1, old);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
+ s->T0, cpu_regs[R_EAX]);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
+ s->T1, cpu_regs[R_EDX]);
+ }
+ tcg_temp_free_i64(old);
+
+ /* Update Z. */
+ gen_compute_eflags(s);
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
+ tcg_temp_free(Z);
}
#ifdef TARGET_X86_64
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH for-8.0 3/3] target/i386: Inline cmpxchg16b
2022-11-12 7:56 [PATCH for-8.0 0/3] target/i386: cmpxchg8b and cmpxchg16b cleanup Richard Henderson
2022-11-12 7:56 ` [PATCH for-8.0 1/3] target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b Richard Henderson
2022-11-12 7:56 ` [PATCH for-8.0 2/3] target/i386: Inline cmpxchg8b Richard Henderson
@ 2022-11-12 7:56 ` Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2022-11-12 7:56 UTC (permalink / raw)
To: qemu-devel; +Cc: pbonzini, eduardo
Use tcg_gen_atomic_cmpxchg_i128 for the atomic case,
and tcg_gen_nonatomic_cmpxchg_i128 otherwise.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/helper.h | 4 ---
target/i386/tcg/mem_helper.c | 69 ------------------------------------
target/i386/tcg/translate.c | 44 ++++++++++++++++++++---
3 files changed, 39 insertions(+), 78 deletions(-)
diff --git a/target/i386/helper.h b/target/i386/helper.h
index 2df8049f91..e627a93107 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -66,10 +66,6 @@ DEF_HELPER_1(rsm, void, env)
#endif /* !CONFIG_USER_ONLY */
DEF_HELPER_2(into, void, env, int)
-#ifdef TARGET_X86_64
-DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
-DEF_HELPER_2(cmpxchg16b, void, env, tl)
-#endif
DEF_HELPER_FLAGS_1(single_step, TCG_CALL_NO_WG, noreturn, env)
DEF_HELPER_1(rechecking_single_step, void, env)
DEF_HELPER_1(cpuid, void, env)
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
index 814786bb87..3ef84e90d9 100644
--- a/target/i386/tcg/mem_helper.c
+++ b/target/i386/tcg/mem_helper.c
@@ -27,75 +27,6 @@
#include "tcg/tcg.h"
#include "helper-tcg.h"
-#ifdef TARGET_X86_64
-void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
-{
- uintptr_t ra = GETPC();
- Int128 oldv, cmpv, newv;
- uint64_t o0, o1;
- int eflags;
- bool success;
-
- if ((a0 & 0xf) != 0) {
- raise_exception_ra(env, EXCP0D_GPF, GETPC());
- }
- eflags = cpu_cc_compute_all(env, CC_OP);
-
- cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
- newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
-
- o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
- o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
-
- oldv = int128_make128(o0, o1);
- success = int128_eq(oldv, cmpv);
- if (!success) {
- newv = oldv;
- }
-
- cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
- cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
-
- if (success) {
- eflags |= CC_Z;
- } else {
- env->regs[R_EAX] = int128_getlo(oldv);
- env->regs[R_EDX] = int128_gethi(oldv);
- eflags &= ~CC_Z;
- }
- CC_SRC = eflags;
-}
-
-void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
-{
- uintptr_t ra = GETPC();
-
- if ((a0 & 0xf) != 0) {
- raise_exception_ra(env, EXCP0D_GPF, ra);
- } else if (HAVE_CMPXCHG128) {
- int eflags = cpu_cc_compute_all(env, CC_OP);
-
- Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
- Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
-
- int mem_idx = cpu_mmu_index(env, false);
- MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
- Int128 oldv = cpu_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra);
-
- if (int128_eq(oldv, cmpv)) {
- eflags |= CC_Z;
- } else {
- env->regs[R_EAX] = int128_getlo(oldv);
- env->regs[R_EDX] = int128_gethi(oldv);
- eflags &= ~CC_Z;
- }
- CC_SRC = eflags;
- } else {
- cpu_loop_exit_atomic(env_cpu(env), ra);
- }
-}
-#endif
-
void helper_boundw(CPUX86State *env, target_ulong a0, int v)
{
int low, high;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a134d63946..6dfcfaf31a 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3034,15 +3034,49 @@ static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
#ifdef TARGET_X86_64
static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
{
+ MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+ TCGv_i64 t0, t1;
+ TCGv_i128 cmp, val;
+
gen_lea_modrm(env, s, modrm);
- if ((s->prefix & PREFIX_LOCK) &&
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
- gen_helper_cmpxchg16b(cpu_env, s->A0);
+ cmp = tcg_temp_new_i128();
+ val = tcg_temp_new_i128();
+ tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+ tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+ /* Only require atomic with LOCK; non-parallel handled in generator. */
+ if (s->prefix & PREFIX_LOCK) {
+ tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
} else {
- gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
+ tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
}
- set_cc_op(s, CC_OP_EFLAGS);
+
+ tcg_gen_extr_i128_i64(s->T0, s->T1, val);
+ tcg_temp_free_i128(cmp);
+ tcg_temp_free_i128(val);
+
+ /* Determine success after the fact. */
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
+ tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
+ tcg_gen_or_i64(t0, t0, t1);
+ tcg_temp_free_i64(t1);
+
+ /* Update Z. */
+ gen_compute_eflags(s);
+ tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
+ tcg_temp_free_i64(t0);
+
+ /*
+ * Extract the result values for the register pair. We may do this
+ * unconditionally, because on success (Z=1), the old value matches
+ * the previous value in RDX:RAX.
+ */
+ tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
+ tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread