From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>
Subject: [PULL 22/46] target/i386: implement CMPccXADD
Date: Sun, 31 Dec 2023 09:44:38 +0100 [thread overview]
Message-ID: <20231231084502.235366-23-pbonzini@redhat.com> (raw)
In-Reply-To: <20231231084502.235366-1-pbonzini@redhat.com>
The main difficulty here is that a page fault when writing to the destination
must not overwrite the flags. Therefore, the flags computation must be
inlined instead of using gen_jcc1*.
For simplicity, I am using an unconditional cmpxchg operation, which becomes
a NOP (it stores back the value that was just read) if the comparison fails.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 2 +-
target/i386/tcg/decode-new.c.inc | 25 ++++++++
target/i386/tcg/decode-new.h | 1 +
target/i386/tcg/emit.c.inc | 104 +++++++++++++++++++++++++++++++
target/i386/tcg/translate.c | 2 +
5 files changed, 133 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 95d5f16cd5e..fd47ee7defb 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -738,7 +738,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
#define TCG_7_0_EDX_FEATURES (CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_KERNEL_FEATURES)
#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \
- CPUID_7_1_EAX_FSRC)
+ CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD)
#define TCG_7_1_EDX_FEATURES 0
#define TCG_7_2_EDX_FEATURES 0
#define TCG_APM_FEATURES 0
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 717d7307722..426c4594120 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -538,6 +538,28 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = {
[0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
[0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
[0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
+
+ /*
+ * REG selects srcdest2 operand, VEX.vvvv selects src3. VEX class not found
+ * in manual, assumed to be 13 from the VEX.L0 constraint.
+ */
+ [0xe0] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe1] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe2] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe3] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe4] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe5] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe6] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe7] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+
+ [0xe8] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xe9] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xea] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xeb] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xec] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xed] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xee] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
+ [0xef] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
};
/* five rows for no prefix, 66, F3, F2, 66+F2 */
@@ -1503,6 +1525,9 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
case X86_FEAT_SHA_NI:
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
+
+ case X86_FEAT_CMPCCXADD:
+ return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
}
g_assert_not_reached();
}
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 25220fc4362..15e6bfef4b1 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -104,6 +104,7 @@ typedef enum X86CPUIDFeature {
X86_FEAT_AVX2,
X86_FEAT_BMI1,
X86_FEAT_BMI2,
+ X86_FEAT_CMPCCXADD,
X86_FEAT_F16C,
X86_FEAT_FMA,
X86_FEAT_MOVBE,
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index fd120e7b9b4..6bcf88ecd71 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1190,6 +1190,110 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
}
+static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ TCGLabel *label_top = gen_new_label(); /* head of the cmpxchg retry loop */
+ TCGLabel *label_bottom = gen_new_label(); /* exit of the cmpxchg retry loop */
+ TCGv oldv = tcg_temp_new(); /* value returned by the atomic cmpxchg */
+ TCGv newv = tcg_temp_new(); /* value the cmpxchg tries to store */
+ TCGv cmpv = tcg_temp_new(); /* extended copy of the operand 1 register */
+ TCGCond cond; /* TCG condition under which the addition happens */
+ /*
+ * Overview: emit code that loads the memory operand at s->A0, compares it
+ * with the register named by operand 1 and, if the condition selected by
+ * decode->b holds, atomically adds s->T1 to memory. The value that was
+ * read from memory is then written back to operand 1, and the subtraction
+ * used for the comparison is recorded in decode->cc_dst/cc_src/cc_op.
+ */
+ TCGv cmp_lhs, cmp_rhs; /* pair tested under cond, chosen per jcc_op below */
+ MemOp ot, ot_full; /* operand size; size plus MO_LE and optionally MO_SIGN */
+
+ int jcc_op = (decode->b >> 1) & 7; /* bits 3:1 of decode->b index cond_table */
+ static const TCGCond cond_table[8] = {
+ [JCC_O] = TCG_COND_LT, /* test sign bit by comparing against 0 */
+ [JCC_B] = TCG_COND_LTU,
+ [JCC_Z] = TCG_COND_EQ,
+ [JCC_BE] = TCG_COND_LEU,
+ [JCC_S] = TCG_COND_LT, /* test sign bit by comparing against 0 */
+ [JCC_P] = TCG_COND_EQ, /* even parity - tests low bit of popcount */
+ [JCC_L] = TCG_COND_LT,
+ [JCC_LE] = TCG_COND_LE,
+ };
+
+ cond = cond_table[jcc_op];
+ if (decode->b & 1) { /* bit 0 of decode->b selects the inverse condition */
+ cond = tcg_invert_cond(cond);
+ }
+
+ ot = decode->op[0].ot;
+ ot_full = ot | MO_LE;
+ if (jcc_op >= JCC_S) {
+ /*
+ * Sign-extend values before subtracting for S, P (zero/sign extension
+ * does not matter there), L, LE and their inverses.
+ */
+ ot_full |= MO_SIGN;
+ }
+
+ /*
+ * cmpv will be moved to cc_src *after* cpu_regs[] is written back, so use
+ * tcg_gen_ext_tl instead of gen_ext_tl. The same register (operand 1) is
+ * overwritten by the writeback at the end of this function, hence cmpv
+ * must hold its own copy of the value.
+ * NOTE(review): presumably gen_ext_tl may hand back cpu_regs[] itself
+ * rather than a copy - confirm against its definition.
+ */
+ tcg_gen_ext_tl(cmpv, cpu_regs[decode->op[1].n], ot_full);
+
+ /*
+ * Cmpxchg loop starts here.
+ * - s->T1: addition operand (from decoder)
+ * - s->A0: dest address (from decoder)
+ * - s->cc_srcT: memory operand (lhs for comparison)
+ * - cmpv: rhs for comparison
+ * - s->T0: s->cc_srcT - cmpv, recomputed on every iteration; it ends up
+ * in decode->cc_dst
+ */
+ gen_set_label(label_top);
+ gen_op_ld_v(s, ot_full, s->cc_srcT, s->A0);
+ tcg_gen_sub_tl(s->T0, s->cc_srcT, cmpv);
+
+ /*
+ * Compute the comparison result by hand, to avoid clobbering cc_*.
+ * Each case only selects the (cmp_lhs, cmp_rhs) pair; the test itself is
+ * performed later by the movcond and the brcond that use "cond".
+ */
+ switch (jcc_op) {
+ case JCC_O:
+ /*
+ * (src1 ^ src2) & (src1 ^ dst). newv is only used here for a moment.
+ * Here src1 is s->cc_srcT, src2 is cmpv and dst is s->T0. The
+ * sextract is expected to replicate bit (8 << ot) - 1 of the result
+ * upwards, so that comparing against 0 tests that bit.
+ */
+ tcg_gen_xor_tl(newv, s->cc_srcT, s->T0);
+ tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv);
+ tcg_gen_and_tl(s->tmp0, s->tmp0, newv);
+ tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot);
+ cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+ break;
+
+ case JCC_P: /* low bit of the popcount of the low 8 bits of the difference */
+ tcg_gen_ext8u_tl(s->tmp0, s->T0);
+ tcg_gen_ctpop_tl(s->tmp0, s->tmp0);
+ tcg_gen_andi_tl(s->tmp0, s->tmp0, 1);
+ cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+ break;
+
+ case JCC_S: /* sign of the difference, taken at the operand width */
+ tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot);
+ cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0);
+ break;
+
+ default: /* B, Z, BE, L, LE: compare the two operands directly */
+ cmp_lhs = s->cc_srcT, cmp_rhs = cmpv;
+ break;
+ }
+
+ /*
+ * Compute new value: if condition does not hold, just store back
+ * s->cc_srcT. The cmpxchg is emitted in both cases; it is given
+ * s->cc_srcT as the expected memory contents and leaves in oldv the
+ * value it actually found.
+ */
+ tcg_gen_add_tl(newv, s->cc_srcT, s->T1);
+ tcg_gen_movcond_tl(cond, newv, cmp_lhs, cmp_rhs, newv, s->cc_srcT);
+ tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, s->cc_srcT, newv, s->mem_index, ot_full);
+
+ /* Exit unconditionally if cmpxchg succeeded. */
+ tcg_gen_brcond_tl(TCG_COND_EQ, oldv, s->cc_srcT, label_bottom);
+
+ /*
+ * Try again if there was actually a store to make.
+ * Getting here means memory no longer held s->cc_srcT. If the condition
+ * held for the value that was loaded, the addition has not been stored,
+ * so reload and retry. Otherwise there was nothing to store and execution
+ * falls through to label_bottom with the value that was loaded.
+ */
+ tcg_gen_brcond_tl(cond, cmp_lhs, cmp_rhs, label_top);
+ gen_set_label(label_bottom);
+
+ /*
+ * Store old value to registers only after a successful store.
+ * The target is operand 1, the same register that supplied cmpv.
+ */
+ gen_writeback(s, decode, 1, s->cc_srcT);
+
+ decode->cc_dst = s->T0; /* memory operand minus cmpv */
+ decode->cc_src = cmpv; /* rhs of the comparison */
+ decode->cc_op = CC_OP_SUBB + ot; /* presumably the SUB flags op for this operand size - confirm */
+}
+
static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[2].ot;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index fe82d421576..e1eb82a5c68 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -122,6 +122,7 @@ typedef struct DisasContext {
int cpuid_ext3_features;
int cpuid_7_0_ebx_features;
int cpuid_7_0_ecx_features;
+ int cpuid_7_1_eax_features;
int cpuid_xsave_features;
/* TCG local temps */
@@ -6963,6 +6964,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX];
+ dc->cpuid_7_1_eax_features = env->features[FEAT_7_1_EAX];
dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
(flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
--
2.43.0
next prev parent reply other threads:[~2023-12-31 8:52 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-31 8:44 [PULL 00/46] (mostly) target/i386 and meson changes for 2023-12-31 Paolo Bonzini
2023-12-31 8:44 ` [PULL 01/46] configure: use a native non-cross compiler for linux-user Paolo Bonzini
2023-12-31 8:44 ` [PULL 02/46] target/i386: optimize computation of JL and JLE from flags Paolo Bonzini
2023-12-31 8:44 ` [PULL 03/46] target/i386: speedup JO/SETO after MUL or IMUL Paolo Bonzini
2023-12-31 8:44 ` [PULL 04/46] target/i386: remove unnecessary arguments from raise_interrupt Paolo Bonzini
2023-12-31 8:44 ` [PULL 05/46] target/i386: remove unnecessary truncations Paolo Bonzini
2023-12-31 8:44 ` [PULL 06/46] target/i386: clean up cpu_cc_compute_all Paolo Bonzini
2023-12-31 8:44 ` [PULL 07/46] target/i386: document more deviations from the manual Paolo Bonzini
2023-12-31 8:44 ` [PULL 08/46] target/i386: reimplement check for validity of LOCK prefix Paolo Bonzini
2023-12-31 8:44 ` [PULL 09/46] target/i386: avoid trunc and ext for MULX and RORX Paolo Bonzini
2023-12-31 8:44 ` [PULL 10/46] target/i386: rename zext0/zext2 and make them closer to the manual Paolo Bonzini
2023-12-31 8:44 ` [PULL 11/46] target/i386: add X86_SPECIALs for MOVSX and MOVZX Paolo Bonzini
2023-12-31 8:44 ` [PULL 12/46] target/i386: do not decode string source/destination into decode->mem Paolo Bonzini
2023-12-31 8:44 ` [PULL 13/46] target/i386: do not clobber A0 in POP translation Paolo Bonzini
2023-12-31 8:44 ` [PULL 14/46] target/i386: do not clobber T0 on string operations Paolo Bonzini
2023-12-31 8:44 ` [PULL 15/46] target/i386: split eflags computation out of gen_compute_eflags Paolo Bonzini
2023-12-31 8:44 ` [PULL 16/46] target/i386: do not use s->tmp4 for push Paolo Bonzini
2023-12-31 8:44 ` [PULL 17/46] target/i386: do not use s->tmp0 for jumps on ECX ==/!= 0 Paolo Bonzini
2023-12-31 8:44 ` [PULL 18/46] target/i386: prepare for implementation of STOS/SCAS in new decoder Paolo Bonzini
2023-12-31 8:44 ` [PULL 19/46] target/i386: move operand load and writeback out of gen_cmovcc1 Paolo Bonzini
2023-12-31 8:44 ` [PULL 20/46] target/i386: adjust decoding of J operand Paolo Bonzini
2023-12-31 8:44 ` [PULL 21/46] target/i386: introduce flags writeback mechanism Paolo Bonzini
2023-12-31 8:44 ` Paolo Bonzini [this message]
2023-12-31 8:44 ` [PULL 23/46] target/i386: the sgx_epc_get_section stub is reachable Paolo Bonzini
2023-12-31 8:44 ` [PULL 24/46] esp: check for NULL result from scsi_device_find() Paolo Bonzini
2023-12-31 8:44 ` [PULL 25/46] meson: fix type of "relocatable" option Paolo Bonzini
2023-12-31 8:44 ` [PULL 26/46] meson: remove unused variable Paolo Bonzini
2023-12-31 8:44 ` [PULL 27/46] meson: use version_compare() to compare version Paolo Bonzini
2023-12-31 8:44 ` [PULL 28/46] Makefile: clean qemu-iotests output Paolo Bonzini
2023-12-31 8:44 ` [PULL 29/46] configure: remove unnecessary subshell Paolo Bonzini
2023-12-31 8:44 ` [PULL 30/46] configure: unify again the case arms in probe_target_compiler Paolo Bonzini
2023-12-31 8:44 ` [PULL 31/46] meson: add more sections to main meson.build Paolo Bonzini
2023-12-31 8:44 ` [PULL 32/46] meson: move program checks together Paolo Bonzini
2023-12-31 8:44 ` [PULL 33/46] meson: move option validation together Paolo Bonzini
2023-12-31 8:44 ` [PULL 34/46] meson: move accelerator dependency checks together Paolo Bonzini
2023-12-31 8:44 ` [PULL 35/46] meson: keep subprojects together Paolo Bonzini
2023-12-31 8:44 ` [PULL 36/46] meson: move CFI detection code with other compiler flags Paolo Bonzini
2023-12-31 8:44 ` [PULL 37/46] meson: move config-host.h definitions together Paolo Bonzini
2023-12-31 8:44 ` [PULL 38/46] meson: move subdirs to "Collect sources" section Paolo Bonzini
2023-12-31 8:44 ` [PULL 39/46] meson: always probe u2f and canokey if the option is enabled Paolo Bonzini
2023-12-31 8:44 ` [PULL 40/46] meson: remove OS definitions from config_targetos Paolo Bonzini
2023-12-31 8:44 ` [PULL 41/46] meson: remove CONFIG_POSIX and CONFIG_WIN32 " Paolo Bonzini
2023-12-31 8:44 ` [PULL 42/46] meson: remove config_targetos Paolo Bonzini
2023-12-31 8:44 ` [PULL 43/46] meson: remove CONFIG_ALL Paolo Bonzini
2023-12-31 8:45 ` [PULL 44/46] meson: rename config_all Paolo Bonzini
2023-12-31 8:45 ` [PULL 45/46] configure, meson: rename targetos to host_os Paolo Bonzini
2023-12-31 8:45 ` [PULL 46/46] meson.build: report graphics backends separately Paolo Bonzini
2024-01-05 12:53 ` [PULL 00/46] (mostly) target/i386 and meson changes for 2023-12-31 Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231231084502.235366-23-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).