qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, cota@braap.org, alex.bennee@linaro.org
Subject: [Qemu-devel] [PATCH v8 19/37] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers
Date: Mon, 24 Oct 2016 10:39:30 -0700	[thread overview]
Message-ID: <1477330788-14996-20-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1477330788-14996-1-git-send-email-rth@twiddle.net>

From: "Emilio G. Cota" <cota@braap.org>

The diff here is uglier than necessary. All this does is turn

FOO

into:

if (s->prefix & PREFIX_LOCK) {
  BAR
} else {
  FOO
}

where FOO is the original implementation of an unlocked cmpxchg.

[rth: Adjust unlocked cmpxchg to use movcond instead of branches.
Adjust helpers to use atomic helpers.]

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1467054136-10430-6-git-send-email-cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-i386/helper.h     |   2 +
 target-i386/mem_helper.c | 134 +++++++++++++++++++++++++++++++++++++++--------
 target-i386/translate.c  |  99 ++++++++++++++++++----------------
 3 files changed, 169 insertions(+), 66 deletions(-)

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 1320edc..729d4b6 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -74,8 +74,10 @@ DEF_HELPER_3(boundw, void, env, tl, int)
 DEF_HELPER_3(boundl, void, env, tl, int)
 DEF_HELPER_1(rsm, void, env)
 DEF_HELPER_2(into, void, env, int)
+DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
 DEF_HELPER_2(cmpxchg8b, void, env, tl)
 #ifdef TARGET_X86_64
+DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
 DEF_HELPER_2(cmpxchg16b, void, env, tl)
 #endif
 DEF_HELPER_1(single_step, void, env)
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index 5bc0594..c4b5c5b 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -22,6 +22,8 @@
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
+#include "qemu/int128.h"
+#include "tcg.h"
 
 /* broken thread support */
 
@@ -56,53 +58,143 @@ void helper_lock_init(void)
 }
 #endif
 
+void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
+{
+    uintptr_t ra = GETPC();
+    uint64_t oldv, cmpv, newv;
+    int eflags;
+
+    eflags = cpu_cc_compute_all(env, CC_OP);
+
+    cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
+    newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
+
+    oldv = cpu_ldq_data_ra(env, a0, ra);
+    newv = (cmpv == oldv ? newv : oldv);
+    /* always do the store */
+    cpu_stq_data_ra(env, a0, newv, ra);
+
+    if (oldv == cmpv) {
+        eflags |= CC_Z;
+    } else {
+        env->regs[R_EAX] = (uint32_t)oldv;
+        env->regs[R_EDX] = (uint32_t)(oldv >> 32);
+        eflags &= ~CC_Z;
+    }
+    CC_SRC = eflags;
+}
+
 void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
 {
-    uint64_t d;
+#ifdef CONFIG_ATOMIC64
+    uint64_t oldv, cmpv, newv;
     int eflags;
 
     eflags = cpu_cc_compute_all(env, CC_OP);
-    d = cpu_ldq_data_ra(env, a0, GETPC());
-    if (d == (((uint64_t)env->regs[R_EDX] << 32) | (uint32_t)env->regs[R_EAX])) {
-        cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32)
-                                  | (uint32_t)env->regs[R_EBX], GETPC());
+
+    cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
+    newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
+
+#ifdef CONFIG_USER_ONLY
+    {
+        uint64_t *haddr = g2h(a0);
+        cmpv = cpu_to_le64(cmpv);
+        newv = cpu_to_le64(newv);
+        oldv = atomic_cmpxchg__nocheck(haddr, cmpv, newv);
+        oldv = le64_to_cpu(oldv);
+    }
+#else
+    {
+        uintptr_t ra = GETPC();
+        int mem_idx = cpu_mmu_index(env, false);
+        TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);
+        oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
+    }
+#endif
+
+    if (oldv == cmpv) {
         eflags |= CC_Z;
     } else {
-        /* always do the store */
-        cpu_stq_data_ra(env, a0, d, GETPC());
-        env->regs[R_EDX] = (uint32_t)(d >> 32);
-        env->regs[R_EAX] = (uint32_t)d;
+        env->regs[R_EAX] = (uint32_t)oldv;
+        env->regs[R_EDX] = (uint32_t)(oldv >> 32);
         eflags &= ~CC_Z;
     }
     CC_SRC = eflags;
+#else
+    cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
+#endif /* CONFIG_ATOMIC64 */
 }
 
 #ifdef TARGET_X86_64
-void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
+void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
 {
-    uint64_t d0, d1;
+    uintptr_t ra = GETPC();
+    Int128 oldv, cmpv, newv;
+    uint64_t o0, o1;
     int eflags;
+    bool success;
 
     if ((a0 & 0xf) != 0) {
         raise_exception_ra(env, EXCP0D_GPF, GETPC());
     }
     eflags = cpu_cc_compute_all(env, CC_OP);
-    d0 = cpu_ldq_data_ra(env, a0, GETPC());
-    d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC());
-    if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) {
-        cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC());
-        cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC());
+
+    cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
+    newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
+
+    o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
+    o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
+
+    oldv = int128_make128(o0, o1);
+    success = int128_eq(oldv, cmpv);
+    if (!success) {
+        newv = oldv;
+    }
+
+    cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
+    cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
+
+    if (success) {
         eflags |= CC_Z;
     } else {
-        /* always do the store */
-        cpu_stq_data_ra(env, a0, d0, GETPC());
-        cpu_stq_data_ra(env, a0 + 8, d1, GETPC());
-        env->regs[R_EDX] = d1;
-        env->regs[R_EAX] = d0;
+        env->regs[R_EAX] = int128_getlo(oldv);
+        env->regs[R_EDX] = int128_gethi(oldv);
         eflags &= ~CC_Z;
     }
     CC_SRC = eflags;
 }
+
+void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
+{
+    uintptr_t ra = GETPC();
+
+    if ((a0 & 0xf) != 0) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    } else {
+#ifndef CONFIG_ATOMIC128
+        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+        int eflags = cpu_cc_compute_all(env, CC_OP);
+
+        Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
+        Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
+
+        int mem_idx = cpu_mmu_index(env, false);
+        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+        Int128 oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv,
+                                                    newv, oi, ra);
+
+        if (int128_eq(oldv, cmpv)) {
+            eflags |= CC_Z;
+        } else {
+            env->regs[R_EAX] = int128_getlo(oldv);
+            env->regs[R_EDX] = int128_gethi(oldv);
+            eflags &= ~CC_Z;
+        }
+        CC_SRC = eflags;
+#endif
+    }
+}
 #endif
 
 void helper_boundw(CPUX86State *env, target_ulong a0, int v)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 23fde58..dd1b408 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -5069,57 +5069,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1b0:
     case 0x1b1: /* cmpxchg Ev, Gv */
         {
-            TCGLabel *label1, *label2;
-            TCGv t0, t1, t2, a0;
+            TCGv oldv, newv, cmpv;
 
             ot = mo_b_d(b, dflag);
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
-            t0 = tcg_temp_local_new();
-            t1 = tcg_temp_local_new();
-            t2 = tcg_temp_local_new();
-            a0 = tcg_temp_local_new();
-            gen_op_mov_v_reg(ot, t1, reg);
-            if (mod == 3) {
-                rm = (modrm & 7) | REX_B(s);
-                gen_op_mov_v_reg(ot, t0, rm);
-            } else {
+            oldv = tcg_temp_new();
+            newv = tcg_temp_new();
+            cmpv = tcg_temp_new();
+            gen_op_mov_v_reg(ot, newv, reg);
+            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
+
+            if (s->prefix & PREFIX_LOCK) {
+                if (mod == 3) {
+                    goto illegal_op;
+                }
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_mov_tl(a0, cpu_A0);
-                gen_op_ld_v(s, ot, t0, a0);
-                rm = 0; /* avoid warning */
-            }
-            label1 = gen_new_label();
-            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
-            gen_extu(ot, t0);
-            gen_extu(ot, t2);
-            tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
-            label2 = gen_new_label();
-            if (mod == 3) {
-                gen_op_mov_reg_v(ot, R_EAX, t0);
-                tcg_gen_br(label2);
-                gen_set_label(label1);
-                gen_op_mov_reg_v(ot, rm, t1);
+                tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
+                                          s->mem_index, ot | MO_LE);
+                gen_op_mov_reg_v(ot, R_EAX, oldv);
             } else {
-                /* perform no-op store cycle like physical cpu; must be
-                   before changing accumulator to ensure idempotency if
-                   the store faults and the instruction is restarted */
-                gen_op_st_v(s, ot, t0, a0);
-                gen_op_mov_reg_v(ot, R_EAX, t0);
-                tcg_gen_br(label2);
-                gen_set_label(label1);
-                gen_op_st_v(s, ot, t1, a0);
-            }
-            gen_set_label(label2);
-            tcg_gen_mov_tl(cpu_cc_src, t0);
-            tcg_gen_mov_tl(cpu_cc_srcT, t2);
-            tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
+                if (mod == 3) {
+                    rm = (modrm & 7) | REX_B(s);
+                    gen_op_mov_v_reg(ot, oldv, rm);
+                } else {
+                    gen_lea_modrm(env, s, modrm);
+                    gen_op_ld_v(s, ot, oldv, cpu_A0);
+                    rm = 0; /* avoid warning */
+                }
+                gen_extu(ot, oldv);
+                gen_extu(ot, cmpv);
+                /* store value = (old == cmp ? new : old);  */
+                tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
+                if (mod == 3) {
+                    gen_op_mov_reg_v(ot, R_EAX, oldv);
+                    gen_op_mov_reg_v(ot, rm, newv);
+                } else {
+                    /* Perform an unconditional store cycle like physical cpu;
+                       must be before changing accumulator to ensure
+                       idempotency if the store faults and the instruction
+                       is restarted */
+                    gen_op_st_v(s, ot, newv, cpu_A0);
+                    gen_op_mov_reg_v(ot, R_EAX, oldv);
+                }
+            }
+            tcg_gen_mov_tl(cpu_cc_src, oldv);
+            tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
+            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
             set_cc_op(s, CC_OP_SUBB + ot);
-            tcg_temp_free(t0);
-            tcg_temp_free(t1);
-            tcg_temp_free(t2);
-            tcg_temp_free(a0);
+            tcg_temp_free(oldv);
+            tcg_temp_free(newv);
+            tcg_temp_free(cmpv);
         }
         break;
     case 0x1c7: /* cmpxchg8b */
@@ -5132,14 +5133,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
-            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
+            if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
+                gen_helper_cmpxchg16b(cpu_env, cpu_A0);
+            } else {
+                gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
+            }
         } else
 #endif        
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_lea_modrm(env, s, modrm);
-            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
+            if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
+                gen_helper_cmpxchg8b(cpu_env, cpu_A0);
+            } else {
+                gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
+            }
         }
         set_cc_op(s, CC_OP_EFLAGS);
         break;
-- 
2.7.4

  parent reply	other threads:[~2016-10-24 17:40 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-24 17:39 [Qemu-devel] [PATCH v8 00/37] cmpxchg atomic operations Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 01/37] atomics: Add parameters to macros Richard Henderson
2016-10-24 18:13   ` Emilio G. Cota
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 02/37] atomics: add atomic_xor Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 03/37] atomics: add atomic_op_fetch variants Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 04/37] atomics: Add __nocheck atomic operations Richard Henderson
2016-10-24 18:16   ` Emilio G. Cota
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 05/37] exec: Avoid direct references to Int128 parts Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 06/37] int128: Use __int128 if available Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 07/37] int128: Add int128_make128 Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 09/37] linux-user: enable parallel code generation on clone Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 10/37] cputlb: Replace SHIFT with DATA_SIZE Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 11/37] cputlb: Move probe_write out of softmmu_template.h Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 12/37] cputlb: Remove includes from softmmu_template.h Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 13/37] cputlb: Move most of iotlb code out of line Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 14/37] cputlb: Tidy some macros Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 15/37] tcg: Add atomic helpers Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 16/37] tcg: Add atomic128 helpers Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 17/37] tcg: Add CONFIG_ATOMIC64 Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 18/37] tcg: Emit barriers with parallel_cpus Richard Henderson
2016-10-24 17:39 ` Richard Henderson [this message]
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 20/37] target-i386: emulate LOCK'ed OP instructions using atomic helpers Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 21/37] target-i386: emulate LOCK'ed INC using atomic helper Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 22/37] target-i386: emulate LOCK'ed NOT " Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 23/37] target-i386: emulate LOCK'ed NEG using cmpxchg helper Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 24/37] target-i386: emulate LOCK'ed XADD using atomic helper Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 25/37] target-i386: emulate LOCK'ed BTX ops using atomic helpers Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 26/37] target-i386: emulate XCHG using atomic helper Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 27/37] target-i386: remove helper_lock() Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 28/37] tests: add atomic_add-bench Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 29/37] target-arm: Rearrange aa32 load and store functions Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 30/37] target-arm: emulate LL/SC using cmpxchg helpers Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 31/37] target-arm: emulate SWP with atomic_xchg helper Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 32/37] target-arm: emulate aarch64's LL/SC using cmpxchg helpers Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 33/37] linux-user: remove handling of ARM's EXCP_STREX Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 34/37] linux-user: remove handling of aarch64's EXCP_STREX Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 35/37] target-arm: remove EXCP_STREX + cpu_exclusive_{test, info} Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 36/37] target-alpha: Introduce MMU_PHYS_IDX Richard Henderson
2016-10-24 17:39 ` [Qemu-devel] [PATCH v8 37/37] target-alpha: Emulate LL/SC using cmpxchg helpers Richard Henderson
2016-10-24 18:27 ` [Qemu-devel] [PATCH v8 00/37] cmpxchg atomic operations Emilio G. Cota

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1477330788-14996-20-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=alex.bennee@linaro.org \
    --cc=cota@braap.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).