qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v6 13/35] tcg: Add atomic helpers
Date: Wed, 12 Oct 2016 17:16:18 +0100	[thread overview]
Message-ID: <87r37lo84t.fsf@linaro.org> (raw)
In-Reply-To: <1476214861-31658-14-git-send-email-rth@twiddle.net>


Richard Henderson <rth@twiddle.net> writes:

> Add all of cmpxchg, op_fetch, fetch_op, and xchg.
> Handle both endian-ness, and sizes up to 8.
> Handle expanding non-atomically, when emulating in serial.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  Makefile.objs         |   2 +-
>  Makefile.target       |   1 +
>  atomic_template.h     | 173 ++++++++++++++++++++++++++
>  cputlb.c              | 112 ++++++++++++++++-
>  include/qemu/atomic.h |  43 ++++---
>  tcg-runtime.c         |  49 ++++++--
>  tcg/tcg-op.c          | 328 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  tcg/tcg-op.h          |  44 +++++++
>  tcg/tcg-runtime.h     |  75 ++++++++++++
>  tcg/tcg.h             |  53 ++++++++
>  10 files changed, 848 insertions(+), 32 deletions(-)
>  create mode 100644 atomic_template.h
>
> diff --git a/Makefile.objs b/Makefile.objs
> index 02fb8e7..99d1f6d 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -89,7 +89,7 @@ endif
>
>  #######################################################################
>  # Target-independent parts used in system and user emulation
> -common-obj-y += tcg-runtime.o cpus-common.o
> +common-obj-y += cpus-common.o
>  common-obj-y += hw/
>  common-obj-y += qom/
>  common-obj-y += disas/
> diff --git a/Makefile.target b/Makefile.target
> index 9968871..91b6fbd 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -94,6 +94,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
>  obj-y += fpu/softfloat.o
>  obj-y += target-$(TARGET_BASE_ARCH)/
>  obj-y += disas.o
> +obj-y += tcg-runtime.o
>  obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
>  obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
>
> diff --git a/atomic_template.h b/atomic_template.h
> new file mode 100644
> index 0000000..d2c8a08
> --- /dev/null
> +++ b/atomic_template.h
> @@ -0,0 +1,173 @@
> +/*
> + * Atomic helper templates
> + * Included from tcg-runtime.c and cputlb.c.
> + *
> + * Copyright (c) 2016 Red Hat, Inc
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#if DATA_SIZE == 8
> +# define SUFFIX     q
> +# define DATA_TYPE  uint64_t
> +# define BSWAP      bswap64
> +#elif DATA_SIZE == 4
> +# define SUFFIX     l
> +# define DATA_TYPE  uint32_t
> +# define BSWAP      bswap32
> +#elif DATA_SIZE == 2
> +# define SUFFIX     w
> +# define DATA_TYPE  uint16_t
> +# define BSWAP      bswap16
> +#elif DATA_SIZE == 1
> +# define SUFFIX     b
> +# define DATA_TYPE  uint8_t
> +# define BSWAP
> +#else
> +# error unsupported data size
> +#endif
> +
> +#if DATA_SIZE >= 4
> +# define ABI_TYPE  DATA_TYPE
> +#else
> +# define ABI_TYPE  uint32_t
> +#endif
> +
> +#if DATA_SIZE == 1
> +# define END
> +#elif defined(HOST_WORDS_BIGENDIAN)
> +# define END  _be
> +#else
> +# define END  _le
> +#endif
> +
> +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
> +                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
> +{
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
> +    return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
> +}
> +
> +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
> +                           ABI_TYPE val EXTRA_ARGS)
> +{
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
> +    return atomic_xchg__nocheck(haddr, val);
> +}
> +
> +#define GEN_ATOMIC_HELPER(X)                                        \
> +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
> +                 ABI_TYPE val EXTRA_ARGS)                           \
> +{                                                                   \
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
> +    return atomic_##X(haddr, val);                                  \
> +}                                                                   \
> +
> +GEN_ATOMIC_HELPER(fetch_add)
> +GEN_ATOMIC_HELPER(fetch_and)
> +GEN_ATOMIC_HELPER(fetch_or)
> +GEN_ATOMIC_HELPER(fetch_xor)
> +GEN_ATOMIC_HELPER(add_fetch)
> +GEN_ATOMIC_HELPER(and_fetch)
> +GEN_ATOMIC_HELPER(or_fetch)
> +GEN_ATOMIC_HELPER(xor_fetch)
> +
> +#undef GEN_ATOMIC_HELPER
> +#undef END
> +
> +#if DATA_SIZE > 1
> +
> +#ifdef HOST_WORDS_BIGENDIAN
> +# define END  _le
> +#else
> +# define END  _be
> +#endif
> +
> +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
> +                              ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
> +{
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
> +    return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
> +}
> +
> +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
> +                           ABI_TYPE val EXTRA_ARGS)
> +{
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
> +    return BSWAP(atomic_xchg__nocheck(haddr, BSWAP(val)));
> +}
> +
> +#define GEN_ATOMIC_HELPER(X)                                        \
> +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,       \
> +                 ABI_TYPE val EXTRA_ARGS)                           \
> +{                                                                   \
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                           \
> +    return BSWAP(atomic_##X(haddr, BSWAP(val)));                    \
> +}
> +
> +GEN_ATOMIC_HELPER(fetch_and)
> +GEN_ATOMIC_HELPER(fetch_or)
> +GEN_ATOMIC_HELPER(fetch_xor)
> +GEN_ATOMIC_HELPER(and_fetch)
> +GEN_ATOMIC_HELPER(or_fetch)
> +GEN_ATOMIC_HELPER(xor_fetch)
> +
> +#undef GEN_ATOMIC_HELPER
> +
> +/* Note that for addition, we need to use a separate cmpxchg loop instead
> +   of bswaps for the reverse-host-endian helpers.  */
> +ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
> +                         ABI_TYPE val EXTRA_ARGS)
> +{
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
> +    DATA_TYPE ldo, ldn, ret, sto;
> +
> +    ldo = *haddr;
> +    while (1) {
> +        ret = BSWAP(ldo);
> +        sto = BSWAP(ret + val);
> +        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
> +        if (ldn == ldo) {
> +            return ret;
> +        }
> +        ldo = ldn;
> +    }
> +}
> +
> +ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
> +                         ABI_TYPE val EXTRA_ARGS)
> +{
> +    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
> +    DATA_TYPE ldo, ldn, ret, sto;
> +
> +    ldo = *haddr;
> +    while (1) {
> +        ret = BSWAP(ldo) + val;
> +        sto = BSWAP(ret);
> +        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
> +        if (ldn == ldo) {
> +            return ret;
> +        }
> +        ldo = ldn;
> +    }
> +}
> +
> +#undef END
> +#endif /* DATA_SIZE > 1 */
> +
> +#undef BSWAP
> +#undef ABI_TYPE
> +#undef DATA_TYPE
> +#undef SUFFIX
> +#undef DATA_SIZE
> diff --git a/cputlb.c b/cputlb.c
> index 82cf46e..4f2c500 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -23,15 +23,15 @@
>  #include "exec/memory.h"
>  #include "exec/address-spaces.h"
>  #include "exec/cpu_ldst.h"
> -
>  #include "exec/cputlb.h"
> -
>  #include "exec/memory-internal.h"
>  #include "exec/ram_addr.h"
>  #include "exec/exec-all.h"
>  #include "tcg/tcg.h"
>  #include "qemu/error-report.h"
>  #include "exec/log.h"
> +#include "exec/helper-proto.h"
> +#include "qemu/atomic.h"
>
>  /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
>  /* #define DEBUG_TLB */
> @@ -585,6 +585,69 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
>      }
>  }
>
> +/* Probe for a read-modify-write atomic operation.  Do not allow unaligned
> + * operations, or io operations to proceed.  Return the host address.  */
> +static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
> +                               TCGMemOpIdx oi, uintptr_t retaddr)
> +{
> +    size_t mmu_idx = get_mmuidx(oi);
> +    size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> +    CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index];
> +    target_ulong tlb_addr = tlbe->addr_write;
> +    TCGMemOp mop = get_memop(oi);
> +    int a_bits = get_alignment_bits(mop);
> +    int s_bits = mop & MO_SIZE;
> +
> +    /* Adjust the given return address.  */
> +    retaddr -= GETPC_ADJ;
> +
> +    /* Enforce guest required alignment.  */
> +    if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
> +        /* ??? Maybe indicate atomic op to cpu_unaligned_access */
> +        cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
> +                             mmu_idx, retaddr);
> +    }
> +
> +    /* Enforce qemu required alignment.  */
> +    if (unlikely(addr & ((1 << s_bits) - 1))) {
> +        /* We get here if guest alignment was not requested,
> +           or was not enforced by cpu_unaligned_access above.
> +           We might widen the access and emulate, but for now
> +           mark an exception and exit the cpu loop.  */
> +        goto stop_the_world;
> +    }
> +
> +    /* Check TLB entry and enforce page permissions.  */
> +    if ((addr & TARGET_PAGE_MASK)
> +        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
> +        if (!VICTIM_TLB_HIT(addr_write, addr)) {
> +            tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
> +        }
> +        tlb_addr = tlbe->addr_write;
> +    }
> +
> +    /* Notice an IO access, or a notdirty page.  */
> +    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> +        /* There's really nothing that can be done to
> +           support this apart from stop-the-world.  */
> +        goto stop_the_world;
> +    }
> +
> +    /* Let the guest notice RMW on a write-only page.  */
> +    if (unlikely(tlbe->addr_read != tlb_addr)) {
> +        tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_LOAD, mmu_idx, retaddr);
> +        /* Since we don't support reads and writes to different addresses,
> +           and we do have the proper page loaded for write, this shouldn't
> +           ever return.  But just in case, handle via stop-the-world.  */
> +        goto stop_the_world;
> +    }
> +
> +    return (void *)((uintptr_t)addr + tlbe->addend);
> +
> + stop_the_world:
> +    cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
> +}
> +
>  #ifdef TARGET_WORDS_BIGENDIAN
>  # define TGT_BE(X)  (X)
>  # define TGT_LE(X)  BSWAP(X)
> @@ -606,8 +669,51 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
>
>  #define DATA_SIZE 8
>  #include "softmmu_template.h"
> -#undef MMUSUFFIX
>
> +/* First set of helpers allows passing in of OI and RETADDR.  This makes
> +   them callable from other helpers.  */
> +
> +#define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
> +#define ATOMIC_NAME(X) \
> +    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
> +#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, retaddr)
> +
> +#define DATA_SIZE 1
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 2
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 4
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 8
> +#include "atomic_template.h"
> +
> +/* Second set of helpers are directly callable from TCG as helpers.  */
> +
> +#undef EXTRA_ARGS
> +#undef ATOMIC_NAME
> +#undef ATOMIC_MMU_LOOKUP
> +#define EXTRA_ARGS         , TCGMemOpIdx oi
> +#define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
> +#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())
> +
> +#define DATA_SIZE 1
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 2
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 4
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 8
> +#include "atomic_template.h"
> +
> +/* Code access functions.  */
> +
> +#undef MMUSUFFIX
>  #define MMUSUFFIX _cmmu
>  #undef GETPC
>  #define GETPC() ((uintptr_t)0)
> diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
> index 725144c..b729786 100644
> --- a/include/qemu/atomic.h
> +++ b/include/qemu/atomic.h
> @@ -171,20 +171,27 @@
>
>  /* All the remaining operations are fully sequentially consistent */
>
> +#define atomic_xchg__nocheck(ptr, i)    ({                  \
> +    __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST);        \
> +})
> +
>  #define atomic_xchg(ptr, i)    ({                           \
>      QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
> -    __atomic_exchange_n(ptr, i, __ATOMIC_SEQ_CST);          \
> +    atomic_xchg__nocheck(ptr, i);                           \
>  })
>
>  /* Returns the eventual value, failed or not */
> -#define atomic_cmpxchg(ptr, old, new)                                   \
> -    ({                                                                  \
> -    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));                   \
> +#define atomic_cmpxchg__nocheck(ptr, old, new)    ({                    \
>      typeof_strip_qual(*ptr) _old = (old);                               \
>      __atomic_compare_exchange_n(ptr, &_old, new, false,                 \
>                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);      \
>      _old;                                                               \
> -    })
> +})
> +
> +#define atomic_cmpxchg(ptr, old, new)    ({                             \
> +    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));                   \
> +    atomic_cmpxchg__nocheck(ptr, old, new);                             \
> +})
>
>  /* Provide shorter names for GCC atomic builtins, return old value */
>  #define atomic_fetch_inc(ptr)  __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
> @@ -389,25 +396,27 @@
>  #define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
>  #endif
>  #endif
> +#define atomic_xchg__nocheck  atomic_xchg
>
>  /* Provide shorter names for GCC atomic builtins.  */
>  #define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
>  #define atomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
> -#define atomic_fetch_add       __sync_fetch_and_add
> -#define atomic_fetch_sub       __sync_fetch_and_sub
> -#define atomic_fetch_and       __sync_fetch_and_and
> -#define atomic_fetch_or        __sync_fetch_and_or
> -#define atomic_fetch_xor       __sync_fetch_and_xor
> +#define atomic_fetch_add(ptr, n)  __sync_fetch_and_add(ptr, n)
> +#define atomic_fetch_sub(ptr, n)  __sync_fetch_and_sub(ptr, n)
> +#define atomic_fetch_and(ptr, n)  __sync_fetch_and_and(ptr, n)
> +#define atomic_fetch_or(ptr, n)  __sync_fetch_and_or(ptr, n)
> +#define atomic_fetch_xor(ptr, n)  __sync_fetch_and_xor(ptr, n)
>
>  #define atomic_inc_fetch(ptr)  __sync_add_and_fetch(ptr, 1)
>  #define atomic_dec_fetch(ptr)  __sync_add_and_fetch(ptr, -1)
> -#define atomic_add_fetch       __sync_add_and_fetch
> -#define atomic_sub_fetch       __sync_sub_and_fetch
> -#define atomic_and_fetch       __sync_and_and_fetch
> -#define atomic_or_fetch        __sync_or_and_fetch
> -#define atomic_xor_fetch       __sync_xor_and_fetch
> -
> -#define atomic_cmpxchg         __sync_val_compare_and_swap
> +#define atomic_add_fetch(ptr, n)  __sync_add_and_fetch(ptr, n)
> +#define atomic_sub_fetch(ptr, n)  __sync_sub_and_fetch(ptr, n)
> +#define atomic_and_fetch(ptr, n)  __sync_and_and_fetch(ptr, n)
> +#define atomic_or_fetch(ptr, n)  __sync_or_and_fetch(ptr, n)
> +#define atomic_xor_fetch(ptr, n)  __sync_xor_and_fetch(ptr, n)
> +
> +#define atomic_cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new)
> +#define atomic_cmpxchg__nocheck(ptr, old, new)  atomic_cmpxchg(ptr, old, new)
>
>  /* And even shorter names that return void.  */
>  #define atomic_inc(ptr)        ((void) __sync_fetch_and_add(ptr, 1))
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index ea2ad64..aa55d12 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -23,17 +23,10 @@
>   */
>  #include "qemu/osdep.h"
>  #include "qemu/host-utils.h"
> -
> -/* This file is compiled once, and thus we can't include the standard
> -   "exec/helper-proto.h", which has includes that are target specific.  */
> -
> -#include "exec/helper-head.h"
> -
> -#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
> -  dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
> -
> -#include "tcg-runtime.h"
> -
> +#include "cpu.h"
> +#include "exec/helper-proto.h"
> +#include "exec/cpu_ldst.h"
> +#include "exec/exec-all.h"
>
>  /* 32-bit helpers */
>
> @@ -107,3 +100,37 @@ int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2)
>      muls64(&l, &h, arg1, arg2);
>      return h;
>  }
> +
> +#ifndef CONFIG_SOFTMMU
> +/* The softmmu versions of these helpers are in cputlb.c.  */
> +
> +/* Do not allow unaligned operations to proceed.  Return the host address.  */
> +static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
> +                               int size, uintptr_t retaddr)
> +{
> +    /* Enforce qemu required alignment.  */
> +    if (unlikely(addr & (size - 1))) {
> +        cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr);
> +    }
> +    return g2h(addr);
> +}
> +
> +/* Macro to call the above, with local variables from the use context.  */
> +#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC())
> +
> +#define ATOMIC_NAME(X)   HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
> +#define EXTRA_ARGS
> +
> +#define DATA_SIZE 1
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 2
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 4
> +#include "atomic_template.h"
> +
> +#define DATA_SIZE 8
> +#include "atomic_template.h"
> +
> +#endif /* !CONFIG_SOFTMMU */
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index 291d50b..65e3663 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -1975,3 +1975,331 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>                                 addr, trace_mem_get_info(memop, 1));
>      gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
>  }
> +
> +static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc)
> +{
> +    switch (opc & MO_SSIZE) {
> +    case MO_SB:
> +        tcg_gen_ext8s_i32(ret, val);
> +        break;
> +    case MO_UB:
> +        tcg_gen_ext8u_i32(ret, val);
> +        break;
> +    case MO_SW:
> +        tcg_gen_ext16s_i32(ret, val);
> +        break;
> +    case MO_UW:
> +        tcg_gen_ext16u_i32(ret, val);
> +        break;
> +    default:
> +        tcg_gen_mov_i32(ret, val);
> +        break;
> +    }
> +}
> +
> +static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, TCGMemOp opc)
> +{
> +    switch (opc & MO_SSIZE) {
> +    case MO_SB:
> +        tcg_gen_ext8s_i64(ret, val);
> +        break;
> +    case MO_UB:
> +        tcg_gen_ext8u_i64(ret, val);
> +        break;
> +    case MO_SW:
> +        tcg_gen_ext16s_i64(ret, val);
> +        break;
> +    case MO_UW:
> +        tcg_gen_ext16u_i64(ret, val);
> +        break;
> +    case MO_SL:
> +        tcg_gen_ext32s_i64(ret, val);
> +        break;
> +    case MO_UL:
> +        tcg_gen_ext32u_i64(ret, val);
> +        break;
> +    default:
> +        tcg_gen_mov_i64(ret, val);
> +        break;
> +    }
> +}
> +
> +#ifdef CONFIG_SOFTMMU
> +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
> +                                  TCGv_i32, TCGv_i32, TCGv_i32);
> +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
> +                                  TCGv_i64, TCGv_i64, TCGv_i32);
> +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
> +                                  TCGv_i32, TCGv_i32);
> +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
> +                                  TCGv_i64, TCGv_i32);
> +#else
> +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32);
> +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64);
> +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32);
> +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64);
> +#endif
> +
> +static void * const table_cmpxchg[16] = {
> +    [MO_8] = gen_helper_atomic_cmpxchgb,
> +    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
> +    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
> +    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
> +    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
> +    [MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le,
> +    [MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be,
> +};
> +
> +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
> +                                TCGv_i32 newv, TCGArg idx, TCGMemOp memop)
> +{
> +    memop = tcg_canonicalize_memop(memop, 0, 0);
> +
> +    if (!parallel_cpus) {
> +        TCGv_i32 t1 = tcg_temp_new_i32();
> +        TCGv_i32 t2 = tcg_temp_new_i32();
> +
> +        tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
> +
> +        tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
> +        tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
> +        tcg_gen_qemu_st_i32(t2, addr, idx, memop);
> +        tcg_temp_free_i32(t2);
> +
> +        if (memop & MO_SIGN) {
> +            tcg_gen_ext_i32(retv, t1, memop);
> +        } else {
> +            tcg_gen_mov_i32(retv, t1);
> +        }
> +        tcg_temp_free_i32(t1);
> +    } else {
> +        gen_atomic_cx_i32 gen;
> +
> +        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
> +        tcg_debug_assert(gen != NULL);
> +
> +#ifdef CONFIG_SOFTMMU
> +        {
> +            TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
> +            gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi);
> +            tcg_temp_free_i32(oi);
> +        }
> +#else
> +        gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv);
> +#endif
> +
> +        if (memop & MO_SIGN) {
> +            tcg_gen_ext_i32(retv, retv, memop);
> +        }
> +    }
> +}
> +
> +void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
> +                                TCGv_i64 newv, TCGArg idx, TCGMemOp memop)
> +{
> +    memop = tcg_canonicalize_memop(memop, 1, 0);
> +
> +    if (!parallel_cpus) {
> +        TCGv_i64 t1 = tcg_temp_new_i64();
> +        TCGv_i64 t2 = tcg_temp_new_i64();
> +
> +        tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
> +
> +        tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
> +        tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
> +        tcg_gen_qemu_st_i64(t2, addr, idx, memop);
> +        tcg_temp_free_i64(t2);
> +
> +        if (memop & MO_SIGN) {
> +            tcg_gen_ext_i64(retv, t1, memop);
> +        } else {
> +            tcg_gen_mov_i64(retv, t1);
> +        }
> +        tcg_temp_free_i64(t1);
> +    } else if ((memop & MO_SIZE) == MO_64) {
> +        gen_atomic_cx_i64 gen;
> +
> +        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
> +        tcg_debug_assert(gen != NULL);
> +
> +#ifdef CONFIG_SOFTMMU
> +        {
> +            TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx));
> +            gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi);
> +            tcg_temp_free_i32(oi);
> +        }
> +#else
> +        gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv);
> +#endif
> +    } else {
> +        TCGv_i32 c32 = tcg_temp_new_i32();
> +        TCGv_i32 n32 = tcg_temp_new_i32();
> +        TCGv_i32 r32 = tcg_temp_new_i32();
> +
> +        tcg_gen_extrl_i64_i32(c32, cmpv);
> +        tcg_gen_extrl_i64_i32(n32, newv);
> +        tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
> +        tcg_temp_free_i32(c32);
> +        tcg_temp_free_i32(n32);
> +
> +        tcg_gen_extu_i32_i64(retv, r32);
> +        tcg_temp_free_i32(r32);
> +
> +        if (memop & MO_SIGN) {
> +            tcg_gen_ext_i64(retv, retv, memop);
> +        }
> +    }
> +}
> +
> +static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
> +                                TCGArg idx, TCGMemOp memop, bool new_val,
> +                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
> +{
> +    TCGv_i32 t1 = tcg_temp_new_i32();
> +    TCGv_i32 t2 = tcg_temp_new_i32();
> +
> +    memop = tcg_canonicalize_memop(memop, 0, 0);
> +
> +    tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
> +    gen(t2, t1, val);
> +    tcg_gen_qemu_st_i32(t2, addr, idx, memop);
> +
> +    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
> +    tcg_temp_free_i32(t1);
> +    tcg_temp_free_i32(t2);
> +}
> +
> +static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
> +                             TCGArg idx, TCGMemOp memop, void * const table[])
> +{
> +    gen_atomic_op_i32 gen;
> +
> +    memop = tcg_canonicalize_memop(memop, 0, 0);
> +
> +    gen = table[memop & (MO_SIZE | MO_BSWAP)];
> +    tcg_debug_assert(gen != NULL);
> +
> +#ifdef CONFIG_SOFTMMU
> +    {
> +        TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
> +        gen(ret, tcg_ctx.tcg_env, addr, val, oi);
> +        tcg_temp_free_i32(oi);
> +    }
> +#else
> +    gen(ret, tcg_ctx.tcg_env, addr, val);
> +#endif
> +
> +    if (memop & MO_SIGN) {
> +        tcg_gen_ext_i32(ret, ret, memop);
> +    }
> +}
> +
> +static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
> +                                TCGArg idx, TCGMemOp memop, bool new_val,
> +                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
> +{
> +    TCGv_i64 t1 = tcg_temp_new_i64();
> +    TCGv_i64 t2 = tcg_temp_new_i64();
> +
> +    memop = tcg_canonicalize_memop(memop, 1, 0);
> +
> +    tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
> +    gen(t2, t1, val);
> +    tcg_gen_qemu_st_i64(t2, addr, idx, memop);
> +
> +    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
> +    tcg_temp_free_i64(t1);
> +    tcg_temp_free_i64(t2);
> +}
> +
> +static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
> +                             TCGArg idx, TCGMemOp memop, void * const table[])
> +{
> +    memop = tcg_canonicalize_memop(memop, 1, 0);
> +
> +    if ((memop & MO_SIZE) == MO_64) {
> +        gen_atomic_op_i64 gen;
> +
> +        gen = table[memop & (MO_SIZE | MO_BSWAP)];
> +        tcg_debug_assert(gen != NULL);
> +
> +#ifdef CONFIG_SOFTMMU
> +        {
> +            TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
> +            gen(ret, tcg_ctx.tcg_env, addr, val, oi);
> +            tcg_temp_free_i32(oi);
> +        }
> +#else
> +        gen(ret, tcg_ctx.tcg_env, addr, val);
> +#endif
> +    } else {
> +        TCGv_i32 v32 = tcg_temp_new_i32();
> +        TCGv_i32 r32 = tcg_temp_new_i32();
> +
> +        tcg_gen_extrl_i64_i32(v32, val);
> +        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
> +        tcg_temp_free_i32(v32);
> +
> +        tcg_gen_extu_i32_i64(ret, r32);
> +        tcg_temp_free_i32(r32);
> +
> +        if (memop & MO_SIGN) {
> +            tcg_gen_ext_i64(ret, ret, memop);
> +        }
> +    }
> +}
> +
> +#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
> +static void * const table_##NAME[16] = {                                \
> +    [MO_8] = gen_helper_atomic_##NAME##b,                               \
> +    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
> +    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
> +    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
> +    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
> +    [MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le,                   \
> +    [MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be,                   \
> +};                                                                      \
> +void tcg_gen_atomic_##NAME##_i32                                        \
> +    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, TCGMemOp memop) \
> +{                                                                       \
> +    if (parallel_cpus) {                                                \
> +        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
> +    } else {                                                            \
> +        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
> +                            tcg_gen_##OP##_i32);                        \
> +    }                                                                   \
> +}                                                                       \
> +void tcg_gen_atomic_##NAME##_i64                                        \
> +    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, TCGMemOp memop) \
> +{                                                                       \
> +    if (parallel_cpus) {                                                \
> +        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
> +    } else {                                                            \
> +        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
> +                            tcg_gen_##OP##_i64);                        \
> +    }                                                                   \
> +}
> +
> +GEN_ATOMIC_HELPER(fetch_add, add, 0)
> +GEN_ATOMIC_HELPER(fetch_and, and, 0)
> +GEN_ATOMIC_HELPER(fetch_or, or, 0)
> +GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
> +
> +GEN_ATOMIC_HELPER(add_fetch, add, 1)
> +GEN_ATOMIC_HELPER(and_fetch, and, 1)
> +GEN_ATOMIC_HELPER(or_fetch, or, 1)
> +GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
> +
> +static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
> +{
> +    tcg_gen_mov_i32(r, b);
> +}
> +
> +static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
> +{
> +    tcg_gen_mov_i64(r, b);
> +}
> +
> +GEN_ATOMIC_HELPER(xchg, mov2, 0)
> +
> +#undef GEN_ATOMIC_HELPER
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index 02cb376..89b59e8 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -854,6 +854,30 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
>      tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ);
>  }
>
> +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
> +                                TCGArg, TCGMemOp);
> +void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
> +                                TCGArg, TCGMemOp);
> +
> +void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
> +void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> +
>  #if TARGET_LONG_BITS == 64
>  #define tcg_gen_movi_tl tcg_gen_movi_i64
>  #define tcg_gen_mov_tl tcg_gen_mov_i64
> @@ -932,6 +956,16 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
>  #define tcg_gen_sub2_tl tcg_gen_sub2_i64
>  #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64
>  #define tcg_gen_muls2_tl tcg_gen_muls2_i64
> +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64
> +#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64
> +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64
> +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64
> +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64
> +#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64
> +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64
> +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
> +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
> +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
>  #else
>  #define tcg_gen_movi_tl tcg_gen_movi_i32
>  #define tcg_gen_mov_tl tcg_gen_mov_i32
> @@ -1009,6 +1043,16 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
>  #define tcg_gen_sub2_tl tcg_gen_sub2_i32
>  #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32
>  #define tcg_gen_muls2_tl tcg_gen_muls2_i32
> +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32
> +#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32
> +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32
> +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32
> +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32
> +#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32
> +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32
> +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
> +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
> +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
>  #endif
>
>  #if UINTPTR_MAX == UINT32_MAX
> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
> index 23a0c37..22367aa 100644
> --- a/tcg/tcg-runtime.h
> +++ b/tcg/tcg-runtime.h
> @@ -14,3 +14,78 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
>
>  DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
>  DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> +
> +#ifdef CONFIG_SOFTMMU
> +
> +DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
> +                   i32, env, tl, i32, i32, i32)
> +DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
> +                   i32, env, tl, i32, i32, i32)
> +DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
> +                   i32, env, tl, i32, i32, i32)
> +DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
> +                   i64, env, tl, i64, i64, i32)
> +DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
> +                   i32, env, tl, i32, i32, i32)
> +DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
> +                   i32, env, tl, i32, i32, i32)
> +DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
> +                   i64, env, tl, i64, i64, i32)
> +
> +#define GEN_ATOMIC_HELPERS(NAME)                                  \
> +    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b),              \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
> +    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le),           \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
> +    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be),           \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
> +    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le),           \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
> +    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be),           \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32, i32)    \
> +    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le),           \
> +                       TCG_CALL_NO_WG, i64, env, tl, i64, i32)    \
> +    DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be),           \
> +                       TCG_CALL_NO_WG, i64, env, tl, i64, i32)
> +
> +#else
> +
> +DEF_HELPER_FLAGS_4(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
> +DEF_HELPER_FLAGS_4(atomic_cmpxchgw_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
> +DEF_HELPER_FLAGS_4(atomic_cmpxchgl_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
> +DEF_HELPER_FLAGS_4(atomic_cmpxchgq_be, TCG_CALL_NO_WG, i64, env, tl, i64, i64)
> +DEF_HELPER_FLAGS_4(atomic_cmpxchgw_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
> +DEF_HELPER_FLAGS_4(atomic_cmpxchgl_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32)
> +DEF_HELPER_FLAGS_4(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64)
> +
> +#define GEN_ATOMIC_HELPERS(NAME)                             \
> +    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b),         \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
> +    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le),      \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
> +    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be),      \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
> +    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le),      \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
> +    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be),      \
> +                       TCG_CALL_NO_WG, i32, env, tl, i32)    \
> +    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_le),      \
> +                       TCG_CALL_NO_WG, i64, env, tl, i64)    \
> +    DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_be),      \
> +                       TCG_CALL_NO_WG, i64, env, tl, i64)
> +
> +#endif /* CONFIG_SOFTMMU */
> +
> +GEN_ATOMIC_HELPERS(fetch_add)
> +GEN_ATOMIC_HELPERS(fetch_and)
> +GEN_ATOMIC_HELPERS(fetch_or)
> +GEN_ATOMIC_HELPERS(fetch_xor)
> +
> +GEN_ATOMIC_HELPERS(add_fetch)
> +GEN_ATOMIC_HELPERS(and_fetch)
> +GEN_ATOMIC_HELPERS(or_fetch)
> +GEN_ATOMIC_HELPERS(xor_fetch)
> +
> +GEN_ATOMIC_HELPERS(xchg)
> +
> +#undef GEN_ATOMIC_HELPERS
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 3b21156..5931965 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -1177,6 +1177,59 @@ uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
>  # define helper_ret_ldq_cmmu  helper_le_ldq_cmmu
>  #endif
>
> +uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
> +                                    uint32_t cmpv, uint32_t newv,
> +                                    TCGMemOpIdx oi, uintptr_t retaddr);
> +uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr,
> +                                       uint32_t cmpv, uint32_t newv,
> +                                       TCGMemOpIdx oi, uintptr_t retaddr);
> +uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr,
> +                                       uint32_t cmpv, uint32_t newv,
> +                                       TCGMemOpIdx oi, uintptr_t retaddr);
> +uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr,
> +                                       uint64_t cmpv, uint64_t newv,
> +                                       TCGMemOpIdx oi, uintptr_t retaddr);
> +uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr,
> +                                       uint32_t cmpv, uint32_t newv,
> +                                       TCGMemOpIdx oi, uintptr_t retaddr);
> +uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr,
> +                                       uint32_t cmpv, uint32_t newv,
> +                                       TCGMemOpIdx oi, uintptr_t retaddr);
> +uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr,
> +                                       uint64_t cmpv, uint64_t newv,
> +                                       TCGMemOpIdx oi, uintptr_t retaddr);
> +
> +#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX)         \
> +TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu         \
> +    (CPUArchState *env, target_ulong addr, TYPE val,  \
> +     TCGMemOpIdx oi, uintptr_t retaddr);
> +
> +#define GEN_ATOMIC_HELPER_ALL(NAME)          \
> +    GEN_ATOMIC_HELPER(NAME, uint32_t, b)      \
> +    GEN_ATOMIC_HELPER(NAME, uint32_t, w_le)  \
> +    GEN_ATOMIC_HELPER(NAME, uint32_t, l_le)  \
> +    GEN_ATOMIC_HELPER(NAME, uint64_t, q_le)  \
> +    GEN_ATOMIC_HELPER(NAME, uint32_t, w_be)  \
> +    GEN_ATOMIC_HELPER(NAME, uint32_t, l_be)  \
> +    GEN_ATOMIC_HELPER(NAME, uint64_t, q_be)
> +
> +GEN_ATOMIC_HELPER_ALL(fetch_add)
> +GEN_ATOMIC_HELPER_ALL(fetch_sub)
> +GEN_ATOMIC_HELPER_ALL(fetch_and)
> +GEN_ATOMIC_HELPER_ALL(fetch_or)
> +GEN_ATOMIC_HELPER_ALL(fetch_xor)
> +
> +GEN_ATOMIC_HELPER_ALL(add_fetch)
> +GEN_ATOMIC_HELPER_ALL(sub_fetch)
> +GEN_ATOMIC_HELPER_ALL(and_fetch)
> +GEN_ATOMIC_HELPER_ALL(or_fetch)
> +GEN_ATOMIC_HELPER_ALL(xor_fetch)
> +
> +GEN_ATOMIC_HELPER_ALL(xchg)
> +
> +#undef GEN_ATOMIC_HELPER_ALL
> +#undef GEN_ATOMIC_HELPER
> +
>  #endif /* CONFIG_SOFTMMU */
>
>  #endif /* TCG_H */


--
Alex Bennée

  reply	other threads:[~2016-10-12 16:16 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-11 19:40 [Qemu-devel] [PATCH v6 00/35] cmpxchg-based emulation of atomics Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 01/35] atomics: add atomic_xor Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 02/35] atomics: add atomic_op_fetch variants Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 03/35] exec: Avoid direct references to Int128 parts Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 04/35] int128: Use __int128 if available Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 05/35] int128: Add int128_make128 Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 07/35] linux-user: enable parallel code generation on clone Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 08/35] cputlb: Replace SHIFT with DATA_SIZE Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 09/35] cputlb: Move probe_write out of softmmu_template.h Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 10/35] cputlb: Remove includes from softmmu_template.h Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 11/35] cputlb: Move most of iotlb code out of line Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 12/35] cputlb: Tidy some macros Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 13/35] tcg: Add atomic helpers Richard Henderson
2016-10-12 16:16   ` Alex Bennée [this message]
2016-10-16 22:17   ` Emilio G. Cota
2016-10-17  1:09     ` Richard Henderson
2016-10-17  1:40     ` Richard Henderson
2016-10-17  3:23       ` Emilio G. Cota
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 14/35] tcg: Add atomic128 helpers Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 15/35] tcg: Add CONFIG_ATOMIC64 Richard Henderson
2016-10-12 16:16   ` Alex Bennée
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 16/35] tcg: Emit barriers with parallel_cpus Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 17/35] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 18/35] target-i386: emulate LOCK'ed OP instructions using atomic helpers Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 19/35] target-i386: emulate LOCK'ed INC using atomic helper Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 20/35] target-i386: emulate LOCK'ed NOT " Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 21/35] target-i386: emulate LOCK'ed NEG using cmpxchg helper Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 22/35] target-i386: emulate LOCK'ed XADD using atomic helper Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 23/35] target-i386: emulate LOCK'ed BTX ops using atomic helpers Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 24/35] target-i386: emulate XCHG using atomic helper Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 25/35] target-i386: remove helper_lock() Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 26/35] tests: add atomic_add-bench Richard Henderson
2016-10-14 21:19   ` Emilio G. Cota
2016-10-17  1:49     ` Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 27/35] target-arm: Rearrange aa32 load and store functions Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 28/35] target-arm: emulate LL/SC using cmpxchg helpers Richard Henderson
2016-10-13 11:43   ` Alex Bennée
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 29/35] target-arm: emulate SWP with atomic_xchg helper Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 30/35] target-arm: emulate aarch64's LL/SC using cmpxchg helpers Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 31/35] linux-user: remove handling of ARM's EXCP_STREX Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 32/35] linux-user: remove handling of aarch64's EXCP_STREX Richard Henderson
2016-10-11 19:40 ` [Qemu-devel] [PATCH v6 33/35] target-arm: remove EXCP_STREX + cpu_exclusive_{test, info} Richard Henderson
2016-10-11 19:41 ` [Qemu-devel] [PATCH v6 34/35] target-alpha: Introduce MMU_PHYS_IDX Richard Henderson
2016-10-11 19:41 ` [Qemu-devel] [PATCH v6 35/35] target-alpha: Emulate LL/SC using cmpxchg helpers Richard Henderson
2016-10-16 22:38 ` [Qemu-devel] [PATCH v6 00/35] cmpxchg-based emulation of atomics Emilio G. Cota
2016-10-17  8:17   ` Alex Bennée
2016-10-17 14:37     ` Richard Henderson
2016-10-17 15:33       ` Alex Bennée
2016-10-17 17:56     ` Emilio G. Cota
2016-10-18  8:28       ` Alex Bennée
2016-10-18 18:01         ` Emilio G. Cota

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87r37lo84t.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).