From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, philmd@linaro.org
Subject: Re: [PATCH v6 15/36] tcg: Add guest load/store primitives for TCGv_i128
Date: Wed, 01 Feb 2023 09:52:13 +0000 [thread overview]
Message-ID: <874js5u2pu.fsf@linaro.org> (raw)
In-Reply-To: <20230130214844.1158612-16-richard.henderson@linaro.org>
Richard Henderson <richard.henderson@linaro.org> writes:
> These are not yet considering atomicity of the 16-byte value;
> this is a direct replacement for the current target code which
> uses a pair of 8-byte operations.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/exec/cpu_ldst.h | 10 +++
> include/tcg/tcg-op.h | 2 +
> accel/tcg/cputlb.c | 112 +++++++++++++++++++++++++++++++++
> accel/tcg/user-exec.c | 66 ++++++++++++++++++++
> tcg/tcg-op.c | 134 ++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 324 insertions(+)
>
> diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
> index d0c7c0d5fe..09b55cc0ee 100644
> --- a/include/exec/cpu_ldst.h
> +++ b/include/exec/cpu_ldst.h
> @@ -220,6 +220,11 @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
> uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
> MemOpIdx oi, uintptr_t ra);
>
> +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra);
> +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra);
> +
> void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
> MemOpIdx oi, uintptr_t ra);
> void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
> @@ -235,6 +240,11 @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
> void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
> MemOpIdx oi, uintptr_t ra);
>
> +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> + MemOpIdx oi, uintptr_t ra);
> +void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> + MemOpIdx oi, uintptr_t ra);
> +
> uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
> uint32_t cmpv, uint32_t newv,
> MemOpIdx oi, uintptr_t retaddr);
> diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
> index c4276767d1..e5f5b63c37 100644
> --- a/include/tcg/tcg-op.h
> +++ b/include/tcg/tcg-op.h
> @@ -845,6 +845,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
> void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
> void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
> void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
> +void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
> +void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
>
> static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
> {
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index 4e040a1cb9..e3604ad313 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -2187,6 +2187,64 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
> return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
> }
>
> +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra)
> +{
> + MemOp mop = get_memop(oi);
> + int mmu_idx = get_mmuidx(oi);
> + MemOpIdx new_oi;
> + unsigned a_bits;
> + uint64_t h, l;
> +
> + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
> + a_bits = get_alignment_bits(mop);
> +
> + /* Handle CPU specific unaligned behaviour */
> + if (addr & ((1 << a_bits) - 1)) {
> + cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
> + mmu_idx, ra);
> + }
> +
> + /* Construct an unaligned 64-bit replacement MemOpIdx. */
> + mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
> + new_oi = make_memop_idx(mop, mmu_idx);
> +
> + h = helper_be_ldq_mmu(env, addr, new_oi, ra);
> + l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
> +
> + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
> + return int128_make128(l, h);
> +}
> +
> +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra)
> +{
> + MemOp mop = get_memop(oi);
> + int mmu_idx = get_mmuidx(oi);
> + MemOpIdx new_oi;
> + unsigned a_bits;
> + uint64_t h, l;
> +
> + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
Why not use validate_memop() for this, as is done elsewhere in cputlb?
<snip>
>
> +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
> + MemOpIdx oi, uintptr_t ra)
> +{
> + MemOp mop = get_memop(oi);
> + int mmu_idx = get_mmuidx(oi);
> + MemOpIdx new_oi;
> + unsigned a_bits;
> +
> + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
Ditto for the others.
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index cb83d2375d..33ef325f6e 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -3109,6 +3109,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
> }
> }
>
I'm confused because the TCG ops in this patch still use i64 and
the atomic use hasn't come in yet. Would it be worth splitting the patch?
Anyway:
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> +static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
> +{
> + MemOp mop_1 = orig, mop_2;
> +
> + tcg_debug_assert((orig & MO_SIZE) == MO_128);
> + tcg_debug_assert((orig & MO_SIGN) == 0);
> +
> + /* Use a memory ordering implemented by the host. */
> + if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
> + mop_1 &= ~MO_BSWAP;
> + }
> +
> + /* Reduce the size to 64-bit. */
> + mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
> +
> + /* Retain the alignment constraints of the original. */
> + switch (orig & MO_AMASK) {
> + case MO_UNALN:
> + case MO_ALIGN_2:
> + case MO_ALIGN_4:
> + mop_2 = mop_1;
> + break;
> + case MO_ALIGN_8:
> + /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
> + mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
> + mop_2 = mop_1;
> + break;
> + case MO_ALIGN:
> + /* Second has 8-byte alignment; first has 16-byte alignment. */
> + mop_2 = mop_1;
> + mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
> + break;
> + case MO_ALIGN_16:
> + case MO_ALIGN_32:
> + case MO_ALIGN_64:
> + /* Second has 8-byte alignment; first retains original. */
> + mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> + ret[0] = mop_1;
> + ret[1] = mop_2;
> +}
> +
> +void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
> +{
> + MemOp mop[2];
> + TCGv addr_p8;
> + TCGv_i64 x, y;
> +
> + canonicalize_memop_i128_as_i64(mop, memop);
> +
> + tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
> + addr = plugin_prep_mem_callbacks(addr);
> +
> + /* TODO: respect atomicity of the operation. */
> + /* TODO: allow the tcg backend to see the whole operation. */
> +
> + /*
> + * Since there are no global TCGv_i128, there is no visible state
> + * changed if the second load faults. Load directly into the two
> + * subwords.
> + */
> + if ((memop & MO_BSWAP) == MO_LE) {
> + x = TCGV128_LOW(val);
> + y = TCGV128_HIGH(val);
> + } else {
> + x = TCGV128_HIGH(val);
> + y = TCGV128_LOW(val);
> + }
> +
> + gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
> +
> + if ((mop[0] ^ memop) & MO_BSWAP) {
> + tcg_gen_bswap64_i64(x, x);
> + }
> +
> + addr_p8 = tcg_temp_new();
> + tcg_gen_addi_tl(addr_p8, addr, 8);
> + gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
> + tcg_temp_free(addr_p8);
> +
> + if ((mop[0] ^ memop) & MO_BSWAP) {
> + tcg_gen_bswap64_i64(y, y);
> + }
> +
> + plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
> + QEMU_PLUGIN_MEM_R);
> +}
> +
> +void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
> +{
> + MemOp mop[2];
> + TCGv addr_p8;
> + TCGv_i64 x, y;
> +
> + canonicalize_memop_i128_as_i64(mop, memop);
> +
> + tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
> + addr = plugin_prep_mem_callbacks(addr);
> +
> + /* TODO: respect atomicity of the operation. */
> + /* TODO: allow the tcg backend to see the whole operation. */
> +
> + if ((memop & MO_BSWAP) == MO_LE) {
> + x = TCGV128_LOW(val);
> + y = TCGV128_HIGH(val);
> + } else {
> + x = TCGV128_HIGH(val);
> + y = TCGV128_LOW(val);
> + }
> +
> + addr_p8 = tcg_temp_new();
> + if ((mop[0] ^ memop) & MO_BSWAP) {
> + TCGv_i64 t = tcg_temp_new_i64();
> +
> + tcg_gen_bswap64_i64(t, x);
> + gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
> + tcg_gen_bswap64_i64(t, y);
> + tcg_gen_addi_tl(addr_p8, addr, 8);
> + gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
> + tcg_temp_free_i64(t);
> + } else {
> + gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
> + tcg_gen_addi_tl(addr_p8, addr, 8);
> + gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
> + }
> + tcg_temp_free(addr_p8);
> +
> + plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
> + QEMU_PLUGIN_MEM_W);
> +}
> +
> static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
> {
> switch (opc & MO_SSIZE) {
--
Alex Bennée
Virtualisation Tech Lead @ Linaro
next prev parent reply other threads:[~2023-02-01 9:58 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-30 21:48 [PATCH v6 00/36] tcg: Support for Int128 with helpers Richard Henderson
2023-01-30 21:48 ` [PATCH v6 01/36] tcg: Define TCG_TYPE_I128 and related helper macros Richard Henderson
2023-01-30 21:48 ` [PATCH v6 02/36] tcg: Handle dh_typecode_i128 with TCG_CALL_{RET, ARG}_NORMAL Richard Henderson
2023-01-30 21:48 ` [PATCH v6 03/36] tcg: Allocate objects contiguously in temp_allocate_frame Richard Henderson
2023-01-30 21:48 ` [PATCH v6 04/36] tcg: Introduce tcg_out_addi_ptr Richard Henderson
2023-01-30 21:48 ` [PATCH v6 05/36] tcg: Add TCG_CALL_{RET,ARG}_BY_REF Richard Henderson
2023-02-01 9:38 ` Alex Bennée
2023-01-30 21:48 ` [PATCH v6 06/36] tcg: Introduce tcg_target_call_oarg_reg Richard Henderson
2023-01-30 21:48 ` [PATCH v6 07/36] tcg: Add TCG_CALL_RET_BY_VEC Richard Henderson
2023-01-30 21:48 ` [PATCH v6 08/36] include/qemu/int128: Use Int128 structure for TCI Richard Henderson
2023-01-30 21:48 ` [PATCH v6 09/36] tcg/i386: Add TCG_TARGET_CALL_{RET,ARG}_I128 Richard Henderson
2023-01-30 21:48 ` [PATCH v6 10/36] tcg/tci: Fix big-endian return register ordering Richard Henderson
2023-01-30 21:48 ` [PATCH v6 11/36] tcg/tci: Add TCG_TARGET_CALL_{RET,ARG}_I128 Richard Henderson
2023-01-30 21:48 ` [PATCH v6 12/36] tcg: " Richard Henderson
2023-01-30 21:48 ` [PATCH v6 13/36] tcg: Add temp allocation for TCGv_i128 Richard Henderson
2023-02-01 9:43 ` Alex Bennée
2023-01-30 21:48 ` [PATCH v6 14/36] tcg: Add basic data movement " Richard Henderson
2023-01-30 21:48 ` [PATCH v6 15/36] tcg: Add guest load/store primitives " Richard Henderson
2023-02-01 9:52 ` Alex Bennée [this message]
2023-02-01 19:03 ` Richard Henderson
2023-02-03 14:20 ` Philippe Mathieu-Daudé
2023-01-30 21:48 ` [PATCH v6 16/36] tcg: Add tcg_gen_{non}atomic_cmpxchg_i128 Richard Henderson
2023-02-01 10:15 ` Alex Bennée
2023-01-30 21:48 ` [PATCH v6 17/36] tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32,64} Richard Henderson
2023-02-01 10:16 ` [PATCH v6 17/36] tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32, 64} Alex Bennée
2023-01-30 21:48 ` [PATCH v6 18/36] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for STXP Richard Henderson
2023-01-30 21:48 ` [PATCH v6 19/36] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for CASP Richard Henderson
2023-02-01 15:04 ` Philippe Mathieu-Daudé
2023-02-01 19:05 ` Richard Henderson
2023-01-30 21:48 ` [PATCH v6 20/36] target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX Richard Henderson
2023-01-30 21:48 ` [PATCH v6 21/36] tests/tcg/s390x: Add div.c Richard Henderson
2023-01-30 21:48 ` [PATCH v6 22/36] tests/tcg/s390x: Add clst.c Richard Henderson
2023-01-30 21:48 ` [PATCH v6 23/36] tests/tcg/s390x: Add long-double.c Richard Henderson
2023-01-30 21:48 ` [PATCH v6 24/36] target/s390x: Use a single return for helper_divs32/u32 Richard Henderson
2023-01-30 21:48 ` [PATCH v6 25/36] target/s390x: Use a single return for helper_divs64/u64 Richard Henderson
2023-01-30 21:48 ` [PATCH v6 26/36] target/s390x: Use Int128 for return from CLST Richard Henderson
2023-01-30 21:48 ` [PATCH v6 27/36] target/s390x: Use Int128 for return from CKSM Richard Henderson
2023-01-30 21:48 ` [PATCH v6 28/36] target/s390x: Use Int128 for return from TRE Richard Henderson
2023-01-30 21:48 ` [PATCH v6 29/36] target/s390x: Copy wout_x1 to wout_x1_P Richard Henderson
2023-02-01 14:55 ` Philippe Mathieu-Daudé
2023-01-30 21:48 ` [PATCH v6 30/36] target/s390x: Use Int128 for returning float128 Richard Henderson
2023-01-30 21:48 ` [PATCH v6 31/36] target/s390x: Use Int128 for passing float128 Richard Henderson
2023-02-03 13:25 ` Philippe Mathieu-Daudé
2023-01-30 21:48 ` [PATCH v6 32/36] target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG Richard Henderson
2023-01-31 8:59 ` David Hildenbrand
2023-02-01 13:27 ` Ilya Leoshkevich
2023-02-01 13:32 ` [PATCH] tests/tcg/s390x: Add cdsg.c Ilya Leoshkevich
2023-02-01 19:07 ` Richard Henderson
2023-01-30 21:48 ` [PATCH v6 33/36] target/s390x: Implement CC_OP_NZ in gen_op_calc_cc Richard Henderson
2023-01-30 21:48 ` [PATCH v6 34/36] target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b Richard Henderson
2023-01-30 21:48 ` [PATCH v6 35/36] target/i386: Inline cmpxchg8b Richard Henderson
2023-01-30 23:21 ` Philippe Mathieu-Daudé
2023-02-01 10:19 ` Alex Bennée
2023-01-30 21:48 ` [PATCH v6 36/36] target/i386: Inline cmpxchg16b Richard Henderson
2023-01-30 23:24 ` Philippe Mathieu-Daudé
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=874js5u2pu.fsf@linaro.org \
--to=alex.bennee@linaro.org \
--cc=philmd@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).