From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>
Subject: [PULL 11/27] tcg/s390x: Support 128-bit load/store
Date: Tue, 30 May 2023 11:59:33 -0700 [thread overview]
Message-ID: <20230530185949.410208-12-richard.henderson@linaro.org> (raw)
In-Reply-To: <20230530185949.410208-1-richard.henderson@linaro.org>
Use LPQ/STPQ when 16-byte atomicity is required.
Note that these instructions require 16-byte alignment.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 2 +
tcg/s390x/tcg-target.h | 2 +-
tcg/s390x/tcg-target.c.inc | 107 ++++++++++++++++++++++++++++++++-
3 files changed, 107 insertions(+), 4 deletions(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index ecc079bb6d..cbad91b2b5 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -14,6 +14,7 @@ C_O0_I2(r, r)
C_O0_I2(r, ri)
C_O0_I2(r, rA)
C_O0_I2(v, r)
+C_O0_I3(o, m, r)
C_O1_I1(r, r)
C_O1_I1(v, r)
C_O1_I1(v, v)
@@ -36,6 +37,7 @@ C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, rI, r)
C_O1_I4(r, r, rA, rI, r)
+C_O2_I1(o, m, r)
C_O2_I2(o, m, 0, r)
C_O2_I2(o, m, r, r)
C_O2_I3(o, m, 0, 1, r)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 170007bea5..ec96952172 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -140,7 +140,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index dfaa34c264..503126cd66 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -243,6 +243,7 @@ typedef enum S390Opcode {
RXY_LLGF = 0xe316,
RXY_LLGH = 0xe391,
RXY_LMG = 0xeb04,
+ RXY_LPQ = 0xe38f,
RXY_LRV = 0xe31e,
RXY_LRVG = 0xe30f,
RXY_LRVH = 0xe31f,
@@ -253,6 +254,7 @@ typedef enum S390Opcode {
RXY_STG = 0xe324,
RXY_STHY = 0xe370,
RXY_STMG = 0xeb24,
+ RXY_STPQ = 0xe38e,
RXY_STRV = 0xe33e,
RXY_STRVG = 0xe32f,
RXY_STRVH = 0xe33f,
@@ -1577,7 +1579,18 @@ typedef struct {
bool tcg_target_has_memory_bswap(MemOp memop)
{
- return true;
+ TCGAtomAlign aa;
+
+ if ((memop & MO_SIZE) <= MO_64) {
+ return true;
+ }
+
+ /*
+ * Reject 16-byte memop with 16-byte atomicity,
+ * but do allow a pair of 64-bit operations.
+ */
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
+ return aa.atom <= MO_64;
}
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
@@ -1734,13 +1747,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
{
TCGLabelQemuLdst *ldst = NULL;
MemOp opc = get_memop(oi);
+ MemOp s_bits = opc & MO_SIZE;
unsigned a_mask;
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
a_mask = (1 << h->aa.align) - 1;
#ifdef CONFIG_SOFTMMU
- unsigned s_bits = opc & MO_SIZE;
unsigned s_mask = (1 << s_bits) - 1;
int mem_index = get_mmuidx(oi);
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
@@ -1865,6 +1878,80 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
}
}
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
+{
+ TCGLabel *l1 = NULL, *l2 = NULL;
+ TCGLabelQemuLdst *ldst;
+ HostAddress h;
+ bool need_bswap;
+ bool use_pair;
+ S390Opcode insn;
+
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
+
+ use_pair = h.aa.atom < MO_128;
+ need_bswap = get_memop(oi) & MO_BSWAP;
+
+ if (!use_pair) {
+ /*
+ * Atomicity requires we use LPQ. If we've already checked for
+ * 16-byte alignment, that's all we need. If we arrive with
+ * lesser alignment, we have determined that less than 16-byte
+ * alignment can be satisfied with two 8-byte loads.
+ */
+ if (h.aa.align < MO_128) {
+ use_pair = true;
+ l1 = gen_new_label();
+ l2 = gen_new_label();
+
+ tcg_out_insn(s, RI, TMLL, addr_reg, 15);
+ tgen_branch(s, 7, l1); /* CC in {1,2,3} */
+ }
+
+ tcg_debug_assert(!need_bswap);
+ tcg_debug_assert(datalo & 1);
+ tcg_debug_assert(datahi == datalo - 1);
+ insn = is_ld ? RXY_LPQ : RXY_STPQ;
+ tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
+
+ if (use_pair) {
+ tgen_branch(s, S390_CC_ALWAYS, l2);
+ tcg_out_label(s, l1);
+ }
+ }
+ if (use_pair) {
+ TCGReg d1, d2;
+
+ if (need_bswap) {
+ d1 = datalo, d2 = datahi;
+ insn = is_ld ? RXY_LRVG : RXY_STRVG;
+ } else {
+ d1 = datahi, d2 = datalo;
+ insn = is_ld ? RXY_LG : RXY_STG;
+ }
+
+ if (h.base == d1 || h.index == d1) {
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
+ h.base = TCG_TMP0;
+ h.index = TCG_REG_NONE;
+ h.disp = 0;
+ }
+ tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
+ tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
+ }
+ if (l2) {
+ tcg_out_label(s, l2);
+ }
+
+ if (ldst) {
+ ldst->type = TCG_TYPE_I128;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ }
+}
+
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
/* Reuse the zeroing that exists for goto_ptr. */
@@ -2226,6 +2313,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_qemu_st_a64_i64:
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
break;
+ case INDEX_op_qemu_ld_a32_i128:
+ case INDEX_op_qemu_ld_a64_i128:
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
+ break;
+ case INDEX_op_qemu_st_a32_i128:
+ case INDEX_op_qemu_st_a64_i128:
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
+ break;
case INDEX_op_ld16s_i64:
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
@@ -3107,6 +3202,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_qemu_st_a32_i32:
case INDEX_op_qemu_st_a64_i32:
return C_O0_I2(r, r);
+ case INDEX_op_qemu_ld_a32_i128:
+ case INDEX_op_qemu_ld_a64_i128:
+ return C_O2_I1(o, m, r);
+ case INDEX_op_qemu_st_a32_i128:
+ case INDEX_op_qemu_st_a64_i128:
+ return C_O0_I3(o, m, r);
case INDEX_op_deposit_i32:
case INDEX_op_deposit_i64:
--
2.34.1
next prev parent reply other threads:[~2023-05-30 19:03 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-30 18:59 [PULL 00/27] tcg patch queue Richard Henderson
2023-05-30 18:59 ` [PULL 01/27] tcg: Fix register move type in tcg_out_ld_helper_ret Richard Henderson
2023-05-30 18:59 ` [PULL 02/27] accel/tcg: Fix check for page writeability in load_atomic16_or_exit Richard Henderson
2023-05-30 18:59 ` [PULL 03/27] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
2023-05-30 18:59 ` [PULL 04/27] qemu/atomic128: Add x86_64 atomic128-ldst.h Richard Henderson
2023-05-30 18:59 ` [PULL 05/27] tcg/i386: Support 128-bit load/store Richard Henderson
2023-05-30 18:59 ` [PULL 06/27] tcg/aarch64: Rename temporaries Richard Henderson
2023-05-30 18:59 ` [PULL 07/27] tcg/aarch64: Reserve TCG_REG_TMP1, TCG_REG_TMP2 Richard Henderson
2023-05-30 18:59 ` [PULL 08/27] tcg/aarch64: Simplify constraints on qemu_ld/st Richard Henderson
2023-05-30 18:59 ` [PULL 09/27] tcg/aarch64: Support 128-bit load/store Richard Henderson
2023-05-30 18:59 ` [PULL 10/27] tcg/ppc: " Richard Henderson
2023-05-30 18:59 ` Richard Henderson [this message]
2023-07-10 8:58 ` TCG broken on s390x hosts (was: [PULL 11/27] tcg/s390x: Support 128-bit load/store) Thomas Huth
2023-07-10 9:31 ` Richard Henderson
2023-07-10 11:10 ` Thomas Huth
2023-05-30 18:59 ` [PULL 12/27] accel/tcg: Extract load_atom_extract_al16_or_al8 to host header Richard Henderson
2023-05-30 18:59 ` [PULL 13/27] accel/tcg: Extract store_atom_insert_al16 " Richard Henderson
2023-05-30 18:59 ` [PULL 14/27] accel/tcg: Add x86_64 load_atom_extract_al16_or_al8 Richard Henderson
2023-05-30 18:59 ` [PULL 15/27] accel/tcg: Add aarch64 lse2 load_atom_extract_al16_or_al8 Richard Henderson
2023-05-30 18:59 ` [PULL 16/27] accel/tcg: Add aarch64 store_atom_insert_al16 Richard Henderson
2023-05-30 18:59 ` [PULL 17/27] tcg: Remove TCG_TARGET_TLB_DISPLACEMENT_BITS Richard Henderson
2023-05-30 18:59 ` [PULL 18/27] decodetree: Add --test-for-error Richard Henderson
2023-05-30 18:59 ` [PULL 19/27] decodetree: Fix recursion in prop_format and build_tree Richard Henderson
2023-05-30 18:59 ` [PULL 20/27] decodetree: Diagnose empty pattern group Richard Henderson
2023-05-30 18:59 ` [PULL 21/27] decodetree: Do not remove output_file from /dev Richard Henderson
2023-05-30 18:59 ` [PULL 22/27] tests/decode: Convert tests to meson Richard Henderson
2023-05-30 18:59 ` [PULL 23/27] docs: Document decodetree named field syntax Richard Henderson
2023-05-30 18:59 ` [PULL 24/27] scripts/decodetree: Pass lvalue-formatter function to str_extract() Richard Henderson
2023-05-30 18:59 ` [PULL 25/27] scripts/decodetree: Implement a topological sort Richard Henderson
2023-05-30 18:59 ` [PULL 26/27] scripts/decodetree: Implement named field support Richard Henderson
2023-05-30 18:59 ` [PULL 27/27] tests/decode: Add tests for various named-field cases Richard Henderson
2023-05-31 1:08 ` [PULL 00/27] tcg patch queue Richard Henderson
2023-05-31 16:12 ` Thomas Huth
[not found] ` <227e27e0-4035-8e17-2259-3098340f716e@linaro.org>
2023-05-31 22:20 ` Richard Henderson
2023-06-01 10:52 ` Mark Cave-Ayland
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230530185949.410208-12-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).