From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Subject: [PATCH 18/20] target/arm: Convert load/store (multiple structures) to decodetree
Date: Fri, 2 Jun 2023 16:52:21 +0100 [thread overview]
Message-ID: <20230602155223.2040685-19-peter.maydell@linaro.org> (raw)
In-Reply-To: <20230602155223.2040685-1-peter.maydell@linaro.org>
Convert the instructions in the ASIMD load/store multiple structures
instruction classes to decodetree.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/a64.decode | 20 +++
target/arm/tcg/translate-a64.c | 220 ++++++++++++++++-----------------
2 files changed, 129 insertions(+), 111 deletions(-)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index f75c227629b..12d331b4c2a 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -474,3 +474,23 @@ LDAPR_i 01 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext
LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2
LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0
LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1
+
+# Load/store multiple structures
+# The 4-bit opcode in [15:12] encodes repeat count and structure elements
+&ldst_mult rm rn rt sz q p rpt selem
+@ldst_mult . q:1 ...... p:1 . . rm:5 .... sz:2 rn:5 rt:5 &ldst_mult
+ST_mult 0 . 001100 . 0 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4
+ST_mult 0 . 001100 . 0 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1
+ST_mult 0 . 001100 . 0 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3
+ST_mult 0 . 001100 . 0 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1
+ST_mult 0 . 001100 . 0 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1
+ST_mult 0 . 001100 . 0 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2
+ST_mult 0 . 001100 . 0 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1
+
+LD_mult 0 . 001100 . 1 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4
+LD_mult 0 . 001100 . 1 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1
+LD_mult 0 . 001100 . 1 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3
+LD_mult 0 . 001100 . 1 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1
+LD_mult 0 . 001100 . 1 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1
+LD_mult 0 . 001100 . 1 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2
+LD_mult 0 . 001100 . 1 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 1cfd1cd7037..c3b22a74dd5 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -3237,99 +3237,28 @@ static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
return true;
}
-/* AdvSIMD load/store multiple structures
- *
- * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
- * +---+---+---------------+---+-------------+--------+------+------+------+
- * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
- * +---+---+---------------+---+-------------+--------+------+------+------+
- *
- * AdvSIMD load/store multiple structures (post-indexed)
- *
- * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
- * +---+---+---------------+---+---+---------+--------+------+------+------+
- * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
- * +---+---+---------------+---+---+---------+--------+------+------+------+
- *
- * Rt: first (or only) SIMD&FP register to be transferred
- * Rn: base address or SP
- * Rm (post-index only): post-index register (when !31) or size dependent #imm
- */
-static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
+static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
{
- int rt = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 10, 2);
- int opcode = extract32(insn, 12, 4);
- bool is_store = !extract32(insn, 22, 1);
- bool is_postidx = extract32(insn, 23, 1);
- bool is_q = extract32(insn, 30, 1);
TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
MemOp endian, align, mop;
int total; /* total bytes */
int elements; /* elements per vector */
- int rpt; /* num iterations */
- int selem; /* structure elements */
int r;
+ int size = a->sz;
- if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
- unallocated_encoding(s);
- return;
+ if (!a->p && a->rm != 0) {
+ /* For non-postindexed accesses the Rm field must be 0 */
+ return false;
}
-
- if (!is_postidx && rm != 0) {
- unallocated_encoding(s);
- return;
+ if (size == 3 && !a->q && a->selem != 1) {
+ return false;
}
-
- /* From the shared decode logic */
- switch (opcode) {
- case 0x0:
- rpt = 1;
- selem = 4;
- break;
- case 0x2:
- rpt = 4;
- selem = 1;
- break;
- case 0x4:
- rpt = 1;
- selem = 3;
- break;
- case 0x6:
- rpt = 3;
- selem = 1;
- break;
- case 0x7:
- rpt = 1;
- selem = 1;
- break;
- case 0x8:
- rpt = 1;
- selem = 2;
- break;
- case 0xa:
- rpt = 2;
- selem = 1;
- break;
- default:
- unallocated_encoding(s);
- return;
- }
-
- if (size == 3 && !is_q && selem != 1) {
- /* reserved */
- unallocated_encoding(s);
- return;
- }
-
if (!fp_access_check(s)) {
- return;
+ return true;
}
- if (rn == 31) {
+ if (a->rn == 31) {
gen_check_sp_alignment(s);
}
@@ -3339,22 +3268,21 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
endian = MO_LE;
}
- total = rpt * selem * (is_q ? 16 : 8);
- tcg_rn = cpu_reg_sp(s, rn);
+ total = a->rpt * a->selem * (a->q ? 16 : 8);
+ tcg_rn = cpu_reg_sp(s, a->rn);
/*
* Issue the MTE check vs the logical repeat count, before we
* promote consecutive little-endian elements below.
*/
- clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
- total);
+ clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total);
/*
* Consecutive little-endian elements from a single register
* can be promoted to a larger little-endian operation.
*/
align = MO_ALIGN;
- if (selem == 1 && endian == MO_LE) {
+ if (a->selem == 1 && endian == MO_LE) {
align = pow2_align(size);
size = 3;
}
@@ -3363,45 +3291,118 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
}
mop = endian | size | align;
- elements = (is_q ? 16 : 8) >> size;
+ elements = (a->q ? 16 : 8) >> size;
tcg_ebytes = tcg_constant_i64(1 << size);
- for (r = 0; r < rpt; r++) {
+ for (r = 0; r < a->rpt; r++) {
int e;
for (e = 0; e < elements; e++) {
int xs;
- for (xs = 0; xs < selem; xs++) {
- int tt = (rt + r + xs) % 32;
- if (is_store) {
- do_vec_st(s, tt, e, clean_addr, mop);
- } else {
- do_vec_ld(s, tt, e, clean_addr, mop);
- }
+ for (xs = 0; xs < a->selem; xs++) {
+ int tt = (a->rt + r + xs) % 32;
+ do_vec_ld(s, tt, e, clean_addr, mop);
tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
}
}
}
- if (!is_store) {
- /* For non-quad operations, setting a slice of the low
- * 64 bits of the register clears the high 64 bits (in
- * the ARM ARM pseudocode this is implicit in the fact
- * that 'rval' is a 64 bit wide variable).
- * For quad operations, we might still need to zero the
- * high bits of SVE.
- */
- for (r = 0; r < rpt * selem; r++) {
- int tt = (rt + r) % 32;
- clear_vec_high(s, is_q, tt);
+ /*
+ * For non-quad operations, setting a slice of the low 64 bits of
+ * the register clears the high 64 bits (in the ARM ARM pseudocode
+ * this is implicit in the fact that 'rval' is a 64 bit wide
+ * variable). For quad operations, we might still need to zero
+ * the high bits of SVE.
+ */
+ for (r = 0; r < a->rpt * a->selem; r++) {
+ int tt = (a->rt + r) % 32;
+ clear_vec_high(s, a->q, tt);
+ }
+
+ if (a->p) {
+ if (a->rm == 31) {
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
+ } else {
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
+ }
+ }
+ return true;
+}
+
+static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
+{
+ TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
+ MemOp endian, align, mop;
+
+ int total; /* total bytes */
+ int elements; /* elements per vector */
+ int r;
+ int size = a->sz;
+
+ if (!a->p && a->rm != 0) {
+ /* For non-postindexed accesses the Rm field must be 0 */
+ return false;
+ }
+ if (size == 3 && !a->q && a->selem != 1) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ /* For our purposes, bytes are always little-endian. */
+ endian = s->be_data;
+ if (size == 0) {
+ endian = MO_LE;
+ }
+
+ total = a->rpt * a->selem * (a->q ? 16 : 8);
+ tcg_rn = cpu_reg_sp(s, a->rn);
+
+ /*
+ * Issue the MTE check vs the logical repeat count, before we
+ * promote consecutive little-endian elements below.
+ */
+ clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total);
+
+ /*
+ * Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ align = MO_ALIGN;
+ if (a->selem == 1 && endian == MO_LE) {
+ align = pow2_align(size);
+ size = 3;
+ }
+ if (!s->align_mem) {
+ align = 0;
+ }
+ mop = endian | size | align;
+
+ elements = (a->q ? 16 : 8) >> size;
+ tcg_ebytes = tcg_constant_i64(1 << size);
+ for (r = 0; r < a->rpt; r++) {
+ int e;
+ for (e = 0; e < elements; e++) {
+ int xs;
+ for (xs = 0; xs < a->selem; xs++) {
+ int tt = (a->rt + r + xs) % 32;
+ do_vec_st(s, tt, e, clean_addr, mop);
+ tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
+ }
}
}
- if (is_postidx) {
- if (rm == 31) {
+ if (a->p) {
+ if (a->rm == 31) {
tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
} else {
- tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
}
}
+ return true;
}
/* AdvSIMD load/store single structure
@@ -3746,9 +3747,6 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
static void disas_ldst(DisasContext *s, uint32_t insn)
{
switch (extract32(insn, 24, 6)) {
- case 0x0c: /* AdvSIMD load/store multiple structures */
- disas_ldst_multiple_struct(s, insn);
- break;
case 0x0d: /* AdvSIMD load/store single structure */
disas_ldst_single_struct(s, insn);
break;
--
2.34.1
next prev parent reply other threads:[~2023-06-02 15:56 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-02 15:52 [PATCH 00/20] target/arm: Convert exception, system, loads and stores to decodetree Peter Maydell
2023-06-02 15:52 ` [PATCH 01/20] target/arm: Fix return value from LDSMIN/LDSMAX 8/16 bit atomics Peter Maydell
2023-06-03 5:35 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 02/20] target/arm: Convert hint instruction space to decodetree Peter Maydell
2023-06-03 5:42 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 03/20] target/arm: Convert barrier insns " Peter Maydell
2023-06-03 5:48 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 04/20] target/arm: Convert CFINV, XAFLAG and AXFLAG " Peter Maydell
2023-06-03 5:55 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 05/20] target/arm: Convert MSR (immediate) " Peter Maydell
2023-06-03 6:01 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 06/20] target/arm: Convert MSR (reg), MRS, SYS, SYSL " Peter Maydell
2023-06-03 6:05 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 07/20] target/arm: Convert exception generation instructions " Peter Maydell
2023-06-03 6:09 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 08/20] target/arm: Convert load/store exclusive and ordered " Peter Maydell
2023-06-03 22:32 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 09/20] target/arm: Convert LDXP, STXP, CASP, CAS " Peter Maydell
2023-06-03 22:44 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 10/20] target/arm: Convert load reg (literal) group " Peter Maydell
2023-06-03 22:49 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 11/20] target/arm: Convert load/store-pair " Peter Maydell
2023-06-03 23:05 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 12/20] target/arm: Convert ld/st reg+imm9 insns " Peter Maydell
2023-06-03 23:14 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 13/20] target/arm: Convert LDR/STR with 12-bit immediate " Peter Maydell
2023-06-02 20:51 ` Philippe Mathieu-Daudé
2023-06-03 16:18 ` Peter Maydell
2023-06-03 23:19 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 14/20] target/arm: Convert LDR/STR reg+reg " Peter Maydell
2023-06-03 23:27 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 15/20] target/arm: Convert atomic memory ops " Peter Maydell
2023-06-03 23:35 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 16/20] target/arm: Convert load (pointer auth) insns " Peter Maydell
2023-06-02 20:56 ` Philippe Mathieu-Daudé
2023-06-03 23:41 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 17/20] target/arm: Convert LDAPR/STLR (imm) " Peter Maydell
2023-06-03 23:55 ` Richard Henderson
2023-06-02 15:52 ` Peter Maydell [this message]
2023-06-04 0:00 ` [PATCH 18/20] target/arm: Convert load/store (multiple structures) " Richard Henderson
2023-06-02 15:52 ` [PATCH 19/20] target/arm: Convert load/store single structure " Peter Maydell
2023-06-04 1:27 ` Richard Henderson
2023-06-02 15:52 ` [PATCH 20/20] target/arm: Convert load/store tags insns " Peter Maydell
2023-06-04 1:36 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230602155223.2040685-19-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).