qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org
Subject: [PATCH 11/61] target/arm: Rename zarray to za_state.za
Date: Thu,  6 Feb 2025 11:56:25 -0800	[thread overview]
Message-ID: <20250206195715.2150758-12-richard.henderson@linaro.org> (raw)
In-Reply-To: <20250206195715.2150758-1-richard.henderson@linaro.org>

The whole ZA state will also contain ZT0.
Wrap them in a common structure so that aarch64_set_svcr
can zero both with a single memset.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h               | 48 +++++++++++++++++++---------------
 linux-user/aarch64/signal.c    |  4 +--
 target/arm/cpu.c               |  4 +--
 target/arm/helper.c            |  2 +-
 target/arm/machine.c           |  2 +-
 target/arm/tcg/sme_helper.c    |  6 ++---
 target/arm/tcg/translate-sme.c |  4 +--
 7 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 42c39ac6bd..938c990854 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -733,27 +733,33 @@ typedef struct CPUArchState {
 
     uint64_t scxtnum_el[4];
 
-    /*
-     * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
-     * as we do with vfp.zregs[].  This corresponds to the architectural ZA
-     * array, where ZA[N] is in the least-significant bytes of env->zarray[N].
-     * When SVL is less than the architectural maximum, the accessible
-     * storage is restricted, such that if the SVL is X bytes the guest can
-     * see only the bottom X elements of zarray[], and only the least
-     * significant X bytes of each element of the array. (In other words,
-     * the observable part is always square.)
-     *
-     * The ZA storage can also be considered as a set of square tiles of
-     * elements of different sizes. The mapping from tiles to the ZA array
-     * is architecturally defined, such that for tiles of elements of esz
-     * bytes, the Nth row (or "horizontal slice") of tile T is in
-     * ZA[T + N * esz]. Note that this means that each tile is not contiguous
-     * in the ZA storage, because its rows are striped through the ZA array.
-     *
-     * Because this is so large, keep this toward the end of the reset area,
-     * to keep the offsets into the rest of the structure smaller.
-     */
-    ARMVectorReg zarray[ARM_MAX_VQ * 16];
+    struct {
+        /*
+         * SME ZA storage -- 256 x 256 byte array, with bytes in host
+         * word order, as we do with vfp.zregs[].  This corresponds to
+         * the architectural ZA array, where ZA[N] is in the least
+         * significant bytes of env->za_state.za[N].
+         *
+         * When SVL is less than the architectural maximum, the accessible
+         * storage is restricted, such that if the SVL is X bytes the guest
+         * can see only the bottom X elements of za[], and only the least
+         * significant X bytes of each element of the array. (In other words,
+         * the observable part is always square.)
+         *
+         * The ZA storage can also be considered as a set of square tiles of
+         * elements of different sizes. The mapping from tiles to the ZA array
+         * is architecturally defined, such that for tiles of elements of esz
+         * bytes, the Nth row (or "horizontal slice") of tile T is in
+         * ZA[T + N * esz]. Note that this means that each tile is not
+         * contiguous in the ZA storage, because its rows are striped through
+         * the ZA array.
+         *
+         * Because this is so large, keep this toward the end of the
+         * reset area, to keep the offsets into the rest of the structure
+         * smaller.
+         */
+        ARMVectorReg za[ARM_MAX_VQ * 16];
+    } za_state;
 #endif
 
     struct CPUBreakpoint *cpu_breakpoint[16];
diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index bc7a13800d..d50cab78d8 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -248,7 +248,7 @@ static void target_setup_za_record(struct target_za_context *za,
     for (i = 0; i < vl; ++i) {
         uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
         for (j = 0; j < vq * 2; ++j) {
-            __put_user_e(env->zarray[i].d[j], z + j, le);
+            __put_user_e(env->za_state.za[i].d[j], z + j, le);
         }
     }
 }
@@ -397,7 +397,7 @@ static bool target_restore_za_record(CPUARMState *env,
     for (i = 0; i < vl; ++i) {
         uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
         for (j = 0; j < vq * 2; ++j) {
-            __get_user_e(env->zarray[i].d[j], z + j, le);
+            __get_user_e(env->za_state.za[i].d[j], z + j, le);
         }
     }
     return true;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ba08c05ec6..813cb45276 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1369,8 +1369,8 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
             qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i);
             for (j = zcr_len; j >= 0; --j) {
                 qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c",
-                             env->zarray[i].d[2 * j + 1],
-                             env->zarray[i].d[2 * j],
+                             env->za_state.za[i].d[2 * j + 1],
+                             env->za_state.za[i].d[2 * j],
                              j ? ':' : '\n');
             }
         }
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7d95eae997..e5f06bc288 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6438,7 +6438,7 @@ void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask)
      * when disabled either.
      */
     if (change & new & R_SVCR_ZA_MASK) {
-        memset(env->zarray, 0, sizeof(env->zarray));
+        memset(&env->za_state, 0, sizeof(env->za_state));
     }
 
     if (tcg_enabled()) {
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 978249fb71..d41da414b3 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -315,7 +315,7 @@ static const VMStateDescription vmstate_za = {
     .minimum_version_id = 1,
     .needed = za_needed,
     .fields = (const VMStateField[]) {
-        VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0,
+        VMSTATE_STRUCT_ARRAY(env.za_state.za, ARMCPU, ARM_MAX_VQ * 16, 0,
                              vmstate_vreg, ARMVectorReg),
         VMSTATE_END_OF_LIST()
     }
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index d4562502dd..45f6cdfcb4 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -39,12 +39,12 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
     uint32_t i;
 
     /*
-     * Special case clearing the entire ZA space.
+     * Special case clearing the entire ZArray.
      * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
      * parts of the ZA storage outside of SVL.
      */
     if (imm == 0xff) {
-        memset(env->zarray, 0, sizeof(env->zarray));
+        memset(env->za_state.za, 0, sizeof(env->za_state.za));
         return;
     }
 
@@ -54,7 +54,7 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
      */
     for (i = 0; i < svl; i++) {
         if (imm & (1 << (i % 8))) {
-            memset(&env->zarray[i], 0, svl);
+            memset(&env->za_state.za[i], 0, svl);
         }
     }
 }
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 51175c923e..e8b3578174 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -92,7 +92,7 @@ static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
     offset = tile * sizeof(ARMVectorReg);
 
     /* Include the byte offset of zarray to make this relative to env. */
-    offset += offsetof(CPUARMState, zarray);
+    offset += offsetof(CPUARMState, za_state.za);
     tcg_gen_addi_i32(tmp, tmp, offset);
 
     /* Add the byte offset to env to produce the final pointer. */
@@ -112,7 +112,7 @@ static TCGv_ptr get_tile(DisasContext *s, int esz, int tile)
     TCGv_ptr addr = tcg_temp_new_ptr();
     int offset;
 
-    offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray);
+    offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, za_state.za);
 
     tcg_gen_addi_ptr(addr, tcg_env, offset);
     return addr;
-- 
2.43.0



  parent reply	other threads:[~2025-02-06 19:58 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-06 19:56 [PATCH 00/61] target/arm: Implement FEAT_SME2 Richard Henderson
2025-02-06 19:56 ` [PATCH 01/61] tcg: Add dbase argument to do_dup_store Richard Henderson
2025-02-06 19:56 ` [PATCH 02/61] tcg: Add dbase argument to do_dup Richard Henderson
2025-02-06 19:56 ` [PATCH 03/61] tcg: Add dbase argument to expand_clr Richard Henderson
2025-02-06 19:56 ` [PATCH 04/61] tcg: Add base arguments to check_overlap_[234] Richard Henderson
2025-02-06 19:56 ` [PATCH 05/61] tcg: Split out tcg_gen_gvec_2_var Richard Henderson
2025-02-06 19:56 ` [PATCH 06/61] tcg: Split out tcg_gen_gvec_3_var Richard Henderson
2025-02-06 19:56 ` [PATCH 07/61] tcg: Split out tcg_gen_gvec_mov_var Richard Henderson
2025-02-06 19:56 ` [PATCH 08/61] tcg: Split out tcg_gen_gvec_{add,sub}_var Richard Henderson
2025-02-06 19:56 ` [PATCH 09/61] target/arm: Introduce FPST_ZA, FPST_ZA_F16 Richard Henderson
2025-02-06 19:56 ` [PATCH 10/61] target/arm: Use FPST_ZA for sme_fmopa_[hsd] Richard Henderson
2025-02-06 19:56 ` Richard Henderson [this message]
2025-02-06 19:56 ` [PATCH 12/61] target/arm: Add isar_feature_aa64_sme2* Richard Henderson
2025-02-06 19:56 ` [PATCH 13/61] target/arm: Add ZT0 Richard Henderson
2025-02-06 19:56 ` [PATCH 14/61] target/arm: Add zt0_excp_el to DisasContext Richard Henderson
2025-02-06 19:56 ` [PATCH 15/61] target/arm: Implement SME2 ZERO ZT0 Richard Henderson
2025-02-06 19:56 ` [PATCH 16/61] target/arm: Implement SME2 LDR/STR ZT0 Richard Henderson
2025-02-06 19:56 ` [PATCH 17/61] target/arm: Implement SME2 MOVT Richard Henderson
2025-02-06 19:56 ` [PATCH 18/61] target/arm: Split get_tile_rowcol argument tile_index Richard Henderson
2025-02-06 19:56 ` [PATCH 19/61] target/arm: Rename MOVA for translate Richard Henderson
2025-02-06 19:56 ` [PATCH 20/61] target/arm: Implement SME2 MOVA to/from tile, multiple registers Richard Henderson
2025-02-06 19:56 ` [PATCH 21/61] target/arm: Split out get_zarray Richard Henderson
2025-02-06 19:56 ` [PATCH 22/61] target/arm: Implement SME2 MOVA to/from array, multiple registers Richard Henderson
2025-02-06 19:56 ` [PATCH 23/61] target/arm: Implement SME2 BMOPA Richard Henderson
2025-02-06 19:56 ` [PATCH 24/61] target/arm: Implement SME2 SMOPS, UMOPS (2-way) Richard Henderson
2025-02-06 19:56 ` [PATCH 25/61] target/arm: Introduce gen_gvec_sve2_sqdmulh Richard Henderson
2025-02-06 19:56 ` [PATCH 26/61] target/arm: Implement SME2 Multiple and Single SVE Destructive Richard Henderson
2025-02-06 19:56 ` [PATCH 27/61] target/arm: Implement SME2 Multiple Vectors " Richard Henderson
2025-02-06 19:56 ` [PATCH 28/61] target/arm: Implement SME2 ADD/SUB (array results, multiple and single vector) Richard Henderson
2025-02-06 19:56 ` [PATCH 29/61] target/arm: Implement SME2 ADD/SUB (array results, multiple vectors) Richard Henderson
2025-02-06 19:56 ` [PATCH 30/61] target/arm: Pass ZA to helper_sve2_fmlal_zz[zx]w_s Richard Henderson
2025-02-06 19:56 ` [PATCH 31/61] target/arm: Implement SME2 FMLAL, BFMLAL Richard Henderson
2025-02-06 19:56 ` [PATCH 32/61] target/arm: Implement SME2 FDOT Richard Henderson
2025-02-06 19:56 ` [PATCH 33/61] target/arm: Implement SME2 BFDOT Richard Henderson
2025-02-06 19:56 ` [PATCH 34/61] target/arm: Implement SME2 FVDOT, BFVDOT Richard Henderson
2025-02-06 19:56 ` [PATCH 35/61] target/arm: Rename helper_gvec_*dot_[bh] to *_4[bh] Richard Henderson
2025-02-06 19:56 ` [PATCH 36/61] target/arm: Remove helper_gvec_sudot_idx_4b Richard Henderson
2025-02-06 19:56 ` [PATCH 37/61] target/arm: Implemement SME2 SDOT, UDOT, USDOT, SUDOT Richard Henderson
2025-02-06 19:56 ` [PATCH 38/61] target/arm: Implement SME2 SVDOT, UVDOT, SUVDOT, USVDOT Richard Henderson
2025-02-06 19:56 ` [PATCH 39/61] target/arm: Implement SME2 SMLAL, SMLSL, UMLAL, UMLSL Richard Henderson
2025-02-06 19:56 ` [PATCH 40/61] target/arm: Implement SME2 SMLALL, SMLSLL, UMLALL, UMLSLL Richard Henderson
2025-02-06 19:56 ` [PATCH 41/61] target/arm: Rename gvec_fml[as]_[hs] with _nf_ infix Richard Henderson
2025-02-06 19:56 ` [PATCH 42/61] target/arm: Implement SME2 FMLA, FMLS Richard Henderson
2025-02-06 19:56 ` [PATCH 43/61] target/arm: Implement SME2 BFMLA, BFMLS Richard Henderson
2025-02-06 19:56 ` [PATCH 44/61] target/arm: Implement SME2 FADD, FSUB, BFADD, BFSUB Richard Henderson
2025-02-06 19:56 ` [PATCH 45/61] target/arm: Remove CPUARMState.vfp.scratch Richard Henderson
2025-02-06 19:57 ` [PATCH 46/61] target/arm: Implement SME2 BFCVT, BFCVTN, FCVT, FCVTN Richard Henderson
2025-02-06 19:57 ` [PATCH 47/61] target/arm: Implement SME2 FCVT (widening), FCVTL Richard Henderson
2025-02-06 19:57 ` [PATCH 48/61] target/arm: Implement SME2 FCVTZS, FCVTZU Richard Henderson
2025-02-06 19:57 ` [PATCH 49/61] target/arm: Implement SME2 SCVTF, UCVTF Richard Henderson
2025-02-06 19:57 ` [PATCH 50/61] target/arm: Implement SME2 FRINTN, FRINTP, FRINTM, FRINTA Richard Henderson
2025-02-06 19:57 ` [PATCH 51/61] target/arm: Introduce do_[us]sat_[bhs] macros Richard Henderson
2025-02-06 19:57 ` [PATCH 52/61] target/arm: Use do_[us]sat_[bhs] in sve_helper.c Richard Henderson
2025-02-06 19:57 ` [PATCH 53/61] target/arm: Implement SME2 SQCVT, UQCVT, SQCVTU Richard Henderson
2025-02-06 19:57 ` [PATCH 54/61] target/arm: Implement SME2 SUNPK, UUNPK Richard Henderson
2025-02-06 19:57 ` [PATCH 55/61] target/arm: Implement SME2 ZIP, UZP (four registers) Richard Henderson
2025-02-06 19:57 ` [PATCH 56/61] target/arm: Move do_urshr, do_srshr to vec_internal.h Richard Henderson
2025-02-06 19:57 ` [PATCH 57/61] target/arm: Implement SME2 SQRSHR, UQRSHR, SQRSHRN Richard Henderson
2025-02-06 19:57 ` [PATCH 58/61] target/arm: Implement SME2 ZIP, UZP (two registers) Richard Henderson
2025-02-06 19:57 ` [PATCH 59/61] target/arm: Implement SME2 FCLAMP, SCLAMP, UCLAMP Richard Henderson
2025-02-06 19:57 ` [PATCH 60/61] target/arm: Implement SME2 SEL Richard Henderson
2025-02-06 19:57 ` [PATCH 61/61] target/arm: Enable FEAT_SME2, FEAT_SME_F16F16, FEAT_SVE_B16B16 on -cpu max Richard Henderson
2025-02-24 20:27 ` [PATCH 00/61] target/arm: Implement FEAT_SME2 Richard Henderson
2025-02-24 20:35   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250206195715.2150758-12-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).