From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 03/25] target/arm: Pass env pointer through to gvec_bfdot helper
Date: Thu, 5 Sep 2024 14:00:38 +0100 [thread overview]
Message-ID: <20240905130100.298768-4-peter.maydell@linaro.org> (raw)
In-Reply-To: <20240905130100.298768-1-peter.maydell@linaro.org>
Pass the env pointer through to the gvec_bfdot helper,
so we can use it to add support for FEAT_EBF16.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 4 ++--
target/arm/tcg/translate-a64.c | 27 ++++++++++++++++++++++++-
target/arm/tcg/translate-neon.c | 35 +++++++++++++++++++++++++++++++--
target/arm/tcg/translate-sve.c | 15 +++++++++++++-
target/arm/tcg/vec_helper.c | 3 ++-
5 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 970d059dec5..4466e796cb0 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1027,8 +1027,8 @@ DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
- void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_bfdot, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 4684e7eb6ea..3813c75895b 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -735,6 +735,22 @@ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
+/*
+ * Expand a 4-operand operation using an out-of-line helper that takes
+ * a pointer to the CPU env.
+ */
+static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, int ra, int data,
+ gen_helper_gvec_4_ptr *fn)
+{
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ tcg_env,
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
/*
* Expand a 4-operand + fpstatus pointer + simd data value operation using
* an out-of-line helper.
@@ -5608,10 +5624,19 @@ static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
return true;
}
+static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (fp_access_check(s)) {
+ gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
+ }
+ return true;
+}
+
TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
-TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
+TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfmmla)
TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 915c9e56db5..454380f01d7 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -148,6 +148,37 @@ static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
return true;
}
+static bool do_neon_ddda_env(DisasContext *s, int q, int vd, int vn, int vm,
+ int data, gen_helper_gvec_4_ptr *fn_gvec)
+{
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
+ return false;
+ }
+
+ /*
+ * UNDEF accesses to odd registers for each bit set in Q
+ * (bit 2 checks vd, bit 1 checks vn, bit 0 checks vm).
+ * Q is 0b111 for all-Q-reg insns; other values mean mixed Q-/D-reg inputs.
+ */
+ if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ int opr_sz = q ? 16 : 8;
+ tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
+ vfp_reg_offset(1, vn),
+ vfp_reg_offset(1, vm),
+ vfp_reg_offset(1, vd),
+ tcg_env,
+ opr_sz, opr_sz, data, fn_gvec);
+ return true;
+}
+
static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
int data, ARMFPStatusFlavour fp_flavour,
gen_helper_gvec_4_ptr *fn_gvec_ptr)
@@ -266,8 +297,8 @@ static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
if (!dc_isar_feature(aa32_bf16, s)) {
return false;
}
- return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
- gen_helper_gvec_bfdot);
+ return do_neon_ddda_env(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_bfdot);
}
static bool trans_VFML(DisasContext *s, arg_VFML *a)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index a72c2620960..e1dd6617e8b 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -252,6 +252,19 @@ static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
return ret;
}
+static bool gen_gvec_env_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ int rd, int rn, int rm, int ra,
+ int data)
+{
+ return gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, tcg_env);
+}
+
+static bool gen_gvec_env_arg_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ arg_rrrr_esz *a, int data)
+{
+ return gen_gvec_env_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
+}
+
/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
int rd, int rn, int rm, int ra, int pg,
@@ -7113,7 +7126,7 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
gen_helper_gvec_ummla_b, a, 0)
-TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
gen_helper_gvec_bfdot_idx, a)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 98604d170fd..01b36fdd786 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2814,7 +2814,8 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
return t1;
}
-void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va,
+ CPUARMState *env, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
float32 *d = vd, *a = va;
--
2.34.1
next prev parent reply other threads:[~2024-09-05 13:03 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-05 13:00 [PULL 00/25] target-arm queue Peter Maydell
2024-09-05 13:00 ` [PULL 01/25] target/arm: Allow setting the FPCR.EBF bit for FEAT_EBF16 Peter Maydell
2024-09-05 13:00 ` [PULL 02/25] target/arm: Pass env pointer through to sme_bfmopa helper Peter Maydell
2024-09-05 13:00 ` Peter Maydell [this message]
2024-09-05 13:00 ` [PULL 04/25] target/arm: Pass env pointer through to gvec_bfdot_idx helper Peter Maydell
2024-09-05 13:00 ` [PULL 05/25] target/arm: Pass env pointer through to gvec_bfmmla helper Peter Maydell
2024-09-05 13:00 ` [PULL 06/25] target/arm: Prepare bfdotadd() callers for FEAT_EBF support Peter Maydell
2024-09-05 13:00 ` [PULL 07/25] target/arm: Implement FPCR.EBF=1 semantics for bfdotadd() Peter Maydell
2024-09-05 13:00 ` [PULL 08/25] target/arm: Enable FEAT_EBF16 in the "max" CPU Peter Maydell
2024-09-05 13:00 ` [PULL 09/25] accel/tcg: Remove dead code from rr_cpu_thread_fn() Peter Maydell
2024-09-05 13:00 ` [PULL 10/25] hw: add compat machines for 9.2 Peter Maydell
2024-09-05 13:00 ` [PULL 11/25] hw/arm/smmuv3: Update comment documenting "stage" property Peter Maydell
2024-09-05 13:00 ` [PULL 12/25] hw/arm/virt: Default to two-stage SMMU from virt-9.2 Peter Maydell
2024-09-05 13:00 ` [PULL 13/25] hw/arm/sbsa-ref: Use two-stage SMMU Peter Maydell
2024-09-05 13:00 ` [PULL 14/25] hw/misc/xlnx-versal-cfu: destroy fifo in finalize Peter Maydell
2024-09-05 13:00 ` [PULL 15/25] hw/misc/xlnx-versal-trng: Free s->prng in finalize, not unrealize Peter Maydell
2024-09-05 13:00 ` [PULL 16/25] hw/nvram/xlnx-bbram: Call register_finalize_block Peter Maydell
2024-09-05 13:00 ` [PULL 17/25] hw/nvram/xlnx-zynqmp-efuse: " Peter Maydell
2024-09-05 13:00 ` [PULL 18/25] hw/misc/xlnx-versal-trng: " Peter Maydell
2024-09-05 13:00 ` [PULL 19/25] hw/nvram/xlnx-versal-efuse-ctrl: " Peter Maydell
2024-09-05 13:00 ` [PULL 20/25] hw/arm/sbsa-ref: Don't leak string in sbsa_fdt_add_gic_node() Peter Maydell
2024-09-05 13:00 ` [PULL 21/25] target/arm: Correct names of VFP VFNMA and VFNMS insns Peter Maydell
2024-09-05 13:00 ` [PULL 22/25] hw/arm/xilinx_zynq: Enable Security Extensions Peter Maydell
2024-09-05 13:00 ` [PULL 23/25] hw/arm/boot: Report error msg if loading elf/dtb failed Peter Maydell
2024-09-05 13:00 ` [PULL 24/25] hw/arm/boot: Explain why load_elf_hdr() error is ignored Peter Maydell
2024-09-05 13:01 ` [PULL 25/25] platform-bus: fix refcount leak Peter Maydell
2024-09-06 14:24 ` [PULL 00/25] target-arm queue Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240905130100.298768-4-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).