From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org, Peter Maydell <peter.maydell@linaro.org>
Subject: [PATCH v2 11/29] target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree
Date: Mon, 9 Sep 2024 09:22:21 -0700 [thread overview]
Message-ID: <20240909162240.647173-12-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240909162240.647173-1-richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 176 ++++++++++-----------------------
target/arm/tcg/a64.decode | 14 +++
2 files changed, 67 insertions(+), 123 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 593a1774d8..aec2f6a542 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6835,6 +6835,59 @@ TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
+/*
+ * do_reduction_op: recursive helper for do_fp_reduction
+ *
+ * Reduces the ecount elements of register rn starting at ebase with fn,
+ * halving the range at each level and returning the result in a new temp.
+ * This mirrors the Reduce() pseudocode in the ARM ARM. It is important
+ * for correct NaN propagation that we do these operations in exactly
+ * the order specified by the pseudocode.
+ */
+static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
+ int ebase, int ecount, TCGv_ptr fpst,
+ NeonGenTwoSingleOpFn *fn)
+{
+ if (ecount == 1) {
+ TCGv_i32 tcg_elem = tcg_temp_new_i32();
+ read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
+ return tcg_elem;
+ } else {
+ int half = ecount >> 1;
+ TCGv_i32 tcg_hi, tcg_lo, tcg_res;
+ /* The upper half is generated before the lower half. */
+ tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
+ tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
+ tcg_res = tcg_temp_new_i32();
+ /* The lower-half result is passed to fn ahead of the upper-half one. */
+ fn(tcg_res, tcg_lo, tcg_hi, fpst);
+ return tcg_res;
+ }
+}
+
+static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
+ NeonGenTwoSingleOpFn *fn)
+{ /* Reduce elts elements of a->rn (from element 0) with fn into a->rd. */
+ if (fp_access_check(s)) {
+ MemOp esz = a->esz;
+ int elts = (a->q ? 16 : 8) >> esz; /* element count: (16 or 8) >> esz */
+ TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
+ write_fp_sreg(s, a->rd, res);
+ }
+ return true; /* returned even when fp_access_check() fails */
+}
+
+TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
+TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
+TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
+TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
+
+TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
+TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
+TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
+TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
+
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -9061,128 +9114,6 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
}
}
-/*
- * do_reduction_op helper
- *
- * This mirrors the Reduce() pseudocode in the ARM ARM. It is
- * important for correct NaN propagation that we do these
- * operations in exactly the order specified by the pseudocode.
- *
- * This is a recursive function.
- */
-static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
- MemOp esz, int ebase, int ecount, TCGv_ptr fpst)
-{
- if (ecount == 1) {
- TCGv_i32 tcg_elem = tcg_temp_new_i32();
- read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
- return tcg_elem;
- } else {
- int half = ecount >> 1;
- TCGv_i32 tcg_hi, tcg_lo, tcg_res;
-
- tcg_hi = do_reduction_op(s, fpopcode, rn, esz,
- ebase + half, half, fpst);
- tcg_lo = do_reduction_op(s, fpopcode, rn, esz,
- ebase, half, fpst);
- tcg_res = tcg_temp_new_i32();
-
- switch (fpopcode) {
- case 0x0c: /* fmaxnmv half-precision */
- gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x0f: /* fmaxv half-precision */
- gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x1c: /* fminnmv half-precision */
- gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x1f: /* fminv half-precision */
- gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x2c: /* fmaxnmv */
- gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x2f: /* fmaxv */
- gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x3c: /* fminnmv */
- gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- case 0x3f: /* fminv */
- gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
- break;
- default:
- g_assert_not_reached();
- }
- return tcg_res;
- }
-}
-
-/* AdvSIMD across lanes
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 12, 5);
- bool is_q = extract32(insn, 30, 1);
- bool is_u = extract32(insn, 29, 1);
- bool is_min = false;
- int elements;
-
- switch (opcode) {
- case 0xc: /* FMAXNMV, FMINNMV */
- case 0xf: /* FMAXV, FMINV */
- /* Bit 1 of size field encodes min vs max and the actual size
- * depends on the encoding of the U bit. If not set (and FP16
- * enabled) then we do half-precision float instead of single
- * precision.
- */
- is_min = extract32(size, 1, 1);
- if (!is_u && dc_isar_feature(aa64_fp16, s)) {
- size = 1;
- } else if (!is_u || !is_q || extract32(size, 0, 1)) {
- unallocated_encoding(s);
- return;
- } else {
- size = 2;
- }
- break;
- default:
- case 0x3: /* SADDLV, UADDLV */
- case 0xa: /* SMAXV, UMAXV */
- case 0x1a: /* SMINV, UMINV */
- case 0x1b: /* ADDV */
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- elements = (is_q ? 16 : 8) >> size;
-
- {
- /* Floating point vector reduction ops which work across 32
- * bit (single) or 16 bit (half-precision) intermediates.
- * Note that correct NaN propagation requires that we do these
- * operations in exactly the order specified by the pseudocode.
- */
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
- int fpopcode = opcode | is_min << 4 | is_u << 5;
- TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
- 0, elements, fpst);
- write_fp_sreg(s, rd, tcg_res);
- }
-}
-
/* AdvSIMD modified immediate
* 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
* +---+---+----+---------------------+-----+-------+----+---+-------+------+
@@ -11735,7 +11666,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
static const AArch64DecodeTable data_proc_simd[] = {
/* pattern , mask , fn */
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
- { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 5ab4b11781..c77f9fc987 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -54,11 +54,13 @@
@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3
@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0
+@rr_q1e2 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=2
@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0
@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
+@qrr_h . q:1 ...... .. ...... ...... rn:5 rd:5 &qrr_e esz=1
@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@@ -1166,3 +1168,15 @@ SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
+
+FMAXNMV_h 0.00 1110 00 11000 01100 10 ..... ..... @qrr_h
+FMAXNMV_s 0110 1110 00 11000 01100 10 ..... ..... @rr_q1e2
+
+FMINNMV_h 0.00 1110 10 11000 01100 10 ..... ..... @qrr_h
+FMINNMV_s 0110 1110 10 11000 01100 10 ..... ..... @rr_q1e2
+
+FMAXV_h 0.00 1110 00 11000 01111 10 ..... ..... @qrr_h
+FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2
+
+FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h
+FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2
--
2.43.0
next prev parent reply other threads:[~2024-09-09 16:25 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-09 16:22 [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Richard Henderson
2024-09-09 16:22 ` [PATCH v2 01/29] target/arm: Replace tcg_gen_dupi_vec with constants in gengvec.c Richard Henderson
2024-09-09 16:22 ` [PATCH v2 02/29] target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c Richard Henderson
2024-09-09 16:22 ` [PATCH v2 03/29] target/arm: Use cmpsel in gen_ushl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 04/29] target/arm: Use cmpsel in gen_sshl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 05/29] target/arm: Use tcg_gen_extract2_i64 for EXT Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 06/29] target/arm: Convert EXT to decodetree Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 07/29] target/arm: Convert TBL, TBX " Richard Henderson
2024-09-09 22:23 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 08/29] target/arm: Convert UZP, TRN, ZIP " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 09/29] target/arm: Simplify do_reduction_op Richard Henderson
2024-09-09 16:22 ` [PATCH v2 10/29] target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree Richard Henderson
2024-09-09 16:22 ` Richard Henderson [this message]
2024-09-09 16:22 ` [PATCH v2 12/29] target/arm: Convert FMOVI (scalar, immediate) " Richard Henderson
2024-09-09 22:24 ` Philippe Mathieu-Daudé
2024-09-10 12:27 ` Peter Maydell
2024-09-10 14:35 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 13/29] target/arm: Convert MOVI, FMOV, ORR, BIC (vector " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 14/29] target/arm: Introduce gen_gvec_sshr, gen_gvec_ushr Richard Henderson
2024-09-09 16:22 ` [PATCH v2 15/29] target/arm: Fix whitespace near gen_srshr64_i64 Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 16/29] target/arm: Convert handle_vec_simd_shri to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 17/29] target/arm: Convert handle_vec_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 18/29] target/arm: Use {, s}extract in handle_vec_simd_wshli Richard Henderson
2024-09-09 16:22 ` [PATCH v2 19/29] target/arm: Convert SSHLL, USHLL to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 20/29] target/arm: Push tcg_rnd into handle_shri_with_rndacc Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 21/29] target/arm: Split out subroutines of handle_shri_with_rndacc Richard Henderson
2024-09-09 16:22 ` [PATCH v2 22/29] target/arm: Convert SHRN, RSHRN to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 23/29] target/arm: Convert handle_scalar_simd_shri " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 24/29] target/arm: Convert handle_scalar_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 25/29] target/arm: Convert VQSHL, VQSHLU to gvec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 26/29] target/arm: Widen NeonGenNarrowEnvFn return to 64 bits Richard Henderson
2024-09-10 14:19 ` Peter Maydell
2024-09-09 16:22 ` [PATCH v2 27/29] target/arm: Convert SQSHL, UQSHL, SQSHLU (immediate) to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 28/29] target/arm: Convert vector [US]QSHRN, [US]QRSHRN, SQSHRUN " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 29/29] target/arm: Convert scalar " Richard Henderson
2024-09-10 15:30 ` [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240909162240.647173-12-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).