From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org, Peter Maydell <peter.maydell@linaro.org>
Subject: [PATCH v2 10/29] target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree
Date: Mon, 9 Sep 2024 09:22:20 -0700 [thread overview]
Message-ID: <20240909162240.647173-11-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240909162240.647173-1-richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 140 ++++++++++++---------------------
target/arm/tcg/a64.decode | 12 +++
2 files changed, 61 insertions(+), 91 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 74efb35164..593a1774d8 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6794,6 +6794,47 @@ TRANS(FNMADD, do_fmadd, a, true, true)
TRANS(FMSUB, do_fmadd, a, false, true)
TRANS(FNMSUB, do_fmadd, a, true, false)
+/*
+ * Advanced SIMD Across Lanes
+ */
+
+static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
+ MemOp src_sign, NeonGenTwo64OpFn *fn)
+{
+ TCGv_i64 tcg_res, tcg_elt;
+ MemOp src_mop = a->esz | src_sign;
+ int elements = (a->q ? 16 : 8) >> a->esz;
+
+ /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
+ if (elements < 4) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ tcg_res = tcg_temp_new_i64();
+ tcg_elt = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_res, a->rn, 0, src_mop);
+ for (int i = 1; i < elements; i++) {
+ read_vec_element(s, tcg_elt, a->rn, i, src_mop);
+ fn(tcg_res, tcg_res, tcg_elt);
+ }
+
+ tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
+ write_fp_dreg(s, a->rd, tcg_res);
+ return true;
+}
+
+TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
+TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
+TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
+TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
+TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
+TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
+TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
+
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -9092,27 +9133,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 12, 5);
bool is_q = extract32(insn, 30, 1);
bool is_u = extract32(insn, 29, 1);
- bool is_fp = false;
bool is_min = false;
int elements;
- int i;
- TCGv_i64 tcg_res, tcg_elt;
switch (opcode) {
- case 0x1b: /* ADDV */
- if (is_u) {
- unallocated_encoding(s);
- return;
- }
- /* fall through */
- case 0x3: /* SADDLV, UADDLV */
- case 0xa: /* SMAXV, UMAXV */
- case 0x1a: /* SMINV, UMINV */
- if (size == 3 || (size == 2 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
- break;
case 0xc: /* FMAXNMV, FMINNMV */
case 0xf: /* FMAXV, FMINV */
/* Bit 1 of size field encodes min vs max and the actual size
@@ -9121,7 +9145,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
* precision.
*/
is_min = extract32(size, 1, 1);
- is_fp = true;
if (!is_u && dc_isar_feature(aa64_fp16, s)) {
size = 1;
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
@@ -9132,6 +9155,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
}
break;
default:
+ case 0x3: /* SADDLV, UADDLV */
+ case 0xa: /* SMAXV, UMAXV */
+ case 0x1a: /* SMINV, UMINV */
+ case 0x1b: /* ADDV */
unallocated_encoding(s);
return;
}
@@ -9142,52 +9169,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
elements = (is_q ? 16 : 8) >> size;
- tcg_res = tcg_temp_new_i64();
- tcg_elt = tcg_temp_new_i64();
-
- /* These instructions operate across all lanes of a vector
- * to produce a single result. We can guarantee that a 64
- * bit intermediate is sufficient:
- * + for [US]ADDLV the maximum element size is 32 bits, and
- * the result type is 64 bits
- * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
- * same as the element size, which is 32 bits at most
- * For the integer operations we can choose to work at 64
- * or 32 bits and truncate at the end; for simplicity
- * we use 64 bits always. The floating point
- * ops do require 32 bit intermediates, though.
- */
- if (!is_fp) {
- read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
-
- for (i = 1; i < elements; i++) {
- read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
-
- switch (opcode) {
- case 0x03: /* SADDLV / UADDLV */
- case 0x1b: /* ADDV */
- tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
- break;
- case 0x0a: /* SMAXV / UMAXV */
- if (is_u) {
- tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- case 0x1a: /* SMINV / UMINV */
- if (is_u) {
- tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
- } else {
- tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
- }
- break;
- default:
- g_assert_not_reached();
- }
-
- }
- } else {
+ {
/* Floating point vector reduction ops which work across 32
* bit (single) or 16 bit (half-precision) intermediates.
* Note that correct NaN propagation requires that we do these
@@ -9195,34 +9177,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
*/
TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
int fpopcode = opcode | is_min << 4 | is_u << 5;
- TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, size,
- 0, elements, fpst);
- tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
+ TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size,
+ 0, elements, fpst);
+ write_fp_sreg(s, rd, tcg_res);
}
-
- /* Now truncate the result to the width required for the final output */
- if (opcode == 0x03) {
- /* SADDLV, UADDLV: result is 2*esize */
- size++;
- }
-
- switch (size) {
- case 0:
- tcg_gen_ext8u_i64(tcg_res, tcg_res);
- break;
- case 1:
- tcg_gen_ext16u_i64(tcg_res, tcg_res);
- break;
- case 2:
- tcg_gen_ext32u_i64(tcg_res, tcg_res);
- break;
- case 3:
- break;
- default:
- g_assert_not_reached();
- }
-
- write_fp_dreg(s, rd, tcg_res);
}
/* AdvSIMD modified immediate
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 7f71c56f83..5ab4b11781 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -59,6 +59,8 @@
@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3
@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3
+@qrr_e . q:1 ...... esz:2 ...... ...... rn:5 rd:5 &qrr_e
+
@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0
@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1
@qrrr_s . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=2
@@ -1154,3 +1156,13 @@ TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e
TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e
ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e
ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e
+
+# Advanced SIMD Across Lanes
+
+ADDV 0.00 1110 .. 11000 11011 10 ..... ..... @qrr_e
+SADDLV 0.00 1110 .. 11000 00011 10 ..... ..... @qrr_e
+UADDLV 0.10 1110 .. 11000 00011 10 ..... ..... @qrr_e
+SMAXV 0.00 1110 .. 11000 01010 10 ..... ..... @qrr_e
+UMAXV 0.10 1110 .. 11000 01010 10 ..... ..... @qrr_e
+SMINV 0.00 1110 .. 11000 11010 10 ..... ..... @qrr_e
+UMINV 0.10 1110 .. 11000 11010 10 ..... ..... @qrr_e
--
2.43.0
next prev parent reply other threads:[~2024-09-09 16:24 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-09 16:22 [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Richard Henderson
2024-09-09 16:22 ` [PATCH v2 01/29] target/arm: Replace tcg_gen_dupi_vec with constants in gengvec.c Richard Henderson
2024-09-09 16:22 ` [PATCH v2 02/29] target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c Richard Henderson
2024-09-09 16:22 ` [PATCH v2 03/29] target/arm: Use cmpsel in gen_ushl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 04/29] target/arm: Use cmpsel in gen_sshl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 05/29] target/arm: Use tcg_gen_extract2_i64 for EXT Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 06/29] target/arm: Convert EXT to decodetree Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 07/29] target/arm: Convert TBL, TBX " Richard Henderson
2024-09-09 22:23 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 08/29] target/arm: Convert UZP, TRN, ZIP " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 09/29] target/arm: Simplify do_reduction_op Richard Henderson
2024-09-09 16:22 ` Richard Henderson [this message]
2024-09-09 16:22 ` [PATCH v2 11/29] target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 12/29] target/arm: Convert FMOVI (scalar, immediate) " Richard Henderson
2024-09-09 22:24 ` Philippe Mathieu-Daudé
2024-09-10 12:27 ` Peter Maydell
2024-09-10 14:35 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 13/29] target/arm: Convert MOVI, FMOV, ORR, BIC (vector " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 14/29] target/arm: Introduce gen_gvec_sshr, gen_gvec_ushr Richard Henderson
2024-09-09 16:22 ` [PATCH v2 15/29] target/arm: Fix whitespace near gen_srshr64_i64 Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 16/29] target/arm: Convert handle_vec_simd_shri to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 17/29] target/arm: Convert handle_vec_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 18/29] target/arm: Use {, s}extract in handle_vec_simd_wshli Richard Henderson
2024-09-09 16:22 ` [PATCH v2 19/29] target/arm: Convert SSHLL, USHLL to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 20/29] target/arm: Push tcg_rnd into handle_shri_with_rndacc Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 21/29] target/arm: Split out subroutines of handle_shri_with_rndacc Richard Henderson
2024-09-09 16:22 ` [PATCH v2 22/29] target/arm: Convert SHRN, RSHRN to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 23/29] target/arm: Convert handle_scalar_simd_shri " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 24/29] target/arm: Convert handle_scalar_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 25/29] target/arm: Convert VQSHL, VQSHLU to gvec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 26/29] target/arm: Widen NeonGenNarrowEnvFn return to 64 bits Richard Henderson
2024-09-10 14:19 ` Peter Maydell
2024-09-09 16:22 ` [PATCH v2 27/29] target/arm: Convert SQSHL, UQSHL, SQSHLU (immediate) to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 28/29] target/arm: Convert vector [US]QSHRN, [US]QRSHRN, SQSHRUN " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 29/29] target/arm: Convert scalar " Richard Henderson
2024-09-10 15:30 ` [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240909162240.647173-11-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).