From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org
Subject: [PATCH v2 27/29] target/arm: Convert SQSHL, UQSHL, SQSHLU (immediate) to decodetree
Date: Mon, 9 Sep 2024 09:22:37 -0700 [thread overview]
Message-ID: <20240909162240.647173-28-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240909162240.647173-1-richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 223 ++++++++++++++-------------------
target/arm/tcg/a64.decode | 36 +++++-
2 files changed, 128 insertions(+), 131 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 7918720d9b..77324e0145 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6982,6 +6982,9 @@ TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli);
+TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
+TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
+TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
{
@@ -7209,6 +7212,92 @@ TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
+static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
+ NeonGenTwoOpEnvFn *fn)
+{
+ TCGv_i32 t = tcg_temp_new_i32(); /* 32-bit scratch for the helper */
+ tcg_gen_extrl_i64_i32(t, s); /* keep only the low 32 bits of s */
+ fn(t, tcg_env, t, tcg_constant_i32(i)); /* t = fn(env, t, i), in place */
+ tcg_gen_extu_i32_i64(d, t); /* zero-extend the result into d */
+}
+
+static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); /* SQSHL, 8-bit */
+}
+
+static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); /* SQSHL, 16-bit */
+}
+
+static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); /* SQSHL, 32-bit */
+}
+
+static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); /* 64-bit */
+}
+
+static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); /* UQSHL, 8-bit */
+}
+
+static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); /* UQSHL, 16-bit */
+}
+
+static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); /* UQSHL, 32-bit */
+}
+
+static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); /* 64-bit */
+}
+
+static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); /* SQSHLU, 8-bit */
+}
+
+static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); /* SQSHLU, 16-bit */
+}
+
+static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); /* SQSHLU, 32-bit */
+}
+
+static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
+{
+ gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); /* 64-bit */
+}
+
+static WideShiftImmFn * const f_scalar_sqshli[] = { /* indexed by a->esz */
+ gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
+};
+
+static WideShiftImmFn * const f_scalar_uqshli[] = { /* indexed by a->esz */
+ gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
+};
+
+static WideShiftImmFn * const f_scalar_sqshlui[] = { /* indexed by a->esz */
+ gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
+};
+
+/* Note that the helpers sign-extend their inputs, so don't do it here. */
+TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
+TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
+TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
+
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
* Note that it is the caller's responsibility to ensure that the
* shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -9501,116 +9590,6 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
clear_vec_high(s, is_q, rd);
}
-/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
-static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
- bool src_unsigned, bool dst_unsigned,
- int immh, int immb, int rn, int rd)
-{
- int immhb = immh << 3 | immb;
- int size = 32 - clz32(immh) - 1;
- int shift = immhb - (8 << size);
- int pass;
-
- assert(immh != 0);
- assert(!(scalar && is_q));
-
- if (!scalar) {
- if (!is_q && extract32(immh, 3, 1)) {
- unallocated_encoding(s);
- return;
- }
-
- /* Since we use the variable-shift helpers we must
- * replicate the shift count into each element of
- * the tcg_shift value.
- */
- switch (size) {
- case 0:
- shift |= shift << 8;
- /* fall through */
- case 1:
- shift |= shift << 16;
- break;
- case 2:
- case 3:
- break;
- default:
- g_assert_not_reached();
- }
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- if (size == 3) {
- TCGv_i64 tcg_shift = tcg_constant_i64(shift);
- static NeonGenTwo64OpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
- { NULL, gen_helper_neon_qshl_u64 },
- };
- NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
- int maxpass = is_q ? 2 : 1;
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
- write_vec_element(s, tcg_op, rd, pass, MO_64);
- }
- clear_vec_high(s, is_q, rd);
- } else {
- TCGv_i32 tcg_shift = tcg_constant_i32(shift);
- static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
- {
- { gen_helper_neon_qshl_s8,
- gen_helper_neon_qshl_s16,
- gen_helper_neon_qshl_s32 },
- { gen_helper_neon_qshlu_s8,
- gen_helper_neon_qshlu_s16,
- gen_helper_neon_qshlu_s32 }
- }, {
- { NULL, NULL, NULL },
- { gen_helper_neon_qshl_u8,
- gen_helper_neon_qshl_u16,
- gen_helper_neon_qshl_u32 }
- }
- };
- NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
- MemOp memop = scalar ? size : MO_32;
- int maxpass = scalar ? 1 : is_q ? 4 : 2;
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_op, rn, pass, memop);
- genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
- if (scalar) {
- switch (size) {
- case 0:
- tcg_gen_ext8u_i32(tcg_op, tcg_op);
- break;
- case 1:
- tcg_gen_ext16u_i32(tcg_op, tcg_op);
- break;
- case 2:
- break;
- default:
- g_assert_not_reached();
- }
- write_fp_sreg(s, rd, tcg_op);
- } else {
- write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
- }
- }
-
- if (!scalar) {
- clear_vec_high(s, is_q, rd);
- }
- }
-}
-
/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
int elements, int is_signed,
@@ -9890,16 +9869,6 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
immh, immb, opcode, rn, rd);
break;
- case 0xc: /* SQSHLU */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
- break;
- case 0xe: /* SQSHL, UQSHL */
- handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
- break;
case 0x1f: /* FCVTZS, FCVTZU */
handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
break;
@@ -9910,6 +9879,8 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
case 0x06: /* SRSRA / URSRA */
case 0x08: /* SRI */
case 0x0a: /* SHL / SLI */
+ case 0x0c: /* SQSHLU */
+ case 0x0e: /* SQSHL, UQSHL */
unallocated_encoding(s);
break;
}
@@ -10561,16 +10532,6 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
opcode, rn, rd);
break;
- case 0xc: /* SQSHLU */
- if (!is_u) {
- unallocated_encoding(s);
- return;
- }
- handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
- break;
- case 0xe: /* SQSHL, UQSHL */
- handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
- break;
case 0x1f: /* FCVTZS/ FCVTZU */
handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
return;
@@ -10581,6 +10542,8 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
case 0x06: /* SRSRA / URSRA (accum + rounding) */
case 0x08: /* SRI */
case 0x0a: /* SHL / SLI */
+ case 0x0c: /* SQSHLU */
+ case 0x0e: /* SQSHL, UQSHL */
case 0x14: /* SSHLL / USHLL */
unallocated_encoding(s);
return;
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 96803fe6e4..63e04ddfcd 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1287,11 +1287,30 @@ RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_b
RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_h
RSHRN_v 0.00 11110 .... ... 10001 1 ..... ..... @q_shri_s
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_b
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_h
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_s
+SQSHL_vi 0.00 11110 .... ... 01110 1 ..... ..... @q_shli_d
+
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_b
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_h
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_s
+UQSHL_vi 0.10 11110 .... ... 01110 1 ..... ..... @q_shli_d
+
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_b
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_h
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_s
+SQSHLU_vi 0.10 11110 .... ... 01100 1 ..... ..... @q_shli_d
+
# Advanced SIMD scalar shift by immediate
@shri_d .... ..... 1 ...... ..... . rn:5 rd:5 \
&rri_e esz=3 imm=%neon_rshift_i6
-@shli_d .... ..... 1 imm:6 ..... . rn:5 rd:5 &rri_e esz=3
+
+@shli_b .... ..... 0001 imm:3 ..... . rn:5 rd:5 &rri_e esz=0
+@shli_h .... ..... 001 imm:4 ..... . rn:5 rd:5 &rri_e esz=1
+@shli_s .... ..... 01 imm:5 ..... . rn:5 rd:5 &rri_e esz=2
+@shli_d .... ..... 1 imm:6 ..... . rn:5 rd:5 &rri_e esz=3
SSHR_s 0101 11110 .... ... 00000 1 ..... ..... @shri_d
USHR_s 0111 11110 .... ... 00000 1 ..... ..... @shri_d
@@ -1305,3 +1324,18 @@ SRI_s 0111 11110 .... ... 01000 1 ..... ..... @shri_d
SHL_s 0101 11110 .... ... 01010 1 ..... ..... @shli_d
SLI_s 0111 11110 .... ... 01010 1 ..... ..... @shli_d
+
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_b
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_h
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_s
+SQSHL_si 0101 11110 .... ... 01110 1 ..... ..... @shli_d
+
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_b
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_h
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_s
+UQSHL_si 0111 11110 .... ... 01110 1 ..... ..... @shli_d
+
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_b
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_h
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_s
+SQSHLU_si 0111 11110 .... ... 01100 1 ..... ..... @shli_d
--
2.43.0
next prev parent reply other threads:[~2024-09-09 16:26 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-09 16:22 [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Richard Henderson
2024-09-09 16:22 ` [PATCH v2 01/29] target/arm: Replace tcg_gen_dupi_vec with constants in gengvec.c Richard Henderson
2024-09-09 16:22 ` [PATCH v2 02/29] target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c Richard Henderson
2024-09-09 16:22 ` [PATCH v2 03/29] target/arm: Use cmpsel in gen_ushl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 04/29] target/arm: Use cmpsel in gen_sshl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 05/29] target/arm: Use tcg_gen_extract2_i64 for EXT Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 06/29] target/arm: Convert EXT to decodetree Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 07/29] target/arm: Convert TBL, TBX " Richard Henderson
2024-09-09 22:23 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 08/29] target/arm: Convert UZP, TRN, ZIP " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 09/29] target/arm: Simplify do_reduction_op Richard Henderson
2024-09-09 16:22 ` [PATCH v2 10/29] target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 11/29] target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 12/29] target/arm: Convert FMOVI (scalar, immediate) " Richard Henderson
2024-09-09 22:24 ` Philippe Mathieu-Daudé
2024-09-10 12:27 ` Peter Maydell
2024-09-10 14:35 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 13/29] target/arm: Convert MOVI, FMOV, ORR, BIC (vector " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 14/29] target/arm: Introduce gen_gvec_sshr, gen_gvec_ushr Richard Henderson
2024-09-09 16:22 ` [PATCH v2 15/29] target/arm: Fix whitespace near gen_srshr64_i64 Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 16/29] target/arm: Convert handle_vec_simd_shri to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 17/29] target/arm: Convert handle_vec_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 18/29] target/arm: Use {, s}extract in handle_vec_simd_wshli Richard Henderson
2024-09-09 16:22 ` [PATCH v2 19/29] target/arm: Convert SSHLL, USHLL to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 20/29] target/arm: Push tcg_rnd into handle_shri_with_rndacc Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 21/29] target/arm: Split out subroutines of handle_shri_with_rndacc Richard Henderson
2024-09-09 16:22 ` [PATCH v2 22/29] target/arm: Convert SHRN, RSHRN to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 23/29] target/arm: Convert handle_scalar_simd_shri " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 24/29] target/arm: Convert handle_scalar_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 25/29] target/arm: Convert VQSHL, VQSHLU to gvec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 26/29] target/arm: Widen NeonGenNarrowEnvFn return to 64 bits Richard Henderson
2024-09-10 14:19 ` Peter Maydell
2024-09-09 16:22 ` Richard Henderson [this message]
2024-09-09 16:22 ` [PATCH v2 28/29] target/arm: Convert vector [US]QSHRN, [US]QRSHRN, SQSHRUN to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 29/29] target/arm: Convert scalar " Richard Henderson
2024-09-10 15:30 ` [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240909162240.647173-28-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).