From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org
Subject: [PATCH v2 01/29] target/arm: Replace tcg_gen_dupi_vec with constants in gengvec.c
Date: Mon, 9 Sep 2024 09:22:11 -0700 [thread overview]
Message-ID: <20240909162240.647173-2-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240909162240.647173-1-richard.henderson@linaro.org>
Instead of copying a constant into a temporary with dupi,
use a vector constant directly.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/gengvec.c | 43 ++++++++++++++++++----------------------
1 file changed, 19 insertions(+), 24 deletions(-)
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
index 56a1dc1f75..726a1383ae 100644
--- a/target/arm/tcg/gengvec.c
+++ b/target/arm/tcg/gengvec.c
@@ -297,10 +297,9 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, sh - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_sari_vec(vece, d, a, sh);
tcg_gen_add_vec(vece, d, d, t);
@@ -492,10 +491,9 @@ void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, shift - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_shri_vec(vece, d, a, shift);
tcg_gen_add_vec(vece, d, d, t);
@@ -685,9 +683,9 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
tcg_gen_shri_vec(vece, t, a, sh);
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
@@ -773,10 +771,9 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));
tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
}
@@ -1044,14 +1041,13 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec msk, max;
+ TCGv_vec max;
tcg_gen_neg_vec(vece, rsh, shift);
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- msk = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, msk, 0xff);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
tcg_gen_and_vec(vece, lsh, shift, msk);
tcg_gen_and_vec(vece, rsh, rsh, msk);
}
@@ -1064,9 +1060,6 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_shrv_vec(vece, rval, src, rsh);
- max = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, max, 8 << vece);
-
/*
* The choice of LT (signed) and GEU (unsigned) are biased toward
* the instructions of the x86_64 host. For MO_8, the whole byte
@@ -1074,6 +1067,7 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
* have already masked to a byte and so a signed compare works.
* Other tcg hosts have a full set of comparisons and do not care.
*/
+ max = tcg_constant_vec_matching(dst, vece, 8 << vece);
if (vece == MO_8) {
tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
@@ -1170,6 +1164,7 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+ TCGv_vec max, zero;
/*
* Rely on the TCG guarantee that out of range shifts produce
@@ -1180,15 +1175,15 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, tmp);
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
}
/* Bound rsh so out of bound right shift gets -1. */
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+ max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, max);
+ tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, max);
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_sarv_vec(vece, rval, src, rsh);
@@ -1197,12 +1192,12 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_andc_vec(vece, lval, lval, tmp);
/* Select between left and right shift. */
+ zero = tcg_constant_vec_matching(dst, vece, 0);
if (vece == MO_8) {
- tcg_gen_dupi_vec(vece, tmp, 0);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0x80);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
}
}
--
2.43.0
next prev parent reply other threads:[~2024-09-09 16:25 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-09 16:22 [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Richard Henderson
2024-09-09 16:22 ` Richard Henderson [this message]
2024-09-09 16:22 ` [PATCH v2 02/29] target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c Richard Henderson
2024-09-09 16:22 ` [PATCH v2 03/29] target/arm: Use cmpsel in gen_ushl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 04/29] target/arm: Use cmpsel in gen_sshl_vec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 05/29] target/arm: Use tcg_gen_extract2_i64 for EXT Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 06/29] target/arm: Convert EXT to decodetree Richard Henderson
2024-09-09 22:22 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 07/29] target/arm: Convert TBL, TBX " Richard Henderson
2024-09-09 22:23 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 08/29] target/arm: Convert UZP, TRN, ZIP " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 09/29] target/arm: Simplify do_reduction_op Richard Henderson
2024-09-09 16:22 ` [PATCH v2 10/29] target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 11/29] target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 12/29] target/arm: Convert FMOVI (scalar, immediate) " Richard Henderson
2024-09-09 22:24 ` Philippe Mathieu-Daudé
2024-09-10 12:27 ` Peter Maydell
2024-09-10 14:35 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 13/29] target/arm: Convert MOVI, FMOV, ORR, BIC (vector " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 14/29] target/arm: Introduce gen_gvec_sshr, gen_gvec_ushr Richard Henderson
2024-09-09 16:22 ` [PATCH v2 15/29] target/arm: Fix whitespace near gen_srshr64_i64 Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 16/29] target/arm: Convert handle_vec_simd_shri to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 17/29] target/arm: Convert handle_vec_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 18/29] target/arm: Use {, s}extract in handle_vec_simd_wshli Richard Henderson
2024-09-09 16:22 ` [PATCH v2 19/29] target/arm: Convert SSHLL, USHLL to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 20/29] target/arm: Push tcg_rnd into handle_shri_with_rndacc Richard Henderson
2024-09-10 14:37 ` Philippe Mathieu-Daudé
2024-09-09 16:22 ` [PATCH v2 21/29] target/arm: Split out subroutines of handle_shri_with_rndacc Richard Henderson
2024-09-09 16:22 ` [PATCH v2 22/29] target/arm: Convert SHRN, RSHRN to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 23/29] target/arm: Convert handle_scalar_simd_shri " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 24/29] target/arm: Convert handle_scalar_simd_shli " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 25/29] target/arm: Convert VQSHL, VQSHLU to gvec Richard Henderson
2024-09-09 16:22 ` [PATCH v2 26/29] target/arm: Widen NeonGenNarrowEnvFn return to 64 bits Richard Henderson
2024-09-10 14:19 ` Peter Maydell
2024-09-09 16:22 ` [PATCH v2 27/29] target/arm: Convert SQSHL, UQSHL, SQSHLU (immediate) to decodetree Richard Henderson
2024-09-09 16:22 ` [PATCH v2 28/29] target/arm: Convert vector [US]QSHRN, [US]QRSHRN, SQSHRUN " Richard Henderson
2024-09-09 16:22 ` [PATCH v2 29/29] target/arm: Convert scalar " Richard Henderson
2024-09-10 15:30 ` [PATCH v2 00/29] target/arm: AdvSIMD decodetree conversion, part 4 Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240909162240.647173-2-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).