From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org
Subject: [PATCH v2 02/29] target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c
Date: Mon, 9 Sep 2024 09:22:12 -0700
Message-ID: <20240909162240.647173-3-richard.henderson@linaro.org>
In-Reply-To: <20240909162240.647173-1-richard.henderson@linaro.org>

Instead of copying a constant into a temporary with tcg_gen_dupi_vec,
use a vector constant directly via tcg_constant_vec_matching. Where
the same constant is needed more than once in a function, hold it in
a local variable so that only one constant is created.
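
For illustration only (a minimal before/after sketch, not part of the
patch; the gen_example_* helpers are hypothetical), the change has
this shape:

    /* Before: materialize the constant through a scratch temporary. */
    static void gen_example_before(unsigned vece, TCGv_vec d, TCGv_vec n)
    {
        TCGv_vec t = tcg_temp_new_vec_matching(d);
        int halfbits = 4 << vece;

        tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
        tcg_gen_and_vec(vece, d, n, t);
    }

    /* After: pass a read-only vector constant directly as an operand. */
    static void gen_example_after(unsigned vece, TCGv_vec d, TCGv_vec n)
    {
        int halfbits = 4 << vece;

        tcg_gen_and_vec(vece, d, n,
                        tcg_constant_vec_matching(d, vece,
                                                  MAKE_64BIT_MASK(0, halfbits)));
    }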
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-sve.c | 128 +++++++++++++--------------------
1 file changed, 49 insertions(+), 79 deletions(-)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 9e2536dfe9..49d32fabc9 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -6081,9 +6081,9 @@ static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_sari_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
@@ -6138,18 +6138,18 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_shri_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
}
} else {
if (shl == 0) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(0, halfbits)));
} else {
tcg_gen_shli_vec(vece, d, n, halfbits);
tcg_gen_shri_vec(vece, d, d, halfbits - shl);
@@ -6317,18 +6317,14 @@ static const TCGOpcode sqxtn_list[] = {
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, d, d, t);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask));
}
static const GVecGen2 sqxtnb_ops[3] = {
@@ -6349,19 +6345,15 @@ TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const GVecGen2 sqxtnt_ops[3] = {
@@ -6389,12 +6381,10 @@ static const TCGOpcode uqxtn_list[] = {
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 uqxtnb_ops[3] = {
@@ -6415,14 +6405,13 @@ TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);

- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 uqxtnt_ops[3] = {
@@ -6450,14 +6439,11 @@ static const TCGOpcode sqxtun_list[] = {
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 sqxtunb_ops[3] = {
@@ -6478,16 +6464,14 @@ TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);

- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 sqxtunt_ops[3] = {
@@ -6551,13 +6535,11 @@ static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
@@ -6609,13 +6591,11 @@ static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shli_vec(vece, n, n, halfbits - shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
@@ -6658,14 +6638,12 @@ TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode sqshrunb_vec_list[] = {
@@ -6690,16 +6668,15 @@ TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);

tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode sqshrunt_vec_list[] = {
@@ -6742,18 +6719,15 @@ TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode sqshrnb_vec_list[] = {
@@ -6778,19 +6752,16 @@ TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode sqshrnt_vec_list[] = {
@@ -6833,12 +6804,11 @@ TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode uqshrnb_vec_list[] = {
@@ -6863,14 +6833,14 @@ TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);

tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode uqshrnt_vec_list[] = {
--
2.43.0