From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [PATCH 11/11] target/arm: Improve do_prewiden_3d
Date: Tue, 27 Oct 2020 20:27:03 -0700 [thread overview]
Message-ID: <20201028032703.201526-12-richard.henderson@linaro.org> (raw)
In-Reply-To: <20201028032703.201526-1-richard.henderson@linaro.org>
We can use proper widening loads to extend 32-bit inputs directly to
64 bits, and skip the separate "widenfn" step for that element size.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate.c | 6 +++
target/arm/translate-neon.c.inc | 66 ++++++++++++++++++---------------
2 files changed, 43 insertions(+), 29 deletions(-)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 7611c1f0f1..29ea1eb781 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1183,6 +1183,12 @@ static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
long off = neon_element_offset(reg, ele, memop);
switch (memop) {
+ case MO_SL:
+ tcg_gen_ld32s_i64(dest, cpu_env, off);
+ break;
+ case MO_UL:
+ tcg_gen_ld32u_i64(dest, cpu_env, off);
+ break;
case MO_Q:
tcg_gen_ld_i64(dest, cpu_env, off);
break;
diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc
index 7cd41c79ec..8f33b54067 100644
--- a/target/arm/translate-neon.c.inc
+++ b/target/arm/translate-neon.c.inc
@@ -1788,11 +1788,10 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
NeonGenWidenFn *widenfn,
NeonGenTwo64OpFn *opfn,
- bool src1_wide)
+ int src1_mop, int src2_mop)
{
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
TCGv_i64 rn0_64, rn1_64, rm_64;
- TCGv_i32 rm;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
@@ -1804,12 +1803,12 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
return false;
}
- if (!widenfn || !opfn) {
+ if (!opfn) {
/* size == 3 case, which is an entirely different insn group */
return false;
}
- if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
+ if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
return false;
}
@@ -1821,40 +1820,48 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
rn1_64 = tcg_temp_new_i64();
rm_64 = tcg_temp_new_i64();
- if (src1_wide) {
- read_neon_element64(rn0_64, a->vn, 0, MO_64);
+ if (src1_mop >= 0) {
+ read_neon_element64(rn0_64, a->vn, 0, src1_mop);
} else {
TCGv_i32 tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vn, 0, MO_32);
widenfn(rn0_64, tmp);
tcg_temp_free_i32(tmp);
}
- rm = tcg_temp_new_i32();
- read_neon_element32(rm, a->vm, 0, MO_32);
+ if (src2_mop >= 0) {
+ read_neon_element64(rm_64, a->vm, 0, src2_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 0, MO_32);
+ widenfn(rm_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
- widenfn(rm_64, rm);
- tcg_temp_free_i32(rm);
opfn(rn0_64, rn0_64, rm_64);
/*
* Load second pass inputs before storing the first pass result, to
* avoid incorrect results if a narrow input overlaps with the result.
*/
- if (src1_wide) {
- read_neon_element64(rn1_64, a->vn, 1, MO_64);
+ if (src1_mop >= 0) {
+ read_neon_element64(rn1_64, a->vn, 1, src1_mop);
} else {
TCGv_i32 tmp = tcg_temp_new_i32();
read_neon_element32(tmp, a->vn, 1, MO_32);
widenfn(rn1_64, tmp);
tcg_temp_free_i32(tmp);
}
- rm = tcg_temp_new_i32();
- read_neon_element32(rm, a->vm, 1, MO_32);
+ if (src2_mop >= 0) {
+ read_neon_element64(rm_64, a->vm, 1, src2_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 1, MO_32);
+ widenfn(rm_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
write_neon_element64(rn0_64, a->vd, 0, MO_64);
- widenfn(rm_64, rm);
- tcg_temp_free_i32(rm);
opfn(rn1_64, rn1_64, rm_64);
write_neon_element64(rn1_64, a->vd, 1, MO_64);
@@ -1865,14 +1872,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
return true;
}
-#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
+#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
{ \
static NeonGenWidenFn * const widenfn[] = { \
gen_helper_neon_widen_##S##8, \
gen_helper_neon_widen_##S##16, \
- tcg_gen_##EXT##_i32_i64, \
- NULL, \
+ NULL, NULL, \
}; \
static NeonGenTwo64OpFn * const addfn[] = { \
gen_helper_neon_##OP##l_u16, \
@@ -1880,18 +1886,20 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
tcg_gen_##OP##_i64, \
NULL, \
}; \
- return do_prewiden_3d(s, a, widenfn[a->size], \
- addfn[a->size], SRC1WIDE); \
+ int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
+ return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
+ SRC1WIDE ? MO_Q : narrow_mop, \
+ narrow_mop); \
}
-DO_PREWIDEN(VADDL_S, s, ext, add, false)
-DO_PREWIDEN(VADDL_U, u, extu, add, false)
-DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
-DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
-DO_PREWIDEN(VADDW_S, s, ext, add, true)
-DO_PREWIDEN(VADDW_U, u, extu, add, true)
-DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
-DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
+DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
+DO_PREWIDEN(VADDL_U, u, add, false, 0)
+DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
+DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
+DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
+DO_PREWIDEN(VADDW_U, u, add, true, 0)
+DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
+DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
--
2.25.1
next prev parent reply other threads:[~2020-10-28 3:31 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-28 3:26 [PATCH 00/11] target/arm: Fix neon reg offsets Richard Henderson
2020-10-28 3:26 ` [PATCH 01/11] target/arm: Introduce neon_full_reg_offset Richard Henderson
2020-10-28 3:26 ` [PATCH 02/11] target/arm: Move neon_element_offset to translate.c Richard Henderson
2020-10-28 3:26 ` [PATCH 03/11] target/arm: Use neon_element_offset in neon_load/store_reg Richard Henderson
2020-10-28 3:26 ` [PATCH 04/11] target/arm: Use neon_element_offset in vfp_reg_offset Richard Henderson
2020-10-28 3:26 ` [PATCH 05/11] target/arm: Add read/write_neon_element32 Richard Henderson
2020-10-28 20:22 ` Richard Henderson
2020-10-29 10:15 ` Peter Maydell
2020-10-28 3:26 ` [PATCH 06/11] target/arm: Expand read/write_neon_element32 to all MemOp Richard Henderson
2020-10-28 3:26 ` [PATCH 07/11] target/arm: Rename neon_load_reg32 to vfp_load_reg32 Richard Henderson
2020-10-28 3:27 ` [PATCH 08/11] target/arm: Add read/write_neon_element64 Richard Henderson
2020-10-28 3:27 ` [PATCH 09/11] target/arm: Rename neon_load_reg64 to vfp_load_reg64 Richard Henderson
2020-10-28 3:27 ` [PATCH 10/11] target/arm: Simplify do_long_3d and do_2scalar_long Richard Henderson
2020-10-28 3:27 ` Richard Henderson [this message]
2020-10-28 16:48 ` [PATCH 00/11] target/arm: Fix neon reg offsets Peter Maydell
2020-10-28 18:31 ` Peter Maydell
2020-10-28 19:32 ` Richard Henderson
2020-10-28 20:03 ` Peter Maydell
2020-10-29 11:04 ` Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201028032703.201526-12-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).