From: "Alex Bennée" <alex.bennee@linaro.org>
To: rth@twiddle.net, cota@braap.org, batuzovk@ispras.ru
Cc: qemu-devel@nongnu.org, qemu-arm@nongnu.org,
"Alex Bennée" <alex.bennee@linaro.org>,
"Peter Maydell" <peter.maydell@linaro.org>
Subject: [Qemu-devel] [RFC PATCH 9/9] target/arm/translate-a64: vectorise smull vD.4s, vN.[48]s, vM.h[]
Date: Thu, 17 Aug 2017 19:04:04 +0100 [thread overview]
Message-ID: <20170817180404.29334-10-alex.bennee@linaro.org> (raw)
In-Reply-To: <20170817180404.29334-1-alex.bennee@linaro.org>
These instructions show up in the ffmpeg profile from the
ff_simple_idct_put_neon function.
WARNING: this is experimental and essentially shortcuts to the
vectorised helper for the one instruction that shows up a lot in the
ffmpeg trace. Otherwise it falls through to the normal code
generation. We also skip where rd == rn to avoid having to explicitly
deal with the aliasing in the helper.
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
target/arm/helper-a64.c | 17 +++++++++++
target/arm/helper-a64.h | 2 ++
target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 91 insertions(+)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 17b1edfb5f..ae0f8da5c4 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -538,3 +538,20 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
return !success;
}
+
+/* Signed Multiply Long (vector, by element): d[i] = n[i + DOFF_ELT] * m */
+void HELPER(advsimd_smull_idx_s32)(void *d, void *n, uint32_t m,
+                                   uint32_t simd_data)
+{
+    const int count = GET_SIMD_DATA(OPR_ELT, simd_data);
+    const int first = GET_SIMD_DATA(DOFF_ELT, simd_data);
+    const int16_t *src = (const int16_t *) n + first;
+    const int16_t scalar = (int16_t) m;
+    int32_t *dst = (int32_t *) d;
+    int e;
+
+    #pragma GCC ivdep
+    for (e = 0; e < count; e++) {
+        dst[e] = src[e] * scalar;
+    }
+}
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 6f9eaba533..0bd7942cec 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -44,3 +44,5 @@ DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
+/* Multiply long by element: (dest vec, source vec, scalar m, simd_data) */
+DEF_HELPER_4(advsimd_smull_idx_s32, void, vec, vec, i32, i32)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index f474c5008b..3a609e571c 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10466,6 +10466,74 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
}
+typedef void AdvSIMDGenTwoPlusOneVectorFn(TCGv_vec, TCGv_vec,
+                                          TCGv_i32, TCGv_i32);
+
+/* Handle [U/S]ML[S/A]L instructions
+ *
+ * This is split off from the code below only to aid experimentation.
+ * Only the signed 0xa opcode (SMULL/SMULL2) with size == 1 and rd != rn
+ * has a vectorised helper so far.
+ *
+ * Returns true if the instruction was fully dealt with here (including
+ * the case where the FP access check failed), false if the caller should
+ * carry on with its normal code generation.
+ */
+static bool handle_vec_simd_mul_addsub(DisasContext *s, uint32_t insn,
+                                       int opcode, int size, bool is_q,
+                                       bool u, int rn, int rm, int rd)
+{
+    AdvSIMDGenTwoPlusOneVectorFn *fn = NULL;
+    uint32_t simd_info = 0;
+    TCGv_i32 tcg_idx, tcg_simd_info;
+    int index;
+
+    if (size != 1) {
+        return false;
+    }
+
+    switch (opcode) {
+    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+        break;
+    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+        /* helper assumes no aliasing */
+        if (!u && rd != rn) {
+            fn = gen_helper_advsimd_smull_idx_s32;
+            simd_info = deposit32(simd_info, ADVSIMD_OPR_ELT_SHIFT,
+                                  ADVSIMD_OPR_ELT_BITS, 4);
+            if (is_q) {
+                /* the helper then reads from source element 4 onwards */
+                simd_info = deposit32(simd_info, ADVSIMD_DOFF_ELT_SHIFT,
+                                      ADVSIMD_DOFF_ELT_BITS, 4);
+            }
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (!fn) {
+        return false;
+    }
+
+    /* A failed check means the insn is already dealt with: report it as
+     * handled so the caller does not go on to generate code for it. */
+    if (!fp_access_check(s)) {
+        return true;
+    }
+
+    /* element index: insn bit 11 above insn bits 21:20 */
+    index = extract32(insn, 11, 1) << 2 | extract32(insn, 20, 2);
+    /* temporaries are only allocated once we know we will emit code */
+    tcg_idx = tcg_temp_new_i32();
+    tcg_simd_info = tcg_const_i32(simd_info);
+    read_vec_element_i32(s, tcg_idx, rm, index, size);
+    fn(cpu_V[rd], cpu_V[rn], tcg_idx, tcg_simd_info);
+    tcg_temp_free_i32(tcg_simd_info);
+    tcg_temp_free_i32(tcg_idx);
+    return true;
+}
+
/* C3.6.13 AdvSIMD scalar x indexed element
* 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
* +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
@@ -10518,6 +10586,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ /* Shortcut if we have a vectorised helper */
+ if (handle_vec_simd_mul_addsub(s, insn, opcode, size, is_q, u, rn, rm, rd)) {
+ return;
+ }
is_long = true;
break;
case 0x3: /* SQDMLAL, SQDMLAL2 */
--
2.13.0
next prev parent reply other threads:[~2017-08-17 18:04 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-17 18:03 [Qemu-devel] [RFC PATCH 0/9] TCG Vector types and example conversion Alex Bennée
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 1/9] tcg/README: listify the TCG types Alex Bennée
2017-08-17 20:05 ` Richard Henderson
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 2/9] tcg: introduce the concepts of a TCGv_vec register type Alex Bennée
2017-08-17 20:07 ` Richard Henderson
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 3/9] tcg: generate ptrs to vector registers Alex Bennée
2017-08-17 20:13 ` Richard Henderson
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 4/9] helper-head: add support for vec type Alex Bennée
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 5/9] arm/cpu.h: align VFP registers Alex Bennée
2017-08-17 20:13 ` Richard Henderson
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 6/9] target/arm/translate-a64: regnames -> x_regnames Alex Bennée
2017-08-17 20:14 ` Richard Henderson
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 7/9] target/arm/translate-a64: register global vectors Alex Bennée
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 8/9] target/arm/helpers: introduce ADVSIMD flags Alex Bennée
2017-08-17 18:04 ` Alex Bennée [this message]
2017-08-17 20:23 ` [Qemu-devel] [RFC PATCH 9/9] target/arm/translate-a64: vectorise smull vD.4s, vN.[48]s, vM.h[] Richard Henderson
2017-08-17 18:32 ` [Qemu-devel] [RFC PATCH 0/9] TCG Vector types and example conversion no-reply
2017-08-18 11:33 ` Kirill Batuzov
2017-08-18 13:44 ` Richard Henderson
2017-08-22 9:04 ` Kirill Batuzov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170817180404.29334-10-alex.bennee@linaro.org \
--to=alex.bennee@linaro.org \
--cc=batuzovk@ispras.ru \
--cc=cota@braap.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).