qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: rth@twiddle.net, cota@braap.org, batuzovk@ispras.ru
Cc: qemu-devel@nongnu.org, qemu-arm@nongnu.org,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Peter Maydell" <peter.maydell@linaro.org>
Subject: [Qemu-devel] [RFC PATCH 9/9] target/arm/translate-a64: vectorise smull vD.4s, vN.[48]s, vM.h[]
Date: Thu, 17 Aug 2017 19:04:04 +0100	[thread overview]
Message-ID: <20170817180404.29334-10-alex.bennee@linaro.org> (raw)
In-Reply-To: <20170817180404.29334-1-alex.bennee@linaro.org>

These instructions show up in the ffmpeg profile from the
ff_simple_idct_put_neon function.

WARNING: this is experimental. It essentially short-cuts to the
vectorised helper for the one instruction that shows up a lot in the
ffmpeg trace; everything else falls through to the normal code
generation. We also skip the case where rd == rn to avoid having to
explicitly deal with aliasing in the helper.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 target/arm/helper-a64.c    | 17 +++++++++++
 target/arm/helper-a64.h    |  2 ++
 target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 17b1edfb5f..ae0f8da5c4 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -538,3 +538,20 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
 
     return !success;
 }
+
+/* Multiply Long (vector, by element) */
+void HELPER(advsimd_smull_idx_s32)(void *d, void *n, uint32_t m,
+                                   uint32_t simd_data)
+{
+    int opr_elt = GET_SIMD_DATA(OPR_ELT, simd_data);
+    int doff_elt = GET_SIMD_DATA(DOFF_ELT, simd_data);
+    int32_t *rd = (int32_t *) d;
+    int16_t *rn = (int16_t *) n;
+    int16_t rm = (int16_t) m;
+    int i;
+
+    #pragma GCC ivdep
+    for (i = 0; i < opr_elt; ++i) {
+        rd[i] = rn[i + doff_elt] * rm;
+    }
+}
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 6f9eaba533..0bd7942cec 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -44,3 +44,5 @@ DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
 DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
+
+DEF_HELPER_4(advsimd_smull_idx_s32, void, vec, vec, i32, i32)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index f474c5008b..3a609e571c 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10466,6 +10466,74 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
     }
 }
 
+typedef void AdvSIMDGenTwoPlusOneVectorFn(TCGv_vec, TCGv_vec, TCGv_i32, TCGv_i32);
+
+/* Handle [U/S]ML[S/A]L and [U/S]MULL instructions
+ *
+ * This is split off from the code below only to aid experimentation.
+ */
+static bool handle_vec_simd_mul_addsub(DisasContext *s, uint32_t insn, int opcode, int size, bool is_q, bool u, int rn, int rm, int rd)
+{
+    /* fprintf(stderr, "%s: %#04x op:%x sz:%d rn:%d rm:%d rd:%d\n", __func__, */
+    /*         insn, opcode, size, rn, rm, rd); */
+
+    if (size == 1) {
+        AdvSIMDGenTwoPlusOneVectorFn *fn = NULL;
+        uint32_t simd_info = 0;
+
+        switch (opcode) {
+        case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+            break;
+        case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+            break;
+        case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+            if (!u)
+            {
+                /* helper assumes no aliasing */
+                if (rd == rn) {
+                    return false;
+                }
+
+                fn = gen_helper_advsimd_smull_idx_s32;
+                simd_info = deposit32(simd_info,
+                                      ADVSIMD_OPR_ELT_SHIFT, ADVSIMD_OPR_ELT_BITS, 4);
+
+                if (is_q) {
+                    simd_info = deposit32(simd_info,
+                                          ADVSIMD_DOFF_ELT_SHIFT, ADVSIMD_DOFF_ELT_BITS, 4);
+                }
+            };
+            break;
+        default:
+            break;
+        }
+
+        /* assert(fn); */
+
+        if (fn) {
+            TCGv_i32 tcg_idx = tcg_temp_new_i32();
+            TCGv_i32 tcg_simd_info = tcg_const_i32(simd_info);
+            int h = extract32(insn, 11, 1);
+            int lm = extract32(insn, 20, 2);
+            int index = h << 2 | lm;
+
+            if (!fp_access_check(s)) {
+                return false;
+            }
+
+            read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+            fn(cpu_V[rd], cpu_V[rn], tcg_idx, tcg_simd_info);
+
+            tcg_temp_free_i32(tcg_simd_info);
+            tcg_temp_free_i32(tcg_idx);
+            return true;
+        }
+    }
+
+    return false;
+}
+
 /* C3.6.13 AdvSIMD scalar x indexed element
  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
@@ -10518,6 +10586,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        /* Shortcut if we have a vectorised helper */
+        if (handle_vec_simd_mul_addsub(s, insn, opcode, size, is_q, u, rn, rm, rd)) {
+            return;
+        }
         is_long = true;
         break;
     case 0x3: /* SQDMLAL, SQDMLAL2 */
-- 
2.13.0

  parent reply	other threads:[~2017-08-17 18:04 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-17 18:03 [Qemu-devel] [RFC PATCH 0/9] TCG Vector types and example conversion Alex Bennée
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 1/9] tcg/README: listify the TCG types Alex Bennée
2017-08-17 20:05   ` Richard Henderson
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 2/9] tcg: introduce the concepts of a TCGv_vec register type Alex Bennée
2017-08-17 20:07   ` Richard Henderson
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 3/9] tcg: generate ptrs to vector registers Alex Bennée
2017-08-17 20:13   ` Richard Henderson
2017-08-17 18:03 ` [Qemu-devel] [RFC PATCH 4/9] helper-head: add support for vec type Alex Bennée
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 5/9] arm/cpu.h: align VFP registers Alex Bennée
2017-08-17 20:13   ` Richard Henderson
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 6/9] target/arm/translate-a64: regnames -> x_regnames Alex Bennée
2017-08-17 20:14   ` Richard Henderson
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 7/9] target/arm/translate-a64: register global vectors Alex Bennée
2017-08-17 18:04 ` [Qemu-devel] [RFC PATCH 8/9] target/arm/helpers: introduce ADVSIMD flags Alex Bennée
2017-08-17 18:04 ` Alex Bennée [this message]
2017-08-17 20:23   ` [Qemu-devel] [RFC PATCH 9/9] target/arm/translate-a64: vectorise smull vD.4s, vN.[48]s, vM.h[] Richard Henderson
2017-08-17 18:32 ` [Qemu-devel] [RFC PATCH 0/9] TCG Vector types and example conversion no-reply
2017-08-18 11:33 ` Kirill Batuzov
2017-08-18 13:44   ` Richard Henderson
2017-08-22  9:04     ` Kirill Batuzov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170817180404.29334-10-alex.bennee@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=batuzovk@ispras.ru \
    --cc=cota@braap.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).