[Qemu-devel] [PATCH v2 16/32] arm/translate-a64: add FP16 x2 ops for simd_indexed

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: "Alex Bennée" <alex.bennee@linaro.org>
To: qemu-arm@nongnu.org
Cc: qemu-devel@nongnu.org, "Alex Bennée" <alex.bennee@linaro.org>,
	"Peter Maydell" <peter.maydell@linaro.org>
Subject: [Qemu-devel] [PATCH v2 16/32] arm/translate-a64: add FP16 x2 ops for simd_indexed
Date: Thu,  8 Feb 2018 17:31:41 +0000	[thread overview]
Message-ID: <20180208173157.24705-17-alex.bennee@linaro.org> (raw)
In-Reply-To: <20180208173157.24705-1-alex.bennee@linaro.org>

A bunch of the vectorised bitwise operations just operate on larger
chunks at a time. We can do the same for the new half-precision
operations by introducing some TWOHALFOP helpers which take a pair of
half-precision values packed into 32 bits and operate on both halves
in a single call.

Hopefully all this hoop jumping will get simpler once we have
generically vectorised helpers here.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v2
  - checkpatch fixes
---
 target/arm/helper-a64.c    | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 target/arm/helper-a64.h    | 10 ++++++++++
 target/arm/translate-a64.c | 36 +++++++++++++++++++++++++++++-------
 3 files changed, 84 insertions(+), 8 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 6358b42472..8f0f59ea31 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -634,8 +634,32 @@ ADVSIMD_HALFOP(max)
 ADVSIMD_HALFOP(minnum)
 ADVSIMD_HALFOP(maxnum)
 
+#define ADVSIMD_TWOHALFOP(name)                                         \
+uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
+{ \
+    float16  a1, a2, b1, b2;                        \
+    uint32_t r1, r2;                                \
+    float_status *fpst = fpstp;                     \
+    a1 = extract32(two_a, 0, 16);                   \
+    a2 = extract32(two_a, 16, 16);                  \
+    b1 = extract32(two_b, 0, 16);                   \
+    b2 = extract32(two_b, 16, 16);                  \
+    r1 = float16_ ## name(a1, b1, fpst);            \
+    r2 = float16_ ## name(a2, b2, fpst);            \
+    return deposit32(r1, 16, 16, r2);               \
+}
+
+ADVSIMD_TWOHALFOP(add)
+ADVSIMD_TWOHALFOP(sub)
+ADVSIMD_TWOHALFOP(mul)
+ADVSIMD_TWOHALFOP(div)
+ADVSIMD_TWOHALFOP(min)
+ADVSIMD_TWOHALFOP(max)
+ADVSIMD_TWOHALFOP(minnum)
+ADVSIMD_TWOHALFOP(maxnum)
+
 /* Data processing - scalar floating-point and advanced SIMD */
-float16 HELPER(advsimd_mulxh)(float16 a, float16 b, void *fpstp)
+static float16 float16_mulx(float16 a, float16 b, void *fpstp)
 {
     float_status *fpst = fpstp;
 
@@ -651,6 +675,9 @@ float16 HELPER(advsimd_mulxh)(float16 a, float16 b, void *fpstp)
     return float16_mul(a, b, fpst);
 }
 
+ADVSIMD_HALFOP(mulx)
+ADVSIMD_TWOHALFOP(mulx)
+
 /* fused multiply-accumulate */
 float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
 {
@@ -658,6 +685,23 @@ float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
     return float16_muladd(a, b, c, 0, fpst);
 }
 
+uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
+                                  uint32_t two_c, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    float16  a1, a2, b1, b2, c1, c2;
+    uint32_t r1, r2;
+    a1 = extract32(two_a, 0, 16);
+    a2 = extract32(two_a, 16, 16);
+    b1 = extract32(two_b, 0, 16);
+    b2 = extract32(two_b, 16, 16);
+    c1 = extract32(two_c, 0, 16);
+    c2 = extract32(two_c, 16, 16);
+    r1 = float16_muladd(a1, b1, c1, 0, fpst);
+    r2 = float16_muladd(a2, b2, c2, 0, fpst);
+    return deposit32(r1, 16, 16, r2);
+}
+
 /*
  * Floating point comparisons produce an integer result. Softfloat
  * routines return float_relation types which we convert to the 0/-1
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index d347f473d4..d2dd46d07b 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -61,6 +61,16 @@ DEF_HELPER_3(advsimd_maxnumh, f16, f16, f16, ptr)
 DEF_HELPER_3(advsimd_minnumh, f16, f16, f16, ptr)
 DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, ptr)
 DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_add2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_div2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_max2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_min2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr)
+DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr)
 DEF_HELPER_3(advsimd_ceq_f16, i32, f16, f16, ptr)
 DEF_HELPER_3(advsimd_cge_f16, i32, f16, f16, ptr)
 DEF_HELPER_3(advsimd_cgt_f16, i32, f16, f16, ptr)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 83a1fa3116..f01bab801c 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10966,21 +10966,31 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
             }
             case 0x5: /* FMLS */
             case 0x1: /* FMLA */
-                read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32);
+                read_vec_element_i32(s, tcg_res, rd, pass,
+                                     is_scalar ? size : MO_32);
                 switch (size) {
                 case 1:
                     if (opcode == 0x5) {
-                        /* As usual for ARM, separate negation for fused multiply-add */
+                        /* As usual for ARM, separate negation for fused
+                         * multiply-add. */
                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
                     }
-                    gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+                    if (is_scalar) {
+                        gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
+                                                   tcg_res, fpst);
+                    } else {
+                        gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
+                                                    tcg_res, fpst);
+                    }
                     break;
                 case 2:
                     if (opcode == 0x5) {
-                        /* As usual for ARM, separate negation for fused multiply-add */
+                        /* As usual for ARM, separate negation for fused
+                         * multiply-add. */
                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
                     }
-                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
+                                           tcg_res, fpst);
                     break;
                 default:
                     g_assert_not_reached();
@@ -10990,9 +11000,21 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
                 switch (size) {
                 case 1:
                     if (u) {
-                        gen_helper_advsimd_mulxh(tcg_res, tcg_op, tcg_idx, fpst);
+                        if (is_scalar) {
+                            gen_helper_advsimd_mulxh(tcg_res, tcg_op,
+                                                     tcg_idx, fpst);
+                        } else {
+                            gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
+                                                      tcg_idx, fpst);
+                        }
                     } else {
-                        g_assert_not_reached();
+                        if (is_scalar) {
+                            gen_helper_advsimd_mulh(tcg_res, tcg_op,
+                                                    tcg_idx, fpst);
+                        } else {
+                            gen_helper_advsimd_mul2h(tcg_res, tcg_op,
+                                                     tcg_idx, fpst);
+                        }
                     }
                     break;
                 case 2:
-- 
2.15.1

next prev parent reply	other threads:[~2018-02-08 17:40 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-08 17:31 [Qemu-devel] [PATCH v2 00/32] Add ARMv8.2 half-precision functions Alex Bennée
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 01/32] include/exec/helper-head.h: support f16 in helper calls Alex Bennée
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 02/32] target/arm/cpu64: introduce ARM_V8_FP16 feature bit Alex Bennée
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 03/32] target/arm/cpu64: allow fp16 to be disabled Alex Bennée
2018-02-08 20:36   ` Richard Henderson
2018-02-13 14:26   ` Peter Maydell
2018-02-21 16:35     ` Alex Bennée
2018-02-21 18:16       ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 04/32] target/arm/cpu.h: update comment for half-precision values Alex Bennée
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 05/32] target/arm/cpu.h: add additional float_status flags Alex Bennée
2018-02-08 20:42   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 06/32] target/arm/helper: pass explicit fpst to set_rmode Alex Bennée
2018-02-08 20:43   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 07/32] arm/translate-a64: implement half-precision F(MIN|MAX)(V|NMV) Alex Bennée
2018-02-08 20:46   ` Richard Henderson
2018-02-08 20:49   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 08/32] arm/translate-a64: handle_3same_64 comment fix Alex Bennée
2018-02-08 20:46   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 09/32] arm/translate-a64: initial decode for simd_three_reg_same_fp16 Alex Bennée
2018-02-08 20:48   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 10/32] arm/translate-a64: add FP16 FADD/FABD/FSUB/FMUL/FDIV to simd_three_reg_same_fp16 Alex Bennée
2018-02-08 20:49   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 11/32] arm/translate-a64: add FP16 F[A]C[EQ/GE/GT] " Alex Bennée
2018-02-08 20:54   ` Richard Henderson
2018-02-23 11:59     ` Alex Bennée
2018-02-23 22:10       ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 12/32] arm/translate-a64: add FP16 FMULA/X/S " Alex Bennée
2018-02-08 20:56   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 13/32] arm/translate-a64: add FP16 FR[ECP/SQRT]S " Alex Bennée
2018-02-08 20:59   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 14/32] arm/translate-a64: add FP16 pairwise ops simd_three_reg_same_fp16 Alex Bennée
2018-02-08 21:30   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 15/32] arm/translate-a64: add FP16 FMULX/MLS/FMLA to simd_indexed Alex Bennée
2018-02-08 21:49   ` Richard Henderson
2018-02-08 17:31 ` Alex Bennée [this message]
2018-02-08 22:10   ` [Qemu-devel] [PATCH v2 16/32] arm/translate-a64: add FP16 x2 ops for simd_indexed Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 17/32] arm/translate-a64: initial decode for simd_two_reg_misc_fp16 Alex Bennée
2018-02-08 22:15   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 18/32] arm/translate-a64: add FP16 FPRINTx to simd_two_reg_misc_fp16 Alex Bennée
2018-02-08 22:32   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 19/32] arm/translate-a64: add FCVTxx " Alex Bennée
2018-02-08 22:35   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 20/32] arm/translate-a64: add FP16 FCMxx (zero) " Alex Bennée
2018-02-08 22:39   ` Richard Henderson
2018-02-22 17:23     ` Alex Bennée
2018-02-22 19:40       ` Richard Henderson
2018-02-23 10:23         ` Alex Bennée
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 21/32] arm/translate-a64: add FP16 SCVTF/UCVFT " Alex Bennée
2018-02-08 22:42   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 22/32] arm/translate-a64: add FP16 FNEG/FABS " Alex Bennée
2018-02-08 22:43   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 23/32] arm/helper.c: re-factor recpe and add recepe_f16 Alex Bennée
2018-02-09 17:54   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 24/32] arm/translate-a64: add FP16 FRECPE Alex Bennée
2018-02-09 17:57   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 25/32] arm/translate-a64: add FP16 FRCPX to simd_two_reg_misc_fp16 Alex Bennée
2018-02-09 18:00   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 26/32] arm/translate-a64: add FP16 FSQRT " Alex Bennée
2018-02-09 18:01   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 27/32] arm/helper.c: re-factor rsqrte and add rsqrte_f16 Alex Bennée
2018-02-09 18:15   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 28/32] arm/translate-a64: add FP16 FRSQRTE to simd_two_reg_misc_fp16 Alex Bennée
2018-02-09 18:15   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 29/32] arm/translate-a64: add FP16 FMOV to simd_mod_imm Alex Bennée
2018-02-09 18:23   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 30/32] arm/translate-a64: add all FP16 ops in simd_scalar_pairwise Alex Bennée
2018-02-09 18:27   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 31/32] arm/translate-a64: implement simd_scalar_three_reg_same_fp16 Alex Bennée
2018-02-09 18:34   ` Richard Henderson
2018-02-08 17:31 ` [Qemu-devel] [PATCH v2 32/32] arm/translate-a64: add all single op FP16 to handle_fp_1src_half Alex Bennée
2018-02-09 18:37   ` Richard Henderson
2018-02-23  9:45     ` Alex Bennée
2018-02-08 18:49 ` [Qemu-devel] [PATCH v2 00/32] Add ARMv8.2 half-precision functions no-reply
2018-02-08 18:56 ` no-reply
2018-02-08 19:04 ` no-reply
2018-02-08 19:11 ` no-reply
2018-02-08 19:17 ` no-reply
2018-02-08 21:33 ` no-reply
2018-02-13 14:27 ` [Qemu-devel] [Qemu-arm] " Peter Maydell

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:6358b4247 dfblob:8f0f59ea3 dfblob:d347f473d dfblob:d2dd46d07
dfblob:83a1fa311 dfblob:f01bab801 )
 OR (
bs:"[Qemu-devel] [PATCH v2 16/32] arm/translate-a64: add FP16 x2 ops for simd_indexed" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180208173157.24705-17-alex.bennee@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).