[PULL 03/47] target/arm: Implement VFP fp16 for VFP_BINOP operations

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 03/47] target/arm: Implement VFP fp16 for VFP_BINOP operations
Date: Tue,  1 Sep 2020 16:17:39 +0100	[thread overview]
Message-ID: <20200901151823.29785-4-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200901151823.29785-1-peter.maydell@linaro.org>

Implmeent VFP fp16 support for simple binary-operator VFP insns VADD,
VSUB, VMUL, VDIV, VMINNM and VMAXNM:

 * make the VFP_BINOP() macro generate float16 helpers as well as
   float32 and float64
 * implement a do_vfp_3op_hp() function similar to the existing
   do_vfp_3op_sp()
 * add decode for the half-precision insn patterns

Note that the VFP_BINOP macro use creates a couple of unused helper
functions vfp_maxh and vfp_minh, but they're small so it's not worth
splitting the BINOP operations into "needs halfprec" and "no
halfprec" groups.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-4-peter.maydell@linaro.org
---
 target/arm/helper.h            |  8 ++++
 target/arm/vfp-uncond.decode   |  3 ++
 target/arm/vfp.decode          |  4 ++
 target/arm/vfp_helper.c        |  5 ++
 target/arm/translate-vfp.c.inc | 86 ++++++++++++++++++++++++++++++++++
 5 files changed, 106 insertions(+)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 3ca73a1764a..61e4e938861 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -101,20 +101,28 @@ DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32)
 DEF_HELPER_1(vfp_get_fpscr, i32, env)
 DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
 
+DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
 DEF_HELPER_1(vfp_negs, f32, f32)
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index 34ca164266f..ee700e51972 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -49,6 +49,9 @@ VSEL        1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
 VSEL        1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
             vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
 
+VMAXNM_hp   1111 1110 1.00 .... .... 1001 .0.0 ....         @vfp_dnm_s
+VMINNM_hp   1111 1110 1.00 .... .... 1001 .1.0 ....         @vfp_dnm_s
+
 VMAXNM_sp   1111 1110 1.00 .... .... 1010 .0.0 ....         @vfp_dnm_s
 VMINNM_sp   1111 1110 1.00 .... .... 1010 .1.0 ....         @vfp_dnm_s
 
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index 2c793e3e87f..1ecd5e28ca0 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -115,18 +115,22 @@ VNMLS_dp     ---- 1110 0.01 .... .... 1011 .0.0 ....        @vfp_dnm_d
 VNMLA_sp     ---- 1110 0.01 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VNMLA_dp     ---- 1110 0.01 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
+VMUL_hp      ---- 1110 0.10 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VMUL_sp      ---- 1110 0.10 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VMUL_dp      ---- 1110 0.10 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
 VNMUL_sp     ---- 1110 0.10 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VNMUL_dp     ---- 1110 0.10 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
+VADD_hp      ---- 1110 0.11 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VADD_sp      ---- 1110 0.11 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VADD_dp      ---- 1110 0.11 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
+VSUB_hp      ---- 1110 0.11 .... .... 1001 .1.0 ....        @vfp_dnm_s
 VSUB_sp      ---- 1110 0.11 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VSUB_dp      ---- 1110 0.11 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
+VDIV_hp      ---- 1110 1.00 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VDIV_sp      ---- 1110 1.00 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VDIV_dp      ---- 1110 1.00 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 02ab8d7f2d8..b8ca744bccc 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -236,6 +236,11 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val)
 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
 
 #define VFP_BINOP(name) \
+dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    return float16_ ## name(a, b, fpst); \
+} \
 float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
 { \
     float_status *fpst = fpstp; \
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 4eeafb494ad..01a5fd65115 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -1266,6 +1266,54 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
     return true;
 }
 
+static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
+                          int vd, int vn, int vm, bool reads_vd)
+{
+    /*
+     * Do a half-precision operation. Functionally this is
+     * the same as do_vfp_3op_sp(), except:
+     *  - it uses the FPST_FPCR_F16
+     *  - it doesn't need the VFP vector handling (fp16 is a
+     *    v8 feature, and in v8 VFP vectors don't exist)
+     *  - it does the aa32_fp16_arith feature test
+     */
+    TCGv_i32 f0, f1, fd;
+    TCGv_ptr fpst;
+
+    if (!dc_isar_feature(aa32_fp16_arith, s)) {
+        return false;
+    }
+
+    if (s->vec_len != 0 || s->vec_stride != 0) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    f0 = tcg_temp_new_i32();
+    f1 = tcg_temp_new_i32();
+    fd = tcg_temp_new_i32();
+    fpst = fpstatus_ptr(FPST_FPCR_F16);
+
+    neon_load_reg32(f0, vn);
+    neon_load_reg32(f1, vm);
+
+    if (reads_vd) {
+        neon_load_reg32(fd, vd);
+    }
+    fn(fd, f0, f1, fpst);
+    neon_store_reg32(fd, vd);
+
+    tcg_temp_free_i32(f0);
+    tcg_temp_free_i32(f1);
+    tcg_temp_free_i32(fd);
+    tcg_temp_free_ptr(fpst);
+
+    return true;
+}
+
 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                           int vd, int vn, int vm, bool reads_vd)
 {
@@ -1643,6 +1691,11 @@ static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
 }
 
+static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
+}
+
 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
 {
     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
@@ -1677,6 +1730,11 @@ static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
 }
 
+static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
+}
+
 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
 {
     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
@@ -1687,6 +1745,11 @@ static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
 }
 
+static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
+}
+
 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
 {
     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
@@ -1697,6 +1760,11 @@ static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
 }
 
+static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
+}
+
 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
 {
     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
@@ -1707,6 +1775,24 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
 }
 
+static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
+{
+    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+        return false;
+    }
+    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
+                         a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
+{
+    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+        return false;
+    }
+    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
+                         a->vd, a->vn, a->vm, false);
+}
+
 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
 {
     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
-- 
2.20.1

next prev parent reply	other threads:[~2020-09-01 15:19 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-01 15:17 [PULL 00/47] target-arm queue Peter Maydell
2020-09-01 15:17 ` [PULL 01/47] target/arm: Remove local definitions of float constants Peter Maydell
2020-09-01 15:17 ` [PULL 02/47] target/arm: Use correct ID register check for aa32_fp16_arith Peter Maydell
2020-09-01 15:17 ` Peter Maydell [this message]
2020-09-01 15:17 ` [PULL 04/47] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL Peter Maydell
2020-09-01 15:17 ` [PULL 05/47] target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS Peter Maydell
2020-09-01 15:17 ` [PULL 06/47] target/arm: Implement VFP fp16 for fused-multiply-add Peter Maydell
2020-09-01 15:17 ` [PULL 07/47] target/arm: Macroify uses of do_vfp_2op_sp() and do_vfp_2op_dp() Peter Maydell
2020-09-01 15:17 ` [PULL 08/47] target/arm: Implement VFP fp16 for VABS, VNEG, VSQRT Peter Maydell
2020-09-01 15:17 ` [PULL 09/47] target/arm: Implement VFP fp16 for VMOV immediate Peter Maydell
2020-09-01 15:17 ` [PULL 10/47] target/arm: Implement VFP fp16 VCMP Peter Maydell
2020-09-01 15:17 ` [PULL 11/47] target/arm: Implement VFP fp16 VLDR and VSTR Peter Maydell
2020-09-01 15:17 ` [PULL 12/47] target/arm: Implement VFP fp16 VCVT between float and integer Peter Maydell
2020-09-01 15:17 ` [PULL 13/47] target/arm: Make VFP_CONV_FIX macros take separate float type and float size Peter Maydell
2020-09-01 15:17 ` [PULL 14/47] target/arm: Use macros instead of open-coding fp16 conversion helpers Peter Maydell
2020-09-01 15:17 ` [PULL 15/47] target/arm: Implement VFP fp16 VCVT between float and fixed-point Peter Maydell
2020-09-01 15:17 ` [PULL 16/47] target/arm: Implement VFP vp16 VCVT-with-specified-rounding-mode Peter Maydell
2020-09-01 15:17 ` [PULL 17/47] target/arm: Implement VFP fp16 VSEL Peter Maydell
2020-09-01 15:17 ` [PULL 18/47] target/arm: Implement VFP fp16 VRINT* Peter Maydell
2020-09-01 15:17 ` [PULL 19/47] target/arm: Implement new VFP fp16 insn VINS Peter Maydell
2020-09-01 15:17 ` [PULL 20/47] target/arm: Implement new VFP fp16 insn VMOVX Peter Maydell
2020-09-01 15:17 ` [PULL 21/47] target/arm: Implement VFP fp16 VMOV between gp and halfprec registers Peter Maydell
2020-09-01 15:17 ` [PULL 22/47] target/arm: Implement FP16 for Neon VADD, VSUB, VABD, VMUL Peter Maydell
2020-09-01 15:17 ` [PULL 23/47] target/arm: Implement fp16 for Neon VRECPE, VRSQRTE using gvec Peter Maydell
2020-09-01 15:18 ` [PULL 24/47] target/arm: Implement fp16 for Neon VABS, VNEG of floats Peter Maydell
2020-09-01 15:18 ` [PULL 25/47] target/arm: Implement fp16 for VCEQ, VCGE, VCGT comparisons Peter Maydell
2020-09-01 15:18 ` [PULL 26/47] target/arm: Implement fp16 for VACGE, VACGT Peter Maydell
2020-09-01 15:18 ` [PULL 27/47] target/arm: Implement fp16 for Neon VMAX, VMIN Peter Maydell
2020-09-01 15:18 ` [PULL 28/47] target/arm: Implement fp16 for Neon VMAXNM, VMINNM Peter Maydell
2020-09-01 15:18 ` [PULL 29/47] target/arm: Implement fp16 for Neon VMLA, VMLS operations Peter Maydell
2020-09-01 15:18 ` [PULL 30/47] target/arm: Implement fp16 for Neon VFMA, VMFS Peter Maydell
2020-09-01 15:18 ` [PULL 31/47] target/arm: Implement fp16 for Neon fp compare-vs-0 Peter Maydell
2020-09-01 15:18 ` [PULL 32/47] target/arm: Implement fp16 for Neon VRECPS Peter Maydell
2020-09-01 15:18 ` [PULL 33/47] target/arm: Implement fp16 for Neon VRSQRTS Peter Maydell
2020-09-01 15:18 ` [PULL 34/47] target/arm: Implement fp16 for Neon pairwise fp ops Peter Maydell
2020-09-01 15:18 ` [PULL 35/47] target/arm: Implement fp16 for Neon float-integer VCVT Peter Maydell
2020-09-01 15:18 ` [PULL 36/47] target/arm: Convert Neon VCVT fixed-point to gvec Peter Maydell
2020-09-01 15:18 ` [PULL 37/47] target/arm: Implement fp16 for Neon VCVT fixed-point Peter Maydell
2020-09-01 15:18 ` [PULL 38/47] target/arm: Implement fp16 for Neon VCVT with rounding modes Peter Maydell
2020-09-01 15:18 ` [PULL 39/47] target/arm: Implement fp16 for Neon VRINT-with-specified-rounding-mode Peter Maydell
2020-09-01 15:18 ` [PULL 40/47] target/arm: Implement fp16 for Neon VRINTX Peter Maydell
2020-09-01 15:18 ` [PULL 41/47] target/arm/vec_helper: Handle oprsz less than 16 bytes in indexed operations Peter Maydell
2020-09-01 15:18 ` [PULL 42/47] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations Peter Maydell
2020-09-01 15:18 ` [PULL 43/47] target/arm: Implement fp16 for Neon VMUL, VMLA, VMLS Peter Maydell
2020-09-01 15:18 ` [PULL 44/47] target/arm: Enable FP16 in '-cpu max' Peter Maydell
2020-09-01 15:18 ` [PULL 45/47] hw/arm/sbsa-ref: add "reg" property to DT cpu nodes Peter Maydell
2020-09-01 15:18 ` [PULL 46/47] hw/misc/sbsa_ec : Add an embedded controller for sbsa-ref Peter Maydell
2020-09-01 15:18 ` [PULL 47/47] hw/arm/sbsa-ref : Add embedded controller in secure memory Peter Maydell
2020-09-01 21:47 ` [PULL 00/47] target-arm queue Peter Maydell
2020-09-02 10:16 ` no-reply

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:3ca73a1764 dfblob:61e4e93886 dfblob:34ca164266
dfblob:ee700e5197 dfblob:2c793e3e87 dfblob:1ecd5e28ca dfblob:02ab8d7f2d
dfblob:b8ca744bcc dfblob:4eeafb494a dfblob:01a5fd6511 )
 OR (
bs:"[PULL 03/47] target/arm: Implement VFP fp16 for VFP_BINOP operations" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200901151823.29785-4-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).