From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PULL 03/47] target/arm: Implement VFP fp16 for VFP_BINOP operations
Date: Tue, 1 Sep 2020 16:17:39 +0100 [thread overview]
Message-ID: <20200901151823.29785-4-peter.maydell@linaro.org> (raw)
In-Reply-To: <20200901151823.29785-1-peter.maydell@linaro.org>
Implement VFP fp16 support for simple binary-operator VFP insns VADD,
VSUB, VMUL, VDIV, VMINNM and VMAXNM:
* make the VFP_BINOP() macro generate float16 helpers as well as
float32 and float64
* implement a do_vfp_3op_hp() function similar to the existing
do_vfp_3op_sp()
* add decode for the half-precision insn patterns
Note that the VFP_BINOP macro use creates a couple of unused helper
functions vfp_maxh and vfp_minh, but they're small so it's not worth
splitting the BINOP operations into "needs halfprec" and "no
halfprec" groups.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-4-peter.maydell@linaro.org
---
target/arm/helper.h | 8 ++++
target/arm/vfp-uncond.decode | 3 ++
target/arm/vfp.decode | 4 ++
target/arm/vfp_helper.c | 5 ++
target/arm/translate-vfp.c.inc | 86 ++++++++++++++++++++++++++++++++++
5 files changed, 106 insertions(+)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 3ca73a1764a..61e4e938861 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -101,20 +101,28 @@ DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32)
DEF_HELPER_1(vfp_get_fpscr, i32, env)
DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
+DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
DEF_HELPER_1(vfp_negs, f32, f32)
diff --git a/target/arm/vfp-uncond.decode b/target/arm/vfp-uncond.decode
index 34ca164266f..ee700e51972 100644
--- a/target/arm/vfp-uncond.decode
+++ b/target/arm/vfp-uncond.decode
@@ -49,6 +49,9 @@ VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp dp=1
+VMAXNM_hp 1111 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
+VMINNM_hp 1111 1110 1.00 .... .... 1001 .1.0 .... @vfp_dnm_s
+
VMAXNM_sp 1111 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index 2c793e3e87f..1ecd5e28ca0 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -115,18 +115,22 @@ VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
+VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
+VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s
VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s
VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d
+VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s
VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s
VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d
+VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 02ab8d7f2d8..b8ca744bccc 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -236,6 +236,11 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val)
#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
#define VFP_BINOP(name) \
+dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return float16_ ## name(a, b, fpst); \
+} \
float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
{ \
float_status *fpst = fpstp; \
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 4eeafb494ad..01a5fd65115 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -1266,6 +1266,54 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
return true;
}
+static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
+ int vd, int vn, int vm, bool reads_vd)
+{
+ /*
+ * Do a half-precision operation. Functionally this is
+ * the same as do_vfp_3op_sp(), except:
+ * - it uses the FPST_FPCR_F16
+ * - it doesn't need the VFP vector handling (fp16 is a
+ * v8 feature, and in v8 VFP vectors don't exist)
+ * - it does the aa32_fp16_arith feature test
+ */
+ TCGv_i32 f0, f1, fd;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ f0 = tcg_temp_new_i32();
+ f1 = tcg_temp_new_i32();
+ fd = tcg_temp_new_i32();
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+
+ neon_load_reg32(f0, vn);
+ neon_load_reg32(f1, vm);
+
+ if (reads_vd) {
+ neon_load_reg32(fd, vd);
+ }
+ fn(fd, f0, f1, fpst);
+ neon_store_reg32(fd, vd);
+
+ tcg_temp_free_i32(f0);
+ tcg_temp_free_i32(f1);
+ tcg_temp_free_i32(fd);
+ tcg_temp_free_ptr(fpst);
+
+ return true;
+}
+
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
int vd, int vn, int vm, bool reads_vd)
{
@@ -1643,6 +1691,11 @@ static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
+static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
@@ -1677,6 +1730,11 @@ static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}
+static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
@@ -1687,6 +1745,11 @@ static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}
+static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
@@ -1697,6 +1760,11 @@ static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}
+static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
@@ -1707,6 +1775,24 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
+static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
+ a->vd, a->vn, a->vm, false);
+}
+
static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
if (!dc_isar_feature(aa32_vminmaxnm, s)) {
--
2.20.1
next prev parent reply other threads:[~2020-09-01 15:19 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-01 15:17 [PULL 00/47] target-arm queue Peter Maydell
2020-09-01 15:17 ` [PULL 01/47] target/arm: Remove local definitions of float constants Peter Maydell
2020-09-01 15:17 ` [PULL 02/47] target/arm: Use correct ID register check for aa32_fp16_arith Peter Maydell
2020-09-01 15:17 ` Peter Maydell [this message]
2020-09-01 15:17 ` [PULL 04/47] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL Peter Maydell
2020-09-01 15:17 ` [PULL 05/47] target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS Peter Maydell
2020-09-01 15:17 ` [PULL 06/47] target/arm: Implement VFP fp16 for fused-multiply-add Peter Maydell
2020-09-01 15:17 ` [PULL 07/47] target/arm: Macroify uses of do_vfp_2op_sp() and do_vfp_2op_dp() Peter Maydell
2020-09-01 15:17 ` [PULL 08/47] target/arm: Implement VFP fp16 for VABS, VNEG, VSQRT Peter Maydell
2020-09-01 15:17 ` [PULL 09/47] target/arm: Implement VFP fp16 for VMOV immediate Peter Maydell
2020-09-01 15:17 ` [PULL 10/47] target/arm: Implement VFP fp16 VCMP Peter Maydell
2020-09-01 15:17 ` [PULL 11/47] target/arm: Implement VFP fp16 VLDR and VSTR Peter Maydell
2020-09-01 15:17 ` [PULL 12/47] target/arm: Implement VFP fp16 VCVT between float and integer Peter Maydell
2020-09-01 15:17 ` [PULL 13/47] target/arm: Make VFP_CONV_FIX macros take separate float type and float size Peter Maydell
2020-09-01 15:17 ` [PULL 14/47] target/arm: Use macros instead of open-coding fp16 conversion helpers Peter Maydell
2020-09-01 15:17 ` [PULL 15/47] target/arm: Implement VFP fp16 VCVT between float and fixed-point Peter Maydell
2020-09-01 15:17 ` [PULL 16/47] target/arm: Implement VFP fp16 VCVT-with-specified-rounding-mode Peter Maydell
2020-09-01 15:17 ` [PULL 17/47] target/arm: Implement VFP fp16 VSEL Peter Maydell
2020-09-01 15:17 ` [PULL 18/47] target/arm: Implement VFP fp16 VRINT* Peter Maydell
2020-09-01 15:17 ` [PULL 19/47] target/arm: Implement new VFP fp16 insn VINS Peter Maydell
2020-09-01 15:17 ` [PULL 20/47] target/arm: Implement new VFP fp16 insn VMOVX Peter Maydell
2020-09-01 15:17 ` [PULL 21/47] target/arm: Implement VFP fp16 VMOV between gp and halfprec registers Peter Maydell
2020-09-01 15:17 ` [PULL 22/47] target/arm: Implement FP16 for Neon VADD, VSUB, VABD, VMUL Peter Maydell
2020-09-01 15:17 ` [PULL 23/47] target/arm: Implement fp16 for Neon VRECPE, VRSQRTE using gvec Peter Maydell
2020-09-01 15:18 ` [PULL 24/47] target/arm: Implement fp16 for Neon VABS, VNEG of floats Peter Maydell
2020-09-01 15:18 ` [PULL 25/47] target/arm: Implement fp16 for VCEQ, VCGE, VCGT comparisons Peter Maydell
2020-09-01 15:18 ` [PULL 26/47] target/arm: Implement fp16 for VACGE, VACGT Peter Maydell
2020-09-01 15:18 ` [PULL 27/47] target/arm: Implement fp16 for Neon VMAX, VMIN Peter Maydell
2020-09-01 15:18 ` [PULL 28/47] target/arm: Implement fp16 for Neon VMAXNM, VMINNM Peter Maydell
2020-09-01 15:18 ` [PULL 29/47] target/arm: Implement fp16 for Neon VMLA, VMLS operations Peter Maydell
2020-09-01 15:18 ` [PULL 30/47] target/arm: Implement fp16 for Neon VFMA, VFMS Peter Maydell
2020-09-01 15:18 ` [PULL 31/47] target/arm: Implement fp16 for Neon fp compare-vs-0 Peter Maydell
2020-09-01 15:18 ` [PULL 32/47] target/arm: Implement fp16 for Neon VRECPS Peter Maydell
2020-09-01 15:18 ` [PULL 33/47] target/arm: Implement fp16 for Neon VRSQRTS Peter Maydell
2020-09-01 15:18 ` [PULL 34/47] target/arm: Implement fp16 for Neon pairwise fp ops Peter Maydell
2020-09-01 15:18 ` [PULL 35/47] target/arm: Implement fp16 for Neon float-integer VCVT Peter Maydell
2020-09-01 15:18 ` [PULL 36/47] target/arm: Convert Neon VCVT fixed-point to gvec Peter Maydell
2020-09-01 15:18 ` [PULL 37/47] target/arm: Implement fp16 for Neon VCVT fixed-point Peter Maydell
2020-09-01 15:18 ` [PULL 38/47] target/arm: Implement fp16 for Neon VCVT with rounding modes Peter Maydell
2020-09-01 15:18 ` [PULL 39/47] target/arm: Implement fp16 for Neon VRINT-with-specified-rounding-mode Peter Maydell
2020-09-01 15:18 ` [PULL 40/47] target/arm: Implement fp16 for Neon VRINTX Peter Maydell
2020-09-01 15:18 ` [PULL 41/47] target/arm/vec_helper: Handle oprsz less than 16 bytes in indexed operations Peter Maydell
2020-09-01 15:18 ` [PULL 42/47] target/arm/vec_helper: Add gvec fp indexed multiply-and-add operations Peter Maydell
2020-09-01 15:18 ` [PULL 43/47] target/arm: Implement fp16 for Neon VMUL, VMLA, VMLS Peter Maydell
2020-09-01 15:18 ` [PULL 44/47] target/arm: Enable FP16 in '-cpu max' Peter Maydell
2020-09-01 15:18 ` [PULL 45/47] hw/arm/sbsa-ref: add "reg" property to DT cpu nodes Peter Maydell
2020-09-01 15:18 ` [PULL 46/47] hw/misc/sbsa_ec : Add an embedded controller for sbsa-ref Peter Maydell
2020-09-01 15:18 ` [PULL 47/47] hw/arm/sbsa-ref : Add embedded controller in secure memory Peter Maydell
2020-09-01 21:47 ` [PULL 00/47] target-arm queue Peter Maydell
2020-09-02 10:16 ` no-reply
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200901151823.29785-4-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).