From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>
Subject: [Qemu-devel] [PATCH v2 27/42] target/arm: Convert VFP fused multiply-add insns to decodetree
Date: Tue, 11 Jun 2019 11:53:36 +0100 [thread overview]
Message-ID: <20190611105351.9871-28-peter.maydell@linaro.org> (raw)
In-Reply-To: <20190611105351.9871-1-peter.maydell@linaro.org>
Convert the VFP fused multiply-add instructions (VFNMA, VFNMS,
VFMA, VFMS) to decodetree.
Note that in the old decode structure we were implementing
these to honour the VFP vector stride/length. These instructions
were introduced in VFPv4, and in the v7A architecture they
are UNPREDICTABLE if the vector stride or length are non-zero.
In v8A they must UNDEF if stride or length are non-zero, like
all VFP instructions; we choose to UNDEF always.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-vfp.inc.c | 121 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 53 +--------------
target/arm/vfp.decode | 9 +++
3 files changed, 131 insertions(+), 52 deletions(-)
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index 6af99605d5c..ba6506a378c 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1481,3 +1481,124 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
{
return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
+
+static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct : an input NaN should come out with its sign
+ * bit flipped if it is a negated-input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i32 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only.
+ * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+ * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+ (s->vec_len != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i32();
+
+ neon_load_reg32(vn, a->vn);
+ neon_load_reg32(vm, a->vm);
+ if (a->o2) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negs(vn, vn);
+ }
+ neon_load_reg32(vd, a->vd);
+ if (a->o1 & 1) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negs(vd, vd);
+ }
+ fpst = get_fpstatus_ptr(0);
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
+ neon_store_reg32(vd, a->vd);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(vn);
+ tcg_temp_free_i32(vm);
+ tcg_temp_free_i32(vd);
+
+ return true;
+}
+
+static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct : an input NaN should come out with its sign
+ * bit flipped if it is a negated-input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i64 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only.
+ * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+ * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+ (s->vec_len != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i64();
+ vm = tcg_temp_new_i64();
+ vd = tcg_temp_new_i64();
+
+ neon_load_reg64(vn, a->vn);
+ neon_load_reg64(vm, a->vm);
+ if (a->o2) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negd(vn, vn);
+ }
+ neon_load_reg64(vd, a->vd);
+ if (a->o1 & 1) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negd(vd, vd);
+ }
+ fpst = get_fpstatus_ptr(0);
+ gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
+ neon_store_reg64(vd, a->vd);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i64(vn);
+ tcg_temp_free_i64(vm);
+ tcg_temp_free_i64(vd);
+
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index a9ec6eaef80..150e9e64cc3 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3093,7 +3093,7 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
rn = VFP_SREG_N(insn);
switch (op) {
- case 0 ... 8:
+ case 0 ... 13:
/* Already handled by decodetree */
return 1;
default:
@@ -3279,57 +3279,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
for (;;) {
/* Perform the calculation. */
switch (op) {
- case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
- case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
- case 12: /* VFMA : fd = muladd( fd, fn, fm) */
- case 13: /* VFMS : fd = muladd( fd, -fn, fm) */
- /* These are fused multiply-add, and must be done as one
- * floating point operation with no rounding between the
- * multiplication and addition steps.
- * NB that doing the negations here as separate steps is
- * correct : an input NaN should come out with its sign bit
- * flipped if it is a negated-input.
- */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
- return 1;
- }
- if (dp) {
- TCGv_ptr fpst;
- TCGv_i64 frd;
- if (op & 1) {
- /* VFNMS, VFMS */
- gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
- }
- frd = tcg_temp_new_i64();
- tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
- if (op & 2) {
- /* VFNMA, VFNMS */
- gen_helper_vfp_negd(frd, frd);
- }
- fpst = get_fpstatus_ptr(0);
- gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
- cpu_F1d, frd, fpst);
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i64(frd);
- } else {
- TCGv_ptr fpst;
- TCGv_i32 frd;
- if (op & 1) {
- /* VFNMS, VFMS */
- gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
- }
- frd = tcg_temp_new_i32();
- tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
- if (op & 2) {
- gen_helper_vfp_negs(frd, frd);
- }
- fpst = get_fpstatus_ptr(0);
- gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
- cpu_F1s, frd, fpst);
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(frd);
- }
- break;
case 14: /* fconst */
if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
return 1;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index de305f60e18..37eec0e1310 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -142,3 +142,12 @@ VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... \
vm=%vm_sp vn=%vn_sp vd=%vd_sp
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VFM_sp ---- 1110 1.01 .... .... 1010 . o2:1 . 0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=1
+VFM_dp ---- 1110 1.01 .... .... 1011 . o2:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=1
+VFM_sp ---- 1110 1.10 .... .... 1010 . o2:1 . 0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp o1=2
+VFM_dp ---- 1110 1.10 .... .... 1011 . o2:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp o1=2
--
2.20.1
next prev parent reply other threads:[~2019-06-11 11:44 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-11 10:53 [Qemu-devel] [PATCH v2 00/42] target/arm: Convert VFP decoder to decodetree Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 01/42] decodetree: Fix comparison of Field Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 02/42] target/arm: Add stubs for AArch32 VFP decodetree Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 03/42] target/arm: Factor out VFP access checking code Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 04/42] target/arm: Fix Cortex-R5F MVFR values Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 05/42] target/arm: Explicitly enable VFP short-vectors for aarch32 -cpu max Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 06/42] target/arm: Convert the VSEL instructions to decodetree Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 07/42] target/arm: Convert VMINNM, VMAXNM " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 08/42] target/arm: Convert VRINTA/VRINTN/VRINTP/VRINTM " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 09/42] target/arm: Convert VCVTA/VCVTN/VCVTP/VCVTM " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 10/42] target/arm: Move the VFP trans_* functions to translate-vfp.inc.c Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 11/42] target/arm: Add helpers for VFP register loads and stores Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 12/42] target/arm: Convert "double-precision" register moves to decodetree Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 13/42] target/arm: Convert "single-precision" " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 14/42] target/arm: Convert VFP two-register transfer insns " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 15/42] target/arm: Convert VFP VLDR and VSTR " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 16/42] target/arm: Convert the VFP load/store multiple insns " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 17/42] target/arm: Remove VLDR/VSTR/VLDM/VSTM use of cpu_F0s and cpu_F0d Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 18/42] target/arm: Convert VFP VMLA to decodetree Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 19/42] target/arm: Convert VFP VMLS " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 20/42] target/arm: Convert VFP VNMLS " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 21/42] target/arm: Convert VFP VNMLA " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 22/42] target/arm: Convert VMUL " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 23/42] target/arm: Convert VNMUL " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 24/42] target/arm: Convert VADD " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 25/42] target/arm: Convert VSUB " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 26/42] target/arm: Convert VDIV " Peter Maydell
2019-06-11 10:53 ` Peter Maydell [this message]
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 28/42] target/arm: Convert VMOV (imm) " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 29/42] target/arm: Convert VABS " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 30/42] target/arm: Convert VNEG " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 31/42] target/arm: Convert VSQRT " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 32/42] target/arm: Convert VMOV (register) " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 33/42] target/arm: Convert VFP comparison insns " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 34/42] target/arm: Convert the VCVT-from-f16 " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 35/42] target/arm: Convert the VCVT-to-f16 " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 36/42] target/arm: Convert VFP round " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 37/42] target/arm: Convert double-single precision conversion " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 38/42] target/arm: Convert integer-to-float " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 39/42] target/arm: Convert VJCVT " Peter Maydell
2019-06-11 15:08 ` Richard Henderson
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 40/42] target/arm: Convert VCVT fp/fixed-point conversion insns " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 41/42] target/arm: Convert float-to-integer VCVT " Peter Maydell
2019-06-11 10:53 ` [Qemu-devel] [PATCH v2 42/42] target/arm: Fix short-vector increment behaviour Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190611105351.9871-28-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).