[PATCH v3 32/33] target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB to decodetree

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org
Subject: [PATCH v3 32/33] target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB to decodetree
Date: Tue, 28 May 2024 13:30:43 -0700	[thread overview]
Message-ID: <20240528203044.612851-33-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240528203044.612851-1-richard.henderson@linaro.org>

These are the only instructions in the 3 source scalar class.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/a64.decode      |  10 ++
 target/arm/tcg/translate-a64.c | 231 ++++++++++++---------------------
 2 files changed, 93 insertions(+), 148 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index f7f897f9fc..6f6cd805b7 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -32,6 +32,7 @@
 &rr_e           rd rn esz
 &rrr_e          rd rn rm esz
 &rrx_e          rd rn rm idx esz
+&rrrr_e         rd rn rm ra esz
 &qrr_e          q rd rn esz
 &qrrr_e         q rd rn rm esz
 &qrrx_e         q rd rn rm idx esz
@@ -998,3 +999,12 @@ SQDMULH_vi      0.00 1111 10 . ..... 1100 . 0 ..... .....   @qrrx_s
 
 SQRDMULH_vi     0.00 1111 01 .. .... 1101 . 0 ..... .....   @qrrx_h
 SQRDMULH_vi     0.00 1111 10 . ..... 1101 . 0 ..... .....   @qrrx_s
+
+# Floating-point data-processing (3 source)
+
+@rrrr_hsd       .... .... .. . rm:5  . ra:5  rn:5  rd:5     &rrrr_e esz=%esz_hsd
+
+FMADD           0001 1111 .. 0 ..... 0 ..... ..... .....    @rrrr_hsd
+FMSUB           0001 1111 .. 0 ..... 1 ..... ..... .....    @rrrr_hsd
+FNMADD          0001 1111 .. 1 ..... 0 ..... ..... .....    @rrrr_hsd
+FNMSUB          0001 1111 .. 1 ..... 1 ..... ..... .....    @rrrr_hsd
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 14226c56cf..78a2e6d692 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5866,6 +5866,88 @@ static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
     return true;
 }
 
+/*
+ * Floating-point data-processing (3 source)
+ */
+
+static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
+{
+    TCGv_ptr fpst;
+
+    /*
+     * These are fused multiply-add.  Note that doing the negations here
+     * as separate steps is correct: an input NaN should come out with
+     * its sign bit flipped if it is a negated-input.
+     */
+    switch (a->esz) {
+    case MO_64:
+        if (fp_access_check(s)) {
+            TCGv_i64 tn = read_fp_dreg(s, a->rn);
+            TCGv_i64 tm = read_fp_dreg(s, a->rm);
+            TCGv_i64 ta = read_fp_dreg(s, a->ra);
+
+            if (neg_a) {
+                gen_vfp_negd(ta, ta);
+            }
+            if (neg_n) {
+                gen_vfp_negd(tn, tn);
+            }
+            fpst = fpstatus_ptr(FPST_FPCR);
+            gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
+            write_fp_dreg(s, a->rd, ta);
+        }
+        break;
+
+    case MO_32:
+        if (fp_access_check(s)) {
+            TCGv_i32 tn = read_fp_sreg(s, a->rn);
+            TCGv_i32 tm = read_fp_sreg(s, a->rm);
+            TCGv_i32 ta = read_fp_sreg(s, a->ra);
+
+            if (neg_a) {
+                gen_vfp_negs(ta, ta);
+            }
+            if (neg_n) {
+                gen_vfp_negs(tn, tn);
+            }
+            fpst = fpstatus_ptr(FPST_FPCR);
+            gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
+            write_fp_sreg(s, a->rd, ta);
+        }
+        break;
+
+    case MO_16:
+        if (!dc_isar_feature(aa64_fp16, s)) {
+            return false;
+        }
+        if (fp_access_check(s)) {
+            TCGv_i32 tn = read_fp_hreg(s, a->rn);
+            TCGv_i32 tm = read_fp_hreg(s, a->rm);
+            TCGv_i32 ta = read_fp_hreg(s, a->ra);
+
+            if (neg_a) {
+                gen_vfp_negh(ta, ta);
+            }
+            if (neg_n) {
+                gen_vfp_negh(tn, tn);
+            }
+            fpst = fpstatus_ptr(FPST_FPCR_F16);
+            gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
+            write_fp_sreg(s, a->rd, ta);
+        }
+        break;
+
+    default:
+        return false;
+    }
+    return true;
+}
+
+TRANS(FMADD, do_fmadd, a, false, false)
+TRANS(FNMADD, do_fmadd, a, true, true)
+TRANS(FMSUB, do_fmadd, a, false, true)
+TRANS(FNMSUB, do_fmadd, a, true, false)
+
 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
  * Note that it is the caller's responsibility to ensure that the
  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
@@ -7665,152 +7747,6 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
     }
 }
 
-/* Floating-point data-processing (3 source) - single precision */
-static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
-                                  int rd, int rn, int rm, int ra)
-{
-    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
-    TCGv_i32 tcg_res = tcg_temp_new_i32();
-    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
-
-    tcg_op1 = read_fp_sreg(s, rn);
-    tcg_op2 = read_fp_sreg(s, rm);
-    tcg_op3 = read_fp_sreg(s, ra);
-
-    /* These are fused multiply-add, and must be done as one
-     * floating point operation with no rounding between the
-     * multiplication and addition steps.
-     * NB that doing the negations here as separate steps is
-     * correct : an input NaN should come out with its sign bit
-     * flipped if it is a negated-input.
-     */
-    if (o1 == true) {
-        gen_vfp_negs(tcg_op3, tcg_op3);
-    }
-
-    if (o0 != o1) {
-        gen_vfp_negs(tcg_op1, tcg_op1);
-    }
-
-    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
-
-    write_fp_sreg(s, rd, tcg_res);
-}
-
-/* Floating-point data-processing (3 source) - double precision */
-static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
-                                  int rd, int rn, int rm, int ra)
-{
-    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
-    TCGv_i64 tcg_res = tcg_temp_new_i64();
-    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
-
-    tcg_op1 = read_fp_dreg(s, rn);
-    tcg_op2 = read_fp_dreg(s, rm);
-    tcg_op3 = read_fp_dreg(s, ra);
-
-    /* These are fused multiply-add, and must be done as one
-     * floating point operation with no rounding between the
-     * multiplication and addition steps.
-     * NB that doing the negations here as separate steps is
-     * correct : an input NaN should come out with its sign bit
-     * flipped if it is a negated-input.
-     */
-    if (o1 == true) {
-        gen_vfp_negd(tcg_op3, tcg_op3);
-    }
-
-    if (o0 != o1) {
-        gen_vfp_negd(tcg_op1, tcg_op1);
-    }
-
-    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
-
-    write_fp_dreg(s, rd, tcg_res);
-}
-
-/* Floating-point data-processing (3 source) - half precision */
-static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
-                                int rd, int rn, int rm, int ra)
-{
-    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
-    TCGv_i32 tcg_res = tcg_temp_new_i32();
-    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
-
-    tcg_op1 = read_fp_hreg(s, rn);
-    tcg_op2 = read_fp_hreg(s, rm);
-    tcg_op3 = read_fp_hreg(s, ra);
-
-    /* These are fused multiply-add, and must be done as one
-     * floating point operation with no rounding between the
-     * multiplication and addition steps.
-     * NB that doing the negations here as separate steps is
-     * correct : an input NaN should come out with its sign bit
-     * flipped if it is a negated-input.
-     */
-    if (o1 == true) {
-        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
-    }
-
-    if (o0 != o1) {
-        tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
-    }
-
-    gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
-
-    write_fp_sreg(s, rd, tcg_res);
-}
-
-/* Floating point data-processing (3 source)
- *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
- * +---+---+---+-----------+------+----+------+----+------+------+------+
- * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
- * +---+---+---+-----------+------+----+------+----+------+------+------+
- */
-static void disas_fp_3src(DisasContext *s, uint32_t insn)
-{
-    int mos = extract32(insn, 29, 3);
-    int type = extract32(insn, 22, 2);
-    int rd = extract32(insn, 0, 5);
-    int rn = extract32(insn, 5, 5);
-    int ra = extract32(insn, 10, 5);
-    int rm = extract32(insn, 16, 5);
-    bool o0 = extract32(insn, 15, 1);
-    bool o1 = extract32(insn, 21, 1);
-
-    if (mos) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    switch (type) {
-    case 0:
-        if (!fp_access_check(s)) {
-            return;
-        }
-        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
-        break;
-    case 1:
-        if (!fp_access_check(s)) {
-            return;
-        }
-        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
-        break;
-    case 3:
-        if (!dc_isar_feature(aa64_fp16, s)) {
-            unallocated_encoding(s);
-            return;
-        }
-        if (!fp_access_check(s)) {
-            return;
-        }
-        handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
-        break;
-    default:
-        unallocated_encoding(s);
-    }
-}
-
 /* Floating point immediate
  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
  * +---+---+---+-----------+------+---+------------+-------+------+------+
@@ -8255,8 +8191,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
 {
     if (extract32(insn, 24, 1)) {
-        /* Floating point data-processing (3 source) */
-        disas_fp_3src(s, insn);
+        unallocated_encoding(s); /* in decodetree */
     } else if (extract32(insn, 21, 1) == 0) {
         /* Floating point to fixed point conversions */
         disas_fp_fixed_conv(s, insn);
-- 
2.34.1

next prev parent reply	other threads:[~2024-05-28 20:34 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-28 20:30 [PATCH v3 00/33] target/arm: Convert a64 advsimd to decodetree (part 1b) Richard Henderson
2024-05-28 20:30 ` [PATCH v3 01/33] target/arm: Diagnose UNPREDICTABLE operands to PLD, PLDW, PLI Richard Henderson
2024-05-28 20:30 ` [PATCH v3 02/33] target/arm: Improve vector UQADD, UQSUB, SQADD, SQSUB Richard Henderson
2024-05-28 20:30 ` [PATCH v3 03/33] target/arm: Assert oprsz in range when using vfp.qc Richard Henderson
2024-05-30 14:14   ` Peter Maydell
2024-05-28 20:30 ` [PATCH v3 04/33] target/arm: Convert SUQADD and USQADD to gvec Richard Henderson
2024-05-30 14:18   ` Peter Maydell
2024-05-28 20:30 ` [PATCH v3 05/33] target/arm: Inline scalar SUQADD and USQADD Richard Henderson
2024-05-28 20:30 ` [PATCH v3 06/33] target/arm: Inline scalar SQADD, UQADD, SQSUB, UQSUB Richard Henderson
2024-05-28 20:30 ` [PATCH v3 07/33] target/arm: Convert SQADD, SQSUB, UQADD, UQSUB to decodetree Richard Henderson
2024-05-28 20:30 ` [PATCH v3 08/33] target/arm: Convert SUQADD, USQADD " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 09/33] target/arm: Convert SSHL, USHL " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 10/33] target/arm: Convert SRSHL and URSHL (register) to gvec Richard Henderson
2024-05-30 14:12   ` Peter Maydell
2024-05-28 20:30 ` [PATCH v3 11/33] target/arm: Convert SRSHL, URSHL to decodetree Richard Henderson
2024-05-28 20:30 ` [PATCH v3 12/33] target/arm: Convert SQSHL and UQSHL (register) to gvec Richard Henderson
2024-05-30 14:12   ` Peter Maydell
2024-05-28 20:30 ` [PATCH v3 13/33] target/arm: Convert SQSHL, UQSHL to decodetree Richard Henderson
2024-05-28 20:30 ` [PATCH v3 14/33] target/arm: Convert SQRSHL and UQRSHL (register) to gvec Richard Henderson
2024-05-28 20:30 ` [PATCH v3 15/33] target/arm: Convert SQRSHL, UQRSHL to decodetree Richard Henderson
2024-05-28 20:30 ` [PATCH v3 16/33] target/arm: Convert ADD, SUB (vector) " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 17/33] target/arm: Convert CMGT, CMHI, CMGE, CMHS, CMTST, CMEQ " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 18/33] target/arm: Use TCG_COND_TSTNE in gen_cmtst_{i32, i64} Richard Henderson
2024-05-28 20:30 ` [PATCH v3 19/33] target/arm: Use TCG_COND_TSTNE in gen_cmtst_vec Richard Henderson
2024-05-28 20:30 ` [PATCH v3 20/33] target/arm: Convert SHADD, UHADD to gvec Richard Henderson
2024-05-28 20:30 ` [PATCH v3 21/33] target/arm: Convert SHADD, UHADD to decodetree Richard Henderson
2024-05-28 20:30 ` [PATCH v3 22/33] target/arm: Convert SHSUB, UHSUB to gvec Richard Henderson
2024-05-28 20:30 ` [PATCH v3 23/33] target/arm: Convert SHSUB, UHSUB to decodetree Richard Henderson
2024-05-28 20:30 ` [PATCH v3 24/33] target/arm: Convert SRHADD, URHADD to gvec Richard Henderson
2024-05-28 20:30 ` [PATCH v3 25/33] target/arm: Convert SRHADD, URHADD to decodetree Richard Henderson
2024-05-28 20:30 ` [PATCH v3 26/33] target/arm: Convert SMAX, SMIN, UMAX, UMIN " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 27/33] target/arm: Convert SABA, SABD, UABA, UABD " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 28/33] target/arm: Convert MUL, PMUL " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 29/33] target/arm: Convert MLA, MLS " Richard Henderson
2024-05-28 20:30 ` [PATCH v3 30/33] target/arm: Tidy SQDMULH, SQRDMULH (vector) Richard Henderson
2024-05-28 20:30 ` [PATCH v3 31/33] target/arm: Convert SQDMULH, SQRDMULH to decodetree Richard Henderson
2024-05-30 14:10   ` Peter Maydell
2024-05-28 20:30 ` Richard Henderson [this message]
2024-05-30 14:06   ` [PATCH v3 32/33] target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB " Peter Maydell
2024-05-28 20:30 ` [PATCH v3 33/33] target/arm: Convert FCSEL " Richard Henderson
2024-05-30 14:32 ` [PATCH v3 00/33] target/arm: Convert a64 advsimd to decodetree (part 1b) Peter Maydell

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f7f897f9f dfblob:6f6cd805b dfblob:14226c56c dfblob:78a2e6d69 )
 OR (
bs:"[PATCH v3 32/33] target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB to decodetree" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240528203044.612851-33-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).