[PATCH v2 02/69] tests/tcg/x86_64/fma: Test some x86 fused-multiply-add cases

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-arm@nongnu.org, qemu-devel@nongnu.org
Subject: [PATCH v2 02/69] tests/tcg/x86_64/fma: Test some x86 fused-multiply-add cases
Date: Sat,  1 Feb 2025 16:39:05 +0000	[thread overview]
Message-ID: <20250201164012.1660228-3-peter.maydell@linaro.org> (raw)
In-Reply-To: <20250201164012.1660228-1-peter.maydell@linaro.org>

Add a test case which tests some corner case behaviour of
fused-multiply-add on x86:
 * 0 * Inf + SNaN should raise Invalid
 * 0 * Inf + QNaN should not raise Invalid
 * tininess should be detected after rounding

There is also one currently-disabled test case:
 * flush-to-zero should be done after rounding

This is disabled because QEMU's emulation currently does this
incorrectly (and so would fail the test).  The test case is kept in
but disabled, because it is the justification for the test harness
supporting runs both with and without FTZ set.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 tests/tcg/x86_64/fma.c           | 109 +++++++++++++++++++++++++++++++
 tests/tcg/x86_64/Makefile.target |   1 +
 2 files changed, 110 insertions(+)
 create mode 100644 tests/tcg/x86_64/fma.c

diff --git a/tests/tcg/x86_64/fma.c b/tests/tcg/x86_64/fma.c
new file mode 100644
index 00000000000..09c622ebc00
--- /dev/null
+++ b/tests/tcg/x86_64/fma.c
@@ -0,0 +1,109 @@
+/*
+ * Test some fused multiply add corner cases.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <inttypes.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/*
+ * Perform one "n * m + a" operation using the vfmadd231sd insn and return
+ * the result; on return *mxcsr_p is set to the bottom 6 bits of MXCSR
+ * (the Flag bits, cleared before the operation). MXCSR.FTZ is set for the
+ * operation if ftz is true and clear otherwise; the caller's MXCSR is
+ * restored afterwards. We print the operation and its results to stdout.
+ */
+static uint64_t do_fmadd(uint64_t n, uint64_t m, uint64_t a,
+                         bool ftz, uint32_t *mxcsr_p)
+{
+    uint64_t r;
+    uint32_t mxcsr = 0;
+    uint32_t ftz_bit = ftz ? (1 << 15) : 0;
+    uint32_t saved_mxcsr = 0;
+
+    asm volatile("stmxcsr %[saved_mxcsr]\n"
+                 "stmxcsr %[mxcsr]\n"
+                 "andl $0xffff7fc0, %[mxcsr]\n" /* clear FTZ and Flag bits */
+                 "orl %[ftz_bit], %[mxcsr]\n"
+                 "ldmxcsr %[mxcsr]\n"
+                 "movq %[a], %%xmm0\n"
+                 "movq %[m], %%xmm1\n"
+                 "movq %[n], %%xmm2\n"
+                 /* xmm0 = xmm0 + xmm2 * xmm1 */
+                 "vfmadd231sd %%xmm1, %%xmm2, %%xmm0\n"
+                 "movq %%xmm0, %[r]\n"
+                 "stmxcsr %[mxcsr]\n" /* read back flags set by the insn */
+                 "ldmxcsr %[saved_mxcsr]\n"
+                 : [r] "=r" (r), [mxcsr] "=m" (mxcsr),
+                   [saved_mxcsr] "=m" (saved_mxcsr)
+                 : [n] "r" (n), [m] "r" (m), [a] "r" (a),
+                   [ftz_bit] "r" (ftz_bit)
+                 : "xmm0", "xmm1", "xmm2");
+    *mxcsr_p = mxcsr & 0x3f;
+    printf("vfmadd231sd 0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64
+           " = 0x%" PRIx64 " MXCSR flags 0x%" PRIx32 "\n",
+           n, m, a, r, *mxcsr_p);
+    return r;
+}
+
+typedef struct testdata {
+    /* Inputs n, m, a: the operation tested is "n * m + a" */
+    uint64_t n;
+    uint64_t m;
+    uint64_t a;
+    bool ftz; /* passed to do_fmadd(): run with MXCSR.FTZ set? */
+    /* Expected result, compared against do_fmadd()'s return value */
+    uint64_t expected_r;
+    /* Expected low 6 bits of MXCSR (the Flag bits) */
+    uint32_t expected_mxcsr;
+} testdata;
+
+static testdata tests[] = {
+    { 0, 0x7ff0000000000000, 0x7ff000000000aaaa, false, /* 0 * Inf + SNaN */
+      0x7ff800000000aaaa, 1 }, /* Should be QNaN and does raise Invalid */
+    { 0, 0x7ff0000000000000, 0x7ff800000000aaaa, false, /* 0 * Inf + QNaN */
+      0x7ff800000000aaaa, 0 }, /* Should be QNaN and does *not* raise Invalid */
+    /*
+     * These inputs give a result which is tiny before rounding but which
+     * becomes non-tiny after rounding. x86 is a "detect tininess after
+     * rounding" architecture, so it should give a non-denormal result and
+     * not set the Underflow flag (only the Precision flag for an inexact
+     * result).
+     */
+    { 0x3fdfffffffffffff, 0x001fffffffffffff, 0x801fffffffffffff, false,
+      0x8010000000000000, 0x20 }, /* Precision flag only, no Underflow */
+    /*
+     * Flushing of denormal outputs to zero should also happen after
+     * rounding, so setting FTZ should not affect the result or the flags.
+     * QEMU currently does not emulate this correctly because we do the
+     * flush-to-zero check before rounding, so we incorrectly produce a
+     * zero result and set Underflow as well as Precision.
+     */
+#ifdef ENABLE_FAILING_TESTS
+    { 0x3fdfffffffffffff, 0x001fffffffffffff, 0x801fffffffffffff, true,
+      0x8010000000000000, 0x20 }, /* Enabling FTZ shouldn't change flags */
+#endif
+};
+
+int main(void)
+{
+    int failures = 0; /* number of mismatches seen across all cases */
+    for (size_t idx = 0; idx < ARRAY_SIZE(tests); idx++) {
+        const testdata *t = &tests[idx];
+        uint32_t flags;
+        uint64_t result = do_fmadd(t->n, t->m, t->a, t->ftz, &flags);
+        if (result != t->expected_r) {
+            printf("expected result 0x%" PRIx64 "\n", t->expected_r);
+            failures++;
+        }
+        if (flags != t->expected_mxcsr) {
+            printf("expected MXCSR flags 0x%x\n", t->expected_mxcsr);
+            failures++;
+        }
+    }
+    return failures != 0; /* exit status 0 only if every case matched */
+}
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index d6dff559c7d..be20fc64e88 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -18,6 +18,7 @@ X86_64_TESTS += adox
 X86_64_TESTS += test-1648
 X86_64_TESTS += test-2175
 X86_64_TESTS += cross-modifying-code
+X86_64_TESTS += fma
 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
 else
 TESTS=$(MULTIARCH_TESTS)
-- 
2.34.1

next prev parent reply	other threads:[~2025-02-01 16:43 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-01 16:39 [PATCH v2 00/69] target/arm: FEAT_AFP and FEAT_RPRES Peter Maydell
2025-02-01 16:39 ` [PATCH v2 01/69] target/i386: Do not raise Invalid for 0 * Inf + QNaN Peter Maydell
2025-02-01 16:39 ` Peter Maydell [this message]
2025-02-01 16:39 ` [PATCH v2 03/69] fpu: Add float_class_denormal Peter Maydell
2025-02-01 16:39 ` [PATCH v2 04/69] fpu: Implement float_flag_input_denormal_used Peter Maydell
2025-02-02 16:45   ` Richard Henderson
2025-02-01 16:39 ` [PATCH v2 05/69] fpu: allow flushing of output denormals to be after rounding Peter Maydell
2025-02-02 16:50   ` Richard Henderson
2025-02-01 16:39 ` [PATCH v2 06/69] target/arm: Define FPCR AH, FIZ, NEP bits Peter Maydell
2025-02-02 16:51   ` Richard Henderson
2025-02-01 16:39 ` [PATCH v2 07/69] target/arm: Implement FPCR.FIZ handling Peter Maydell
2025-02-01 16:39 ` [PATCH v2 08/69] target/arm: Adjust FP behaviour for FPCR.AH = 1 Peter Maydell
2025-02-11 13:17   ` Peter Maydell
2025-02-01 16:39 ` [PATCH v2 09/69] target/arm: Adjust exception flag handling for AH " Peter Maydell
2025-02-01 16:39 ` [PATCH v2 10/69] target/arm: Add FPCR.AH to tbflags Peter Maydell
2025-02-01 16:39 ` [PATCH v2 11/69] target/arm: Set up float_status to use for FPCR.AH=1 behaviour Peter Maydell
2025-02-01 16:39 ` [PATCH v2 12/69] target/arm: Use FPST_FPCR_AH for FRECPE, FRECPS, FRECPX, FRSQRTE, FRSQRTS Peter Maydell
2025-02-01 16:39 ` [PATCH v2 13/69] target/arm: Use FPST_FPCR_AH for BFCVT* insns Peter Maydell
2025-02-01 16:39 ` [PATCH v2 14/69] target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns Peter Maydell
2025-02-01 16:39 ` [PATCH v2 15/69] target/arm: Add FPCR.NEP to TBFLAGS Peter Maydell
2025-02-01 16:39 ` [PATCH v2 16/69] target/arm: Define and use new write_fp_*reg_merging() functions Peter Maydell
2025-02-01 16:39 ` [PATCH v2 17/69] target/arm: Handle FPCR.NEP for 3-input scalar operations Peter Maydell
2025-02-01 16:39 ` [PATCH v2 18/69] target/arm: Handle FPCR.NEP for BFCVT scalar Peter Maydell
2025-02-01 16:39 ` [PATCH v2 19/69] target/arm: Handle FPCR.NEP for 1-input scalar operations Peter Maydell
2025-02-01 16:39 ` [PATCH v2 20/69] target/arm: Handle FPCR.NEP in do_cvtf_scalar() Peter Maydell
2025-02-01 16:39 ` [PATCH v2 21/69] target/arm: Handle FPCR.NEP for scalar FABS and FNEG Peter Maydell
2025-02-01 16:39 ` [PATCH v2 22/69] target/arm: Handle FPCR.NEP for FCVTXN (scalar) Peter Maydell
2025-02-01 16:39 ` [PATCH v2 23/69] target/arm: Handle FPCR.NEP for NEP for FMUL, FMULX scalar by element Peter Maydell
2025-02-01 16:39 ` [PATCH v2 24/69] target/arm: Implement FPCR.AH semantics for scalar FMIN/FMAX Peter Maydell
2025-02-01 16:39 ` [PATCH v2 25/69] target/arm: Implement FPCR.AH semantics for vector FMIN/FMAX Peter Maydell
2025-02-01 16:39 ` [PATCH v2 26/69] target/arm: Implement FPCR.AH semantics for FMAXV and FMINV Peter Maydell
2025-02-01 16:39 ` [PATCH v2 27/69] target/arm: Implement FPCR.AH semantics for FMINP and FMAXP Peter Maydell
2025-02-01 16:39 ` [PATCH v2 28/69] target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV Peter Maydell
2025-02-01 16:39 ` [PATCH v2 29/69] target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate Peter Maydell
2025-02-01 16:39 ` [PATCH v2 30/69] target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector Peter Maydell
2025-02-01 16:39 ` [PATCH v2 31/69] target/arm: Implement FPCR.AH handling of negation of NaN Peter Maydell
2025-02-01 16:39 ` [PATCH v2 32/69] target/arm: Implement FPCR.AH handling for scalar FABS and FABD Peter Maydell
2025-02-01 16:39 ` [PATCH v2 33/69] target/arm: Handle FPCR.AH in vector FABD Peter Maydell
2025-02-01 16:39 ` [PATCH v2 34/69] target/arm: Handle FPCR.AH in SVE FNEG Peter Maydell
2025-02-01 16:39 ` [PATCH v2 35/69] target/arm: Handle FPCR.AH in SVE FABS Peter Maydell
2025-02-01 16:39 ` [PATCH v2 36/69] target/arm: Handle FPCR.AH in SVE FABD Peter Maydell
2025-02-01 16:39 ` [PATCH v2 37/69] target/arm: Handle FPCR.AH in negation steps in SVE FCADD Peter Maydell
2025-02-02 17:17   ` Richard Henderson
2025-02-01 16:39 ` [PATCH v2 38/69] target/arm: Handle FPCR.AH in negation steps in FCADD Peter Maydell
2025-02-01 16:39 ` [PATCH v2 39/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns Peter Maydell
2025-02-01 16:39 ` [PATCH v2 40/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns Peter Maydell
2025-02-01 16:39 ` [PATCH v2 41/69] target/arm: Handle FPCR.AH in negation step in FMLS (indexed) Peter Maydell
2025-02-01 16:39 ` [PATCH v2 42/69] target/arm: Handle FPCR.AH in negation in FMLS (vector) Peter Maydell
2025-02-01 16:39 ` [PATCH v2 43/69] target/arm: Handle FPCR.AH in negation step in SVE " Peter Maydell
2025-02-01 16:39 ` [PATCH v2 44/69] target/arm: Handle FPCR.AH in SVE FTSSEL Peter Maydell
2025-02-01 16:39 ` [PATCH v2 45/69] target/arm: Handle FPCR.AH in SVE FTMAD Peter Maydell
2025-02-01 16:39 ` [PATCH v2 46/69] target/arm: Handle FPCR.AH in vector FCMLA Peter Maydell
2025-02-01 16:39 ` [PATCH v2 47/69] target/arm: Handle FPCR.AH in FCMLA by index Peter Maydell
2025-02-01 16:39 ` [PATCH v2 48/69] target/arm: Handle FPCR.AH in SVE FCMLA Peter Maydell
2025-02-01 16:39 ` [PATCH v2 49/69] target/arm: Handle FPCR.AH in FMLSL (by element and vector) Peter Maydell
2025-02-01 16:39 ` [PATCH v2 50/69] target/arm: Handle FPCR.AH in SVE FMLSL (indexed) Peter Maydell
2025-02-01 16:39 ` [PATCH v2 51/69] target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) Peter Maydell
2025-02-01 16:39 ` [PATCH v2 52/69] target/arm: Enable FEAT_AFP for '-cpu max' Peter Maydell
2025-02-01 16:39 ` [PATCH v2 53/69] target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper Peter Maydell
2025-02-01 16:39 ` [PATCH v2 54/69] target/arm: Implement increased precision FRECPE Peter Maydell
2025-02-01 16:39 ` [PATCH v2 55/69] target/arm: Implement increased precision FRSQRTE Peter Maydell
2025-02-01 16:39 ` [PATCH v2 56/69] target/arm: Enable FEAT_RPRES for -cpu max Peter Maydell
2025-02-01 16:40 ` [PATCH v2 57/69] target/arm: Introduce CPUARMState.vfp.fp_status[] Peter Maydell
2025-02-01 16:40 ` [PATCH v2 58/69] target/arm: Remove standard_fp_status_f16 Peter Maydell
2025-02-01 16:40 ` [PATCH v2 59/69] target/arm: Remove standard_fp_status Peter Maydell
2025-02-01 16:40 ` [PATCH v2 60/69] target/arm: Remove ah_fp_status_f16 Peter Maydell
2025-02-01 16:40 ` [PATCH v2 61/69] target/arm: Remove ah_fp_status Peter Maydell
2025-02-01 16:40 ` [PATCH v2 62/69] target/arm: Remove fp_status_f16_a64 Peter Maydell
2025-02-01 16:40 ` [PATCH v2 63/69] target/arm: Remove fp_status_f16_a32 Peter Maydell
2025-02-01 16:40 ` [PATCH v2 64/69] target/arm: Remove fp_status_a64 Peter Maydell
2025-02-01 16:40 ` [PATCH v2 65/69] target/arm: Remove fp_status_a32 Peter Maydell
2025-02-01 16:40 ` [PATCH v2 66/69] target/arm: Simplify fp_status indexing in mve_helper.c Peter Maydell
2025-02-01 16:40 ` [PATCH v2 67/69] target/arm: Simplify DO_VFP_cmp in vfp_helper.c Peter Maydell
2025-02-01 16:40 ` [PATCH v2 68/69] target/arm: Read fz16 from env->vfp.fpcr Peter Maydell
2025-02-01 16:40 ` [PATCH v2 69/69] target/arm: Sink fp_status and fpcr access into do_fmlal* Peter Maydell

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:09c622ebc0 dfblob:d6dff559c7 dfblob:be20fc64e8 )
 OR (
bs:"[PATCH v2 02/69] tests/tcg/x86_64/fma: Test some x86 fused-multiply-add cases" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250201164012.1660228-3-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).