From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: alex.bennee@linaro.org, david@redhat.com
Subject: [PATCH 37/72] softfloat: Move muladd_floats to softfloat-parts.c.inc
Date: Fri, 7 May 2021 18:47:27 -0700
Message-ID: <20210508014802.892561-38-richard.henderson@linaro.org>
In-Reply-To: <20210508014802.892561-1-richard.henderson@linaro.org>

Rename to parts$N_muladd.
Implement float128_muladd with FloatParts128.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
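(Reviewer note, kept below the --- so it stays out of the commit message:
a minimal sketch of calling the new float128_muladd entry point, with
made-up operand values. int64_to_float128 and the float_muladd_* flags
are existing declarations in include/fpu/softfloat.h.)

    /* Sketch only; assumes "fpu/softfloat.h". */
    float_status st = {0};  /* zero-init: round-to-nearest-even, no flags */
    float128 a = int64_to_float128(3, &st);
    float128 b = int64_to_float128(4, &st);
    float128 c = int64_to_float128(5, &st);
    /* Fused -(a * b) + c, i.e. -7, with a single rounding at the end. */
    float128 r = float128_muladd(a, b, c, float_muladd_negate_product, &st);
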
 include/fpu/softfloat.h   |   2 +
 fpu/softfloat.c           | 406 ++++++++++++++++++--------------------
 tests/fp/fp-bench.c       |   8 +-
 tests/fp/fp-test.c        |   2 +-
 fpu/softfloat-parts.c.inc | 126 ++++++++++++
 tests/fp/wrap.c.inc       |  12 ++
 6 files changed, 342 insertions(+), 214 deletions(-)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 019c2ec66d..53f2c2ea3c 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -1197,6 +1197,8 @@ float128 float128_round_to_int(float128, float_status *status);
float128 float128_add(float128, float128, float_status *status);
float128 float128_sub(float128, float128, float_status *status);
float128 float128_mul(float128, float128, float_status *status);
+float128 float128_muladd(float128, float128, float128, int,
+ float_status *status);
float128 float128_div(float128, float128, float_status *status);
float128 float128_rem(float128, float128, float_status *status);
float128 float128_sqrt(float128, float_status *status);
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 4f498c11e5..a9ee8498ae 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -715,6 +715,10 @@ static float128 float128_pack_raw(const FloatParts128 *p)
#define PARTS_GENERIC_64_128(NAME, P) \
QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
+#define PARTS_GENERIC_64_128_256(NAME, P) \
+ QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
+ (FloatParts128 *, parts128_##NAME), parts64_##NAME)
+
#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
@@ -760,15 +764,17 @@ static void parts128_uncanon(FloatParts128 *p, float_status *status,
static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
+static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
#define parts_add_normal(A, B) \
- PARTS_GENERIC_64_128(add_normal, A)(A, B)
+ PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
+static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
#define parts_sub_normal(A, B) \
- PARTS_GENERIC_64_128(sub_normal, A)(A, B)
+ PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
float_status *s, bool subtract);
@@ -786,6 +792,16 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define parts_mul(A, B, S) \
PARTS_GENERIC_64_128(mul, A)(A, B, S)
+static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
+ FloatParts64 *c, int flags,
+ float_status *s);
+static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
+ FloatParts128 *c, int flags,
+ float_status *s);
+
+#define parts_muladd(A, B, C, Z, S) \
+ PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
+
/*
* Helper functions for softfloat-parts.c.inc, per-size operations.
*/
@@ -793,6 +809,10 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define FRAC_GENERIC_64_128(NAME, P) \
QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+#define FRAC_GENERIC_64_128_256(NAME, P) \
+ QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
+ (FloatParts128 *, frac128_##NAME), frac64_##NAME)
+
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
return uadd64_overflow(a->frac, b->frac, &r->frac);
@@ -806,7 +826,17 @@ static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
return c;
}
-#define frac_add(R, A, B) FRAC_GENERIC_64_128(add, R)(R, A, B)
+static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
+{
+ bool c = 0;
+ r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
+ r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
+ r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
+ r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
+ return c;
+}
+
+#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
@@ -901,7 +931,16 @@ static void frac128_neg(FloatParts128 *a)
a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}
-#define frac_neg(A) FRAC_GENERIC_64_128(neg, A)(A)
+static void frac256_neg(FloatParts256 *a)
+{
+ bool c = 0;
+ a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
+ a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
+ a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
+ a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
+}
+
+#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
static int frac64_normalize(FloatParts64 *a)
{
@@ -932,7 +971,55 @@ static int frac128_normalize(FloatParts128 *a)
return 128;
}
-#define frac_normalize(A) FRAC_GENERIC_64_128(normalize, A)(A)
+static int frac256_normalize(FloatParts256 *a)
+{
+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
+ int ret, shl, shr;
+
+ if (likely(a0)) {
+ shl = clz64(a0);
+ if (shl == 0) {
+ return 0;
+ }
+ ret = shl;
+ } else {
+ if (a1) {
+ ret = 64;
+ a0 = a1, a1 = a2, a2 = a3, a3 = 0;
+ } else if (a2) {
+ ret = 128;
+ a0 = a2, a1 = a3, a2 = 0, a3 = 0;
+ } else if (a3) {
+ ret = 192;
+ a0 = a3, a1 = 0, a2 = 0, a3 = 0;
+ } else {
+ ret = 256;
+ a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+ goto done;
+ }
+ shl = clz64(a0);
+ if (shl == 0) {
+ goto done;
+ }
+ ret += shl;
+ }
+
+ shr = -shl & 63;
+ a0 = (a0 << shl) | (a1 >> shr);
+ a1 = (a1 << shl) | (a2 >> shr);
+ a2 = (a2 << shl) | (a3 >> shr);
+ a3 = (a3 << shl);
+
+ done:
+ a->frac_hi = a0;
+ a->frac_hm = a1;
+ a->frac_lm = a2;
+ a->frac_lo = a3;
+ return ret;
+}
+
+#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
static void frac64_shl(FloatParts64 *a, int c)
{
@@ -968,7 +1055,51 @@ static void frac128_shrjam(FloatParts128 *a, int c)
shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
}
-#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)
+static void frac256_shrjam(FloatParts256 *a, int c)
+{
+ uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
+ uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
+ uint64_t sticky = 0;
+ int invc;
+
+ if (unlikely(c == 0)) {
+ return;
+ } else if (likely(c < 64)) {
+ /* nothing */
+ } else if (likely(c < 256)) {
+ if (unlikely(c & 128)) {
+ sticky |= a2 | a3;
+ a3 = a1, a2 = a0, a1 = 0, a0 = 0;
+ }
+ if (unlikely(c & 64)) {
+ sticky |= a3;
+ a3 = a2, a2 = a1, a1 = a0, a0 = 0;
+ }
+ c &= 63;
+ if (c == 0) {
+ goto done;
+ }
+ } else {
+ sticky = a0 | a1 | a2 | a3;
+ a0 = a1 = a2 = a3 = 0;
+ goto done;
+ }
+
+ invc = -c & 63;
+ sticky |= a3 << invc;
+ a3 = (a3 >> c) | (a2 << invc);
+ a2 = (a2 >> c) | (a1 << invc);
+ a1 = (a1 >> c) | (a0 << invc);
+ a0 = (a0 >> c);
+
+ done:
+ a->frac_lo = a3 | (sticky != 0);
+ a->frac_lm = a2;
+ a->frac_hm = a1;
+ a->frac_hi = a0;
+}
+
+#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
@@ -983,7 +1114,17 @@ static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
return c;
}
-#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
+static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
+{
+ bool c = 0;
+ r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
+ r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
+ r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
+ r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
+ return c;
+}
+
+#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
@@ -998,6 +1139,22 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
+static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
+{
+ r->frac_hi = a->frac;
+ r->frac_lo = 0;
+}
+
+static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
+{
+ r->frac_hi = a->frac_hi;
+ r->frac_hm = a->frac_lo;
+ r->frac_lm = 0;
+ r->frac_lo = 0;
+}
+
+#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
+
#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN glue(FloatParts,N)
#define FloatPartsW glue(FloatParts,W)
@@ -1016,6 +1173,12 @@ static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"
+#undef N
+#undef W
+#define N 256
+
+#include "softfloat-parts-addsub.c.inc"
+
#undef N
#undef W
#undef partsN
@@ -1386,230 +1549,48 @@ float128_mul(float128 a, float128 b, float_status *status)
}
/*
- * Returns the result of multiplying the floating-point values `a' and
- * `b' then adding 'c', with no intermediate rounding step after the
- * multiplication. The operation is performed according to the
- * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
- * The flags argument allows the caller to select negation of the
- * addend, the intermediate product, or the final result. (The
- * difference between this and having the caller do a separate
- * negation is that negating externally will flip the sign bit on
- * NaNs.)
+ * Fused multiply-add
*/
-static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
- int flags, float_status *s)
-{
- bool inf_zero, p_sign;
- bool sign_flip = flags & float_muladd_negate_result;
- FloatClass p_class;
- uint64_t hi, lo;
- int p_exp;
- int ab_mask, abc_mask;
-
- ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
- abc_mask = float_cmask(c.cls) | ab_mask;
- inf_zero = ab_mask == float_cmask_infzero;
-
- /* It is implementation-defined whether the cases of (0,inf,qnan)
- * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
- * they return if they do), so we have to hand this information
- * off to the target-specific pick-a-NaN routine.
- */
- if (unlikely(abc_mask & float_cmask_anynan)) {
- return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);
- }
-
- if (inf_zero) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&a, s);
- return a;
- }
-
- if (flags & float_muladd_negate_c) {
- c.sign ^= 1;
- }
-
- p_sign = a.sign ^ b.sign;
-
- if (flags & float_muladd_negate_product) {
- p_sign ^= 1;
- }
-
- if (ab_mask & float_cmask_inf) {
- p_class = float_class_inf;
- } else if (ab_mask & float_cmask_zero) {
- p_class = float_class_zero;
- } else {
- p_class = float_class_normal;
- }
-
- if (c.cls == float_class_inf) {
- if (p_class == float_class_inf && p_sign != c.sign) {
- float_raise(float_flag_invalid, s);
- parts_default_nan(&c, s);
- } else {
- c.sign ^= sign_flip;
- }
- return c;
- }
-
- if (p_class == float_class_inf) {
- a.cls = float_class_inf;
- a.sign = p_sign ^ sign_flip;
- return a;
- }
-
- if (p_class == float_class_zero) {
- if (c.cls == float_class_zero) {
- if (p_sign != c.sign) {
- p_sign = s->float_rounding_mode == float_round_down;
- }
- c.sign = p_sign;
- } else if (flags & float_muladd_halve_result) {
- c.exp -= 1;
- }
- c.sign ^= sign_flip;
- return c;
- }
-
- /* a & b should be normals now... */
- assert(a.cls == float_class_normal &&
- b.cls == float_class_normal);
-
- p_exp = a.exp + b.exp;
-
- mul64To128(a.frac, b.frac, &hi, &lo);
-
- /* Renormalize to the msb. */
- if (hi & DECOMPOSED_IMPLICIT_BIT) {
- p_exp += 1;
- } else {
- shortShift128Left(hi, lo, 1, &hi, &lo);
- }
-
- /* + add/sub */
- if (c.cls != float_class_zero) {
- int exp_diff = p_exp - c.exp;
- if (p_sign == c.sign) {
- /* Addition */
- if (exp_diff <= 0) {
- shift64RightJamming(hi, -exp_diff, &hi);
- p_exp = c.exp;
- if (uadd64_overflow(hi, c.frac, &hi)) {
- shift64RightJamming(hi, 1, &hi);
- hi |= DECOMPOSED_IMPLICIT_BIT;
- p_exp += 1;
- }
- } else {
- uint64_t c_hi, c_lo, over;
- shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
- add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
- if (over) {
- shift64RightJamming(hi, 1, &hi);
- hi |= DECOMPOSED_IMPLICIT_BIT;
- p_exp += 1;
- }
- }
- } else {
- /* Subtraction */
- uint64_t c_hi = c.frac, c_lo = 0;
-
- if (exp_diff <= 0) {
- shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
- if (exp_diff == 0
- &&
- (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
- sub128(hi, lo, c_hi, c_lo, &hi, &lo);
- } else {
- sub128(c_hi, c_lo, hi, lo, &hi, &lo);
- p_sign ^= 1;
- p_exp = c.exp;
- }
- } else {
- shift128RightJamming(c_hi, c_lo,
- exp_diff,
- &c_hi, &c_lo);
- sub128(hi, lo, c_hi, c_lo, &hi, &lo);
- }
-
- if (hi == 0 && lo == 0) {
- a.cls = float_class_zero;
- a.sign = s->float_rounding_mode == float_round_down;
- a.sign ^= sign_flip;
- return a;
- } else {
- int shift;
- if (hi != 0) {
- shift = clz64(hi);
- } else {
- shift = clz64(lo) + 64;
- }
- /* Normalizing to a binary point of 124 is the
- correct adjust for the exponent. However since we're
- shifting, we might as well put the binary point back
- at 63 where we really want it. Therefore shift as
- if we're leaving 1 bit at the top of the word, but
- adjust the exponent as if we're leaving 3 bits. */
- shift128Left(hi, lo, shift, &hi, &lo);
- p_exp -= shift;
- }
- }
- }
- hi |= (lo != 0);
-
- if (flags & float_muladd_halve_result) {
- p_exp -= 1;
- }
-
- /* finally prepare our result */
- a.cls = float_class_normal;
- a.sign = p_sign ^ sign_flip;
- a.exp = p_exp;
- a.frac = hi;
-
- return a;
-}
-
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
- int flags, float_status *status)
+ int flags, float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
float16_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return float16_round_pack_canonical(&pr, status);
+ return float16_round_pack_canonical(pr, status);
}
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
float32_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return float32_round_pack_canonical(&pr, status);
+ return float32_round_pack_canonical(pr, status);
}
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return float64_round_pack_canonical(&pr, status);
+ return float64_round_pack_canonical(pr, status);
}
static bool force_soft_fma;
@@ -1756,23 +1737,30 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
-/*
- * Returns the result of multiplying the bfloat16 values `a'
- * and `b' then adding 'c', with no intermediate rounding step after the
- * multiplication.
- */
-
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
int flags, float_status *status)
{
- FloatParts64 pa, pb, pc, pr;
+ FloatParts64 pa, pb, pc, *pr;
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
bfloat16_unpack_canonical(&pc, c, status);
- pr = muladd_floats(pa, pb, pc, flags, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
- return bfloat16_round_pack_canonical(&pr, status);
+ return bfloat16_round_pack_canonical(pr, status);
+}
+
+float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
+ int flags, float_status *status)
+{
+ FloatParts128 pa, pb, pc, *pr;
+
+ float128_unpack_canonical(&pa, a, status);
+ float128_unpack_canonical(&pb, b, status);
+ float128_unpack_canonical(&pc, c, status);
+ pr = parts_muladd(&pa, &pb, &pc, flags, status);
+
+ return float128_round_pack_canonical(pr, status);
}
/*
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c
index d319993280..c24baf8535 100644
--- a/tests/fp/fp-bench.c
+++ b/tests/fp/fp-bench.c
@@ -386,7 +386,7 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
for (i = 0; i < OPS_PER_ITER; i++) {
float128 a = ops[0].f128;
float128 b = ops[1].f128;
- /* float128 c = ops[2].f128; */
+ float128 c = ops[2].f128;
switch (op) {
case OP_ADD:
@@ -401,9 +401,9 @@ static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
case OP_DIV:
res.f128 = float128_div(a, b, &soft_status);
break;
- /* case OP_FMA: */
- /* res.f128 = float128_muladd(a, b, c, 0, &soft_status); */
- /* break; */
+ case OP_FMA:
+ res.f128 = float128_muladd(a, b, c, 0, &soft_status);
+ break;
case OP_SQRT:
res.f128 = float128_sqrt(a, &soft_status);
break;
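
(Reviewer note: with OP_FMA enabled for quad precision, an invocation
along the lines of `./fp-bench -o fma -p quad` should now exercise
float128_muladd directly; the exact option spellings are whatever
fp-bench's usage text reports, and quad support was added earlier in
this series.)
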
diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c
index 5a4cad8c8b..ff131afbde 100644
--- a/tests/fp/fp-test.c
+++ b/tests/fp/fp-test.c
@@ -717,7 +717,7 @@ static void do_testfloat(int op, int rmode, bool exact)
test_abz_f128(true_abz_f128M, subj_abz_f128M);
break;
case F128_MULADD:
- not_implemented();
+ test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd);
break;
case F128_SQRT:
test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt);
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 9a67ab2bea..a203811299 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -413,3 +413,129 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
a->sign = sign;
return a;
}
+
+/*
+ * Returns the result of multiplying the floating-point values `a' and
+ * `b' then adding 'c', with no intermediate rounding step after the
+ * multiplication. The operation is performed according to the
+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
+ * The flags argument allows the caller to select negation of the
+ * addend, the intermediate product, or the final result. (The
+ * difference between this and having the caller do a separate
+ * negation is that negating externally will flip the sign bit on NaNs.)
+ *
+ * Requires A and C extracted into a double-sized structure to provide the
+ * extra space for the widening multiply.
+ */
+static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
+ FloatPartsN *c, int flags, float_status *s)
+{
+ int ab_mask, abc_mask;
+ FloatPartsW p_widen, c_widen;
+
+ ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
+ abc_mask = float_cmask(c->cls) | ab_mask;
+
+ /*
+ * It is implementation-defined whether the cases of (0,inf,qnan)
+ * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
+ * they return if they do), so we have to hand this information
+ * off to the target-specific pick-a-NaN routine.
+ */
+ if (unlikely(abc_mask & float_cmask_anynan)) {
+ return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
+ }
+
+ if (flags & float_muladd_negate_c) {
+ c->sign ^= 1;
+ }
+
+ /* Compute the sign of the product into A. */
+ a->sign ^= b->sign;
+ if (flags & float_muladd_negate_product) {
+ a->sign ^= 1;
+ }
+
+ if (unlikely(ab_mask != float_cmask_normal)) {
+ if (unlikely(ab_mask == float_cmask_infzero)) {
+ goto d_nan;
+ }
+
+ if (ab_mask & float_cmask_inf) {
+ if (c->cls == float_class_inf && a->sign != c->sign) {
+ goto d_nan;
+ }
+ goto return_inf;
+ }
+
+ g_assert(ab_mask & float_cmask_zero);
+ if (c->cls == float_class_normal) {
+ *a = *c;
+ goto return_normal;
+ }
+ if (c->cls == float_class_zero) {
+ if (a->sign != c->sign) {
+ goto return_sub_zero;
+ }
+ goto return_zero;
+ }
+ g_assert(c->cls == float_class_inf);
+ }
+
+ if (unlikely(c->cls == float_class_inf)) {
+ a->sign = c->sign;
+ goto return_inf;
+ }
+
+ /* Perform the multiplication step. */
+ p_widen.sign = a->sign;
+ p_widen.exp = a->exp + b->exp + 1;
+ frac_mulw(&p_widen, a, b);
+ if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
+ frac_add(&p_widen, &p_widen, &p_widen);
+ p_widen.exp -= 1;
+ }
+
+ /* Perform the addition step. */
+ if (c->cls != float_class_zero) {
+ /* Zero-extend C to less significant bits. */
+ frac_widen(&c_widen, c);
+ c_widen.exp = c->exp;
+
+ if (a->sign == c->sign) {
+ parts_add_normal(&p_widen, &c_widen);
+ } else if (!parts_sub_normal(&p_widen, &c_widen)) {
+ goto return_sub_zero;
+ }
+ }
+
+ /* Narrow with sticky bit, for proper rounding later. */
+ frac_truncjam(a, &p_widen);
+ a->sign = p_widen.sign;
+ a->exp = p_widen.exp;
+
+ return_normal:
+ if (flags & float_muladd_halve_result) {
+ a->exp -= 1;
+ }
+ finish_sign:
+ if (flags & float_muladd_negate_result) {
+ a->sign ^= 1;
+ }
+ return a;
+
+ return_sub_zero:
+ a->sign = s->float_rounding_mode == float_round_down;
+ return_zero:
+ a->cls = float_class_zero;
+ goto finish_sign;
+
+ return_inf:
+ a->cls = float_class_inf;
+ goto finish_sign;
+
+ d_nan:
+ float_raise(float_flag_invalid, s);
+ parts_default_nan(a, s);
+ return a;
+}
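
(Reviewer aside: one way to read the flag handling above is against the
hedged, unfused reference below; the name muladd_unfused_ref is
illustrative. It is deliberately *not* equivalent to muladd: it rounds
the intermediate product, and negating externally with float128_chs
flips the sign of NaNs, which is exactly the difference the comment
above calls out. float_muladd_halve_result is omitted for brevity;
float128_mul, float128_add and float128_chs are existing softfloat API.)

    /* Sketch only; assumes "fpu/softfloat.h". */
    static float128 muladd_unfused_ref(float128 a, float128 b, float128 c,
                                       int flags, float_status *s)
    {
        float128 p = float128_mul(a, b, s);  /* extra intermediate rounding */
        if (flags & float_muladd_negate_product) {
            p = float128_chs(p);             /* flips the sign even on NaNs */
        }
        if (flags & float_muladd_negate_c) {
            c = float128_chs(c);
        }
        float128 r = float128_add(p, c, s);
        if (flags & float_muladd_negate_result) {
            r = float128_chs(r);
        }
        return r;
    }
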
diff --git a/tests/fp/wrap.c.inc b/tests/fp/wrap.c.inc
index 0cbd20013e..cb1bb77e4c 100644
--- a/tests/fp/wrap.c.inc
+++ b/tests/fp/wrap.c.inc
@@ -574,6 +574,18 @@ WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)
WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
#undef WRAP_MULADD
+static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,
+ const float128_t *cp, float128_t *res)
+{
+ float128 a, b, c, ret;
+
+ a = soft_to_qemu128(*ap);
+ b = soft_to_qemu128(*bp);
+ c = soft_to_qemu128(*cp);
+ ret = float128_muladd(a, b, c, 0, &qsf);
+ *res = qemu_to_soft128(ret);
+}
+
#define WRAP_CMP16(name, func, retcond) \
static bool name(float16_t a, float16_t b) \
{ \
--
2.25.1