qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, "Alex Bennée" <alex.bennee@linaro.org>
Subject: [PULL 41/46] softfloat: Introduce sh[lr]_double primitives
Date: Sun, 16 May 2021 07:34:26 -0500	[thread overview]
Message-ID: <20210516123431.718318-42-richard.henderson@linaro.org> (raw)
In-Reply-To: <20210516123431.718318-1-richard.henderson@linaro.org>

Have x86_64 assembly for them, with a fallback.
This avoids shuffling values through %cl in the x86 case.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat-macros.h |  36 ++++++++++++
 fpu/softfloat.c                | 102 +++++++++++++++++++++++++--------
 2 files changed, 115 insertions(+), 23 deletions(-)

diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index 672c1db555..ec4e27a595 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -85,6 +85,42 @@ this code that are retained.
 #include "fpu/softfloat-types.h"
 #include "qemu/host-utils.h"
 
+/**
+ * shl_double: double-word merging left shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @l left by @c bits, shifting in bits from @r.
+ */
+static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
+    return l;
+#else
+    return c ? (l << c) | (r >> (64 - c)) : l;
+#endif
+}
+
+/**
+ * shr_double: double-word merging right shift
+ * @l: left or most-significant word
+ * @r: right or least-significant word
+ * @c: shift count
+ *
+ * Shift @r right by @c bits, shifting in bits from @l.
+ */
+static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
+{
+#if defined(__x86_64__)
+    asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
+    return r;
+#else
+    return c ? (r >> c) | (l << (64 - c)) : r;
+#endif
+}
+
 /*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
 | bits are shifted off, they are ``jammed'' into the least significant bit of
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 571309e74f..34689959a9 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -957,15 +957,12 @@ static int frac128_normalize(FloatParts128 *a)
 {
     if (a->frac_hi) {
         int shl = clz64(a->frac_hi);
-        if (shl) {
-            int shr = 64 - shl;
-            a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
-            a->frac_lo = (a->frac_lo << shl);
-        }
+        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
+        a->frac_lo <<= shl;
         return shl;
     } else if (a->frac_lo) {
         int shl = clz64(a->frac_lo);
-        a->frac_hi = (a->frac_lo << shl);
+        a->frac_hi = a->frac_lo << shl;
         a->frac_lo = 0;
         return shl + 64;
     }
@@ -976,7 +973,7 @@ static int frac256_normalize(FloatParts256 *a)
 {
     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
-    int ret, shl, shr;
+    int ret, shl;
 
     if (likely(a0)) {
         shl = clz64(a0);
@@ -1006,11 +1003,10 @@ static int frac256_normalize(FloatParts256 *a)
         ret += shl;
     }
 
-    shr = -shl & 63;
-    a0 = (a0 << shl) | (a1 >> shr);
-    a1 = (a1 << shl) | (a2 >> shr);
-    a2 = (a2 << shl) | (a3 >> shr);
-    a3 = (a3 << shl);
+    a0 = shl_double(a0, a1, shl);
+    a1 = shl_double(a1, a2, shl);
+    a2 = shl_double(a2, a3, shl);
+    a3 <<= shl;
 
  done:
     a->frac_hi = a0;
@@ -1029,7 +1025,20 @@ static void frac64_shl(FloatParts64 *a, int c)
 
 static void frac128_shl(FloatParts128 *a, int c)
 {
-    shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+    if (c & 64) {
+        a0 = a1, a1 = 0;
+    }
+
+    c &= 63;
+    if (c) {
+        a0 = shl_double(a0, a1, c);
+        a1 = a1 << c;
+    }
+
+    a->frac_hi = a0;
+    a->frac_lo = a1;
 }
 
 #define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
@@ -1041,19 +1050,68 @@ static void frac64_shr(FloatParts64 *a, int c)
 
 static void frac128_shr(FloatParts128 *a, int c)
 {
-    shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+
+    if (c & 64) {
+        a1 = a0, a0 = 0;
+    }
+
+    c &= 63;
+    if (c) {
+        a1 = shr_double(a0, a1, c);
+        a0 = a0 >> c;
+    }
+
+    a->frac_hi = a0;
+    a->frac_lo = a1;
 }
 
 #define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
 
 static void frac64_shrjam(FloatParts64 *a, int c)
 {
-    shift64RightJamming(a->frac, c, &a->frac);
+    uint64_t a0 = a->frac;
+
+    if (likely(c != 0)) {
+        if (likely(c < 64)) {
+            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
+        } else {
+            a0 = a0 != 0;
+        }
+        a->frac = a0;
+    }
 }
 
 static void frac128_shrjam(FloatParts128 *a, int c)
 {
-    shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
+    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
+    uint64_t sticky = 0;
+
+    if (unlikely(c == 0)) {
+        return;
+    } else if (likely(c < 64)) {
+        /* nothing */
+    } else if (likely(c < 128)) {
+        sticky = a1;
+        a1 = a0;
+        a0 = 0;
+        c &= 63;
+        if (c == 0) {
+            goto done;
+        }
+    } else {
+        sticky = a0 | a1;
+        a0 = a1 = 0;
+        goto done;
+    }
+
+    sticky |= shr_double(a1, 0, c);
+    a1 = shr_double(a0, a1, c);
+    a0 = a0 >> c;
+
+ done:
+    a->frac_lo = a1 | (sticky != 0);
+    a->frac_hi = a0;
 }
 
 static void frac256_shrjam(FloatParts256 *a, int c)
@@ -1061,7 +1119,6 @@ static void frac256_shrjam(FloatParts256 *a, int c)
     uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
     uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
     uint64_t sticky = 0;
-    int invc;
 
     if (unlikely(c == 0)) {
         return;
@@ -1086,12 +1143,11 @@ static void frac256_shrjam(FloatParts256 *a, int c)
         goto done;
     }
 
-    invc = -c & 63;
-    sticky |= a3 << invc;
-    a3 = (a3 >> c) | (a2 << invc);
-    a2 = (a2 >> c) | (a1 << invc);
-    a1 = (a1 >> c) | (a0 << invc);
-    a0 = (a0 >> c);
+    sticky |= shr_double(a3, 0, c);
+    a3 = shr_double(a2, a3, c);
+    a2 = shr_double(a1, a2, c);
+    a1 = shr_double(a0, a1, c);
+    a0 = a0 >> c;
 
  done:
     a->frac_lo = a3 | (sticky != 0);
-- 
2.25.1



  parent reply	other threads:[~2021-05-16 13:08 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
2021-05-16 12:33 ` [PULL 01/46] qemu/host-utils: Use __builtin_bitreverseN Richard Henderson
2021-05-16 12:33 ` [PULL 02/46] qemu/host-utils: Add wrappers for overflow builtins Richard Henderson
2021-05-16 12:33 ` [PULL 03/46] qemu/host-utils: Add wrappers for carry builtins Richard Henderson
2021-05-16 12:33 ` [PULL 04/46] accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c Richard Henderson
2021-05-16 12:33 ` [PULL 05/46] tests/fp: add quad support to the benchmark utility Richard Henderson
2021-05-16 12:33 ` [PULL 06/46] softfloat: Move the binary point to the msb Richard Henderson
2021-05-16 12:33 ` [PULL 07/46] softfloat: Inline float_raise Richard Henderson
2021-05-16 12:33 ` [PULL 08/46] softfloat: Use float_raise in more places Richard Henderson
2021-05-16 12:33 ` [PULL 09/46] softfloat: Tidy a * b + inf return Richard Henderson
2021-05-16 12:33 ` [PULL 10/46] softfloat: Add float_cmask and constants Richard Henderson
2021-05-16 12:33 ` [PULL 11/46] softfloat: Use return_nan in float_to_float Richard Henderson
2021-05-16 12:33 ` [PULL 12/46] softfloat: fix return_nan vs default_nan_mode Richard Henderson
2021-05-16 12:33 ` [PULL 13/46] target/mips: Set set_default_nan_mode with set_snan_bit_is_one Richard Henderson
2021-05-16 12:33 ` [PULL 14/46] softfloat: Do not produce a default_nan from parts_silence_nan Richard Henderson
2021-05-16 12:34 ` [PULL 15/46] softfloat: Rename FloatParts to FloatParts64 Richard Henderson
2021-05-16 12:34 ` [PULL 16/46] softfloat: Move type-specific pack/unpack routines Richard Henderson
2021-05-16 12:34 ` [PULL 17/46] softfloat: Use pointers with parts_default_nan Richard Henderson
2021-05-16 12:34 ` [PULL 18/46] softfloat: Use pointers with unpack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 19/46] softfloat: Use pointers with ftype_unpack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 20/46] softfloat: Use pointers with pack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 21/46] softfloat: Use pointers with ftype_pack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 22/46] softfloat: Use pointers with ftype_unpack_canonical Richard Henderson
2021-05-16 12:34 ` [PULL 23/46] softfloat: Use pointers with ftype_round_pack_canonical Richard Henderson
2021-05-16 12:34 ` [PULL 24/46] softfloat: Use pointers with parts_silence_nan Richard Henderson
2021-05-16 12:34 ` [PULL 25/46] softfloat: Rearrange FloatParts64 Richard Henderson
2021-05-16 12:34 ` [PULL 26/46] softfloat: Convert float128_silence_nan to parts Richard Henderson
2021-05-16 12:34 ` [PULL 27/46] softfloat: Convert float128_default_nan " Richard Henderson
2021-05-16 12:34 ` [PULL 28/46] softfloat: Move return_nan to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 29/46] softfloat: Move pick_nan " Richard Henderson
2021-05-16 12:34 ` [PULL 30/46] softfloat: Move pick_nan_muladd " Richard Henderson
2021-05-16 12:34 ` [PULL 31/46] softfloat: Move sf_canonicalize " Richard Henderson
2021-05-16 12:34 ` [PULL 32/46] softfloat: Move round_canonical " Richard Henderson
2021-05-16 12:34 ` [PULL 33/46] softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h Richard Henderson
2021-05-16 12:34 ` [PULL 34/46] softfloat: Move addsub_floats to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 35/46] softfloat: Implement float128_add/sub via parts Richard Henderson
2021-05-16 12:34 ` [PULL 36/46] softfloat: Move mul_floats to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 37/46] softfloat: Move muladd_floats " Richard Henderson
2021-05-16 12:34 ` [PULL 38/46] softfloat: Use mulu64 for mul64To128 Richard Henderson
2021-05-16 12:34 ` [PULL 39/46] softfloat: Use add192 in mul128To256 Richard Henderson
2021-05-16 12:34 ` [PULL 40/46] softfloat: Tidy mul128By64To192 Richard Henderson
2021-05-16 12:34 ` Richard Henderson [this message]
2021-05-16 12:34 ` [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc Richard Henderson
2021-05-20 13:40   ` Peter Maydell
2021-05-20 16:15     ` Richard Henderson
2021-05-20 17:04       ` Peter Maydell
2021-05-16 12:34 ` [PULL 43/46] softfloat: Split float_to_float Richard Henderson
2021-05-16 12:34 ` [PULL 44/46] softfloat: Convert float-to-float conversions with float128 Richard Henderson
2021-05-16 12:34 ` [PULL 45/46] softfloat: Move round_to_int to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 46/46] softfloat: Move round_to_int_and_pack " Richard Henderson
2021-05-16 13:19 ` [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) no-reply
2021-05-18 10:11 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210516123431.718318-42-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).