From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:33614)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <pm215@archaic.org.uk>) id 1WBtIn-0005ni-Ss
	for qemu-devel@nongnu.org; Fri, 07 Feb 2014 16:49:38 -0500
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <pm215@archaic.org.uk>) id 1WBtIm-0004vY-CV
	for qemu-devel@nongnu.org; Fri, 07 Feb 2014 16:49:37 -0500
Received: from mnementh.archaic.org.uk ([2001:8b0:1d0::1]:45625)
	by eggs.gnu.org with esmtp (Exim 4.71)
	(envelope-from <pm215@archaic.org.uk>) id 1WBtIm-0004u7-4o
	for qemu-devel@nongnu.org; Fri, 07 Feb 2014 16:49:36 -0500
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri,  7 Feb 2014 21:49:22 +0000
Message-Id: <1391809763-11251-8-git-send-email-peter.maydell@linaro.org>
In-Reply-To: <1391809763-11251-1-git-send-email-peter.maydell@linaro.org>
References: <1391809763-11251-1-git-send-email-peter.maydell@linaro.org>
Subject: [Qemu-devel] [PATCH 7/8] softfloat: Support halving the result of
	muladd operation
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: qemu-devel@nongnu.org
Cc: Peter Crosthwaite <peter.crosthwaite@xilinx.com>, patches@linaro.org, Michael Matz <matz@suse.de>, Alexander Graf <agraf@suse.de>, Claudio Fontana <claudio.fontana@linaro.org>, Dirk Mueller <dmueller@suse.de>, Will Newton <will.newton@linaro.org>, Laurent Desnogues <laurent.desnogues@gmail.com>, =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>, kvmarm@lists.cs.columbia.edu, Christoffer Dall <christoffer.dall@linaro.org>, Richard Henderson <rth@twiddle.net>

The ARMv8 instruction set includes a fused floating point
reciprocal square root step instruction (FRSQRTS) which requires
a fused "(x * y + z) / 2" operation. Support this by adding
a flag to the softfloat muladd operations which requests
that the result be halved before rounding.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 fpu/softfloat.c         | 38 ++++++++++++++++++++++++++++++++++++++
 include/fpu/softfloat.h |  3 +++
 2 files changed, 41 insertions(+)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index e0ea599..c8f0370 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2372,6 +2372,17 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
             }
         }
         /* Zero plus something non-zero : just return the something */
+        if (flags & float_muladd_halve_result) {
+            if (cExp == 0) {
+                shift32RightJamming(cSig, 1, &cSig);
+            } else if (cExp == 1) {
+                shift32RightJamming(cSig, 1, &cSig);
+                cSig |= (1 << 22);
+                cExp = 0;
+            } else {
+                cExp--;
+            }
+        }
         return packFloat32(cSign ^ signflip, cExp, cSig);
     }
 
@@ -2408,6 +2419,9 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
             /* Throw out the special case of c being an exact zero now */
             shift64RightJamming(pSig64, 32, &pSig64);
             pSig = pSig64;
+            if (flags & float_muladd_halve_result) {
+                pExp--;
+            }
             return roundAndPackFloat32(zSign, pExp - 1,
                                        pSig STATUS_VAR);
         }
@@ -2472,6 +2486,10 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
         zSig64 <<= shiftcount;
         zExp -= shiftcount;
     }
+    if (flags & float_muladd_halve_result) {
+        zExp--;
+    }
+
     shift64RightJamming(zSig64, 32, &zSig64);
     return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
 }
@@ -4088,6 +4106,17 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
             }
         }
         /* Zero plus something non-zero : just return the something */
+        if (flags & float_muladd_halve_result) {
+            if (cExp == 0) {
+                shift64RightJamming(cSig, 1, &cSig);
+            } else if (cExp == 1) {
+                shift64RightJamming(cSig, 1, &cSig);
+                cSig |= (1ULL << 51);
+                cExp = 0;
+            } else {
+                cExp--;
+            }
+        }
         return packFloat64(cSign ^ signflip, cExp, cSig);
     }
 
@@ -4123,6 +4152,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
         if (!cSig) {
             /* Throw out the special case of c being an exact zero now */
             shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
+            if (flags & float_muladd_halve_result) {
+                pExp--;
+            }
             return roundAndPackFloat64(zSign, pExp - 1,
                                        pSig1 STATUS_VAR);
         }
@@ -4159,6 +4191,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
             zExp--;
         }
         shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
+        if (flags & float_muladd_halve_result) {
+            zExp--;
+        }
         return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
     } else {
         /* Subtraction */
@@ -4209,6 +4244,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
                 zExp -= (shiftcount + 64);
             }
         }
+        if (flags & float_muladd_halve_result) {
+            zExp--;
+        }
         return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
     }
 }
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 806ae13..4b4df88 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -249,11 +249,14 @@ void float_raise( int8 flags STATUS_PARAM);
 | Using these differs from negating an input or output before calling
 | the muladd function in that this means that a NaN doesn't have its
 | sign bit inverted before it is propagated.
+| We also support halving the result before rounding, as a special
+| case to support the ARM fused reciprocal-sqrt-step instruction FRSQRTS.
 *----------------------------------------------------------------------------*/
 enum {
     float_muladd_negate_c = 1,
     float_muladd_negate_product = 2,
     float_muladd_negate_result = 4,
+    float_muladd_halve_result = 8,
 };
 
 /*----------------------------------------------------------------------------
-- 
1.8.5