From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 7/8] softfloat: Add float/double to 16 bit integer conversion functions
Date: Thu, 11 Nov 2010 18:24:01 +0000 [thread overview]
Message-ID: <1289499842-28818-8-git-send-email-peter.maydell@linaro.org> (raw)
In-Reply-To: <1289499842-28818-1-git-send-email-peter.maydell@linaro.org>
The ARM architecture needs float/double to 16 bit integer conversions.
(The 32 bit versions aren't sufficient because of the requirement
to saturate at 16 bit MAXINT/MININT and to get the exception bits right.)
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
fpu/softfloat.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
fpu/softfloat.h | 4 ++
2 files changed, 140 insertions(+), 0 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 0b82797..6f5b05d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1355,6 +1355,55 @@ int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
+| `a' to the 16-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits32 aSig;
+ int32 z;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = aExp - 0x8E;
+ if ( 0 <= shiftCount ) {
+ if ( float32_val(a) != 0xC7000000 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+ return 0x7FFF;
+ }
+ }
+ return (sbits32) 0xffff8000;
+ }
+ else if ( aExp <= 0x7E ) {
+ if ( aExp | aSig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return 0;
+ }
+ shiftCount -= 0x10;
+ aSig = ( aSig | 0x00800000 )<<8;
+ z = aSig>>( - shiftCount );
+ if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) {
+ z = - z;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
| `a' to the 64-bit two's complement integer format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic---which means in particular that the conversion is rounded
@@ -2412,6 +2461,57 @@ int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
+| `a' to the 16-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int16 aExp, shiftCount;
+ bits64 aSig, savedASig;
+ int32 z;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( 0x40E < aExp ) {
+ if ( ( aExp == 0x7FF ) && aSig ) {
+ aSign = 0;
+ }
+ goto invalid;
+ }
+ else if ( aExp < 0x3FF ) {
+ if ( aExp || aSig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return 0;
+ }
+ aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x433 - aExp;
+ savedASig = aSig;
+ aSig >>= shiftCount;
+ z = aSig;
+ if ( aSign ) {
+ z = - z;
+ }
+ if ( ( (int16_t)z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (sbits32) 0xffff8000 : 0x7FFF;
+ }
+ if ( ( aSig<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
| `a' to the 64-bit two's complement integer format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic---which means in particular that the conversion is rounded
@@ -5632,6 +5732,24 @@ unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
return res;
}
+unsigned int float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
+{
+ int64_t v;
+ unsigned int res;
+
+ v = float32_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffff) {
+ res = 0xffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
unsigned int float64_to_uint32( float64 a STATUS_PARAM )
{
int64_t v;
@@ -5668,6 +5786,24 @@ unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
return res;
}
+unsigned int float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
+{
+ int64_t v;
+ unsigned int res;
+
+ v = float64_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffff) {
+ res = 0xffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
/* FIXME: This looks broken. */
uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
{
diff --git a/fpu/softfloat.h b/fpu/softfloat.h
index 9528825..5d68a07 100644
--- a/fpu/softfloat.h
+++ b/fpu/softfloat.h
@@ -251,6 +251,8 @@ float32 float16_to_float32( bits16, flag STATUS_PARAM );
/*----------------------------------------------------------------------------
| Software IEC/IEEE single-precision conversion routines.
*----------------------------------------------------------------------------*/
+int float32_to_int16_round_to_zero( float32 STATUS_PARAM );
+unsigned int float32_to_uint16_round_to_zero( float32 STATUS_PARAM );
int float32_to_int32( float32 STATUS_PARAM );
int float32_to_int32_round_to_zero( float32 STATUS_PARAM );
unsigned int float32_to_uint32( float32 STATUS_PARAM );
@@ -321,6 +323,8 @@ INLINE int float32_is_zero(float32 a)
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision conversion routines.
*----------------------------------------------------------------------------*/
+int float64_to_int16_round_to_zero( float64 STATUS_PARAM );
+unsigned int float64_to_uint16_round_to_zero( float64 STATUS_PARAM );
int float64_to_int32( float64 STATUS_PARAM );
int float64_to_int32_round_to_zero( float64 STATUS_PARAM );
unsigned int float64_to_uint32( float64 STATUS_PARAM );
--
1.7.1
next prev parent reply other threads:[~2010-11-11 18:24 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-11 18:23 [Qemu-devel] [PATCH 0/8] ARM: fix VCVT instructions Peter Maydell
2010-11-11 18:23 ` [Qemu-devel] [PATCH 1/8] ARM: Fix decoding of VFP forms of VCVT between float and int/fixed Peter Maydell
2010-12-06 16:30 ` Nathan Froyd
2010-12-06 16:48 ` Peter Maydell
2010-12-06 16:57 ` Nathan Froyd
2010-12-06 17:07 ` Peter Maydell
2010-12-08 12:00 ` Peter Maydell
2010-11-11 18:23 ` [Qemu-devel] [PATCH 2/8] ARM: Fix decoding of Neon forms of VCVT between float and fixed point Peter Maydell
2010-11-11 18:23 ` [Qemu-devel] [PATCH 3/8] ARM: Fix sense of to_integer bit in Neon VCVT float/int conversion Peter Maydell
2010-11-11 18:23 ` [Qemu-devel] [PATCH 4/8] ARM: Return correct result for float-to-integer conversion of NaN Peter Maydell
2010-11-11 19:21 ` Nathan Froyd
2010-11-12 23:08 ` Peter Maydell
2010-11-11 18:23 ` [Qemu-devel] [PATCH 5/8] ARM: Return correct result for single<->double " Peter Maydell
2010-11-11 18:24 ` [Qemu-devel] [PATCH 6/8] ARM: Ignore top 16 bits when doing VCVT from 16 bit fixed point Peter Maydell
2010-11-11 18:24 ` Peter Maydell [this message]
2010-11-11 18:24 ` [Qemu-devel] [PATCH 8/8] ARM: Implement VCVT to 16 bit integer using new softfloat routines Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1289499842-28818-8-git-send-email-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).