* [Qemu-devel] [V4 PATCH 01/22] softfloat: Fix float64_to_uint64
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-19 22:11 ` Peter Maydell
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 02/22] softfloat: Add float32_to_uint64() Tom Musta
` (20 subsequent siblings)
21 siblings, 1 reply; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
The comment preceding the float64_to_uint64 routine suggests that
the implementation is broken. And this is, indeed, the case.
This patch properly implements the conversion of a 64-bit floating-point
number to an unsigned 64-bit integer.
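As a point of reference (not part of the patch), the intended semantics
can be sketched on the host in plain C, using 'double' and nearbyint()
in place of the softfloat types; flag handling is only hinted at here:

    #include <math.h>
    #include <stdint.h>

    /* Illustrative reference model: convert per the current rounding
     * mode; NaN and overflow saturate to the largest unsigned integer,
     * negative results collapse to zero. */
    static uint64_t ref_float64_to_uint64(double a, int *invalid)
    {
        double r;

        if (isnan(a)) {
            *invalid = 1;
            return UINT64_MAX;      /* NaN: largest unsigned integer */
        }
        r = nearbyint(a);           /* honours the current rounding mode */
        if (r < 0.0) {
            *invalid = 1;
            return 0;               /* negative: zero */
        }
        if (r >= 0x1p64) {
            *invalid = 1;
            return UINT64_MAX;      /* too large: largest unsigned integer */
        }
        return (uint64_t)r;         /* integral and below 2^64: safe cast */
    }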
This contribution can be licensed under either the softfloat-2a or -2b
license.
V2: Added softfloat license statement.
V3: Modified to meet QEMU coding conventions.
V4: Fixed incorrect handling of small negatives which, if rounded up
to zero, should not set the inexact flag.
Signed-off-by: Tom Musta <tommusta@gmail.com>
---
fpu/softfloat.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 89 insertions(+), 9 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index dbda61b..ec23908 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -161,7 +161,6 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
| exception is raised and the largest positive or negative integer is
| returned.
*----------------------------------------------------------------------------*/
-
static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
{
int8 roundingMode;
@@ -204,6 +203,56 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
}
/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit unsigned integer corresponding to the
+| input. Ordinarily, the fixed-point input is simply rounded to an integer,
+| with the inexact exception raised if the input cannot be represented exactly
+| as an integer. However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackUint64(flag zSign, uint64_t absZ0,
+ uint64_t absZ1 STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = (roundingMode == float_round_nearest_even);
+ increment = ((int64_t)absZ1 < 0);
+ if (!roundNearestEven) {
+ if (roundingMode == float_round_to_zero) {
+ increment = 0;
+ } else if (absZ1) {
+ if (zSign) {
+ increment = (roundingMode == float_round_down) && absZ1;
+ } else {
+ increment = (roundingMode == float_round_up) && absZ1;
+ }
+ }
+ }
+ if (increment) {
+ ++absZ0;
+ if (absZ0 == 0) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ }
+ absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
+ }
+
+ if (zSign && absZ0) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+
+ if (absZ1) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return absZ0;
+}
+
+/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
@@ -6536,18 +6585,49 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
return res;
}
-/* FIXME: This looks broken. */
-uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
-{
- int64_t v;
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. If the conversion overflows, the
+| largest unsigned integer is returned. If 'a' is negative, zero is
+| returned.
+*----------------------------------------------------------------------------*/
- v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
- v += float64_val(a);
- v = float64_to_int64(make_float64(v) STATUS_VAR);
+uint64_t float64_to_uint64(float64 a STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig, aSigExtra;
+ a = float64_squash_input_denormal(a STATUS_VAR);
- return v - INT64_MIN;
+ aSig = extractFloat64Frac(a);
+ aExp = extractFloat64Exp(a);
+ aSign = extractFloat64Sign(a);
+ if (aSign && (aExp > 1022)) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ if (aExp) {
+ aSig |= LIT64(0x0010000000000000);
+ }
+ shiftCount = 0x433 - aExp;
+ if (shiftCount <= 0) {
+ if (0x43E < aExp) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ }
+ aSigExtra = 0;
+ aSig <<= -shiftCount;
+ } else {
+ shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
+ }
+ return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
}
+
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
int64_t v;
--
1.7.1
* Re: [Qemu-devel] [V4 PATCH 01/22] softfloat: Fix float64_to_uint64
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 01/22] softfloat: Fix float64_to_uint64 Tom Musta
@ 2013-12-19 22:11 ` Peter Maydell
2013-12-20 20:05 ` [Qemu-devel] [Qemu-ppc] " Tom Musta
0 siblings, 1 reply; 31+ messages in thread
From: Peter Maydell @ 2013-12-19 22:11 UTC
To: Tom Musta; +Cc: qemu-ppc@nongnu.org, QEMU Developers
On 18 December 2013 20:19, Tom Musta <tommusta@gmail.com> wrote:
> The comment preceding the float64_to_uint64 routine suggests that
> the implementation is broken. And this is, indeed, the case.
>
> This patch properly implements the conversion of a 64-bit floating
> point number to an unsigned, 64 bit integer.
>
> This contribution can be licensed under either the softfloat-2a or -2b
> license.
>
> V2: Added softfloat license statement.
>
> V3: Modified to meet QEMU coding conventions.
>
> V4: Fixed incorrect handling of small negatives, which, if rounded
> up to zero should not set the inexact flag.
>
> Signed-off-by: Tom Musta <tommusta@gmail.com>
> ---
> fpu/softfloat.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 files changed, 89 insertions(+), 9 deletions(-)
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index dbda61b..ec23908 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -161,7 +161,6 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
> | exception is raised and the largest positive or negative integer is
> | returned.
> *----------------------------------------------------------------------------*/
> -
> static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
> {
> int8 roundingMode;
> @@ -204,6 +203,56 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
> }
>
> /*----------------------------------------------------------------------------
> +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
> +| `absZ1', with binary point between bits 63 and 64 (between the input words),
> +| and returns the properly rounded 64-bit unsigned integer corresponding to the
> +| input. Ordinarily, the fixed-point input is simply rounded to an integer,
> +| with the inexact exception raised if the input cannot be represented exactly
> +| as an integer. However, if the fixed-point input is too large, the invalid
> +| exception is raised and the largest unsigned integer is returned.
> +*----------------------------------------------------------------------------*/
You should probably say in this comment what the behaviour is for
negative inputs.
> +uint64_t float64_to_uint64(float64 a STATUS_PARAM)
> +{
> + flag aSign;
> + int_fast16_t aExp, shiftCount;
> + uint64_t aSig, aSigExtra;
> + a = float64_squash_input_denormal(a STATUS_VAR);
>
> - return v - INT64_MIN;
> + aSig = extractFloat64Frac(a);
> + aExp = extractFloat64Exp(a);
> + aSign = extractFloat64Sign(a);
> + if (aSign && (aExp > 1022)) {
> + float_raise(float_flag_invalid STATUS_VAR);
> + return 0;
This incorrectly returns 0 rather than largest-positive-integer
for NaNs with the sign bit set.
> + }
> + if (aExp) {
> + aSig |= LIT64(0x0010000000000000);
> + }
> + shiftCount = 0x433 - aExp;
> + if (shiftCount <= 0) {
> + if (0x43E < aExp) {
> + float_raise(float_flag_invalid STATUS_VAR);
> + return LIT64(0xFFFFFFFFFFFFFFFF);
> + }
> + aSigExtra = 0;
> + aSig <<= -shiftCount;
> + } else {
> + shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
> + }
> + return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
> }
Other than that, the code *looks* OK, but it's really easy for
"not quite right" code to slip through here (especially on corner
cases like NaNs, denormals and odd rounding modes). How much
testing have you given this? I really recommend testing by firing a
huge pile of random (and semi random) test vectors at whatever
guest instruction you're implementing and comparing against
results on reference hardware.
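A minimal sketch of such a harness (illustration only; soft_float64_to_uint64()
is a hypothetical wrapper around the routine under test, both sides are assumed
to use round-to-nearest-even, and exception flags are not compared):

    #include <inttypes.h>
    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical wrapper around the softfloat routine under test. */
    extern uint64_t soft_float64_to_uint64(uint64_t bits);

    /* Host reference; refuses inputs whose C conversion is undefined
     * (NaN, negative, >= 2^64) -- those corner cases still need to be
     * checked against real hardware or the ISA document by hand. */
    static int hw_float64_to_uint64(uint64_t bits, uint64_t *out)
    {
        double d;

        memcpy(&d, &bits, sizeof(d));
        if (isnan(d)) {
            return 0;
        }
        d = nearbyint(d);               /* host default: nearest-even */
        if (d < 0.0 || d >= 0x1p64) {
            return 0;
        }
        *out = (uint64_t)d;
        return 1;
    }

    int main(void)
    {
        int i;

        for (i = 0; i < 1000000; i++) {
            /* Crude generator; real testing should also bias toward
             * corner-case encodings (denormals, huge exponents, NaNs). */
            uint64_t bits = ((uint64_t)rand() << 40) ^
                            ((uint64_t)rand() << 20) ^ (uint64_t)rand();
            uint64_t expected;

            if (hw_float64_to_uint64(bits, &expected) &&
                soft_float64_to_uint64(bits) != expected) {
                printf("mismatch for %016" PRIx64 "\n", bits);
            }
        }
        return 0;
    }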
thanks
-- PMM
* Re: [Qemu-devel] [Qemu-ppc] [V4 PATCH 01/22] softfloat: Fix float64_to_uint64
2013-12-19 22:11 ` Peter Maydell
@ 2013-12-20 20:05 ` Tom Musta
0 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-20 20:05 UTC
To: Peter Maydell; +Cc: qemu-ppc@nongnu.org, QEMU Developers
On 12/19/2013 4:11 PM, Peter Maydell wrote:
> On 18 December 2013 20:19, Tom Musta <tommusta@gmail.com> wrote:
>> The comment preceding the float64_to_uint64 routine suggests that
>> the implementation is broken. And this is, indeed, the case.
>>
>> This patch properly implements the conversion of a 64-bit floating
>> point number to an unsigned, 64 bit integer.
>>
>> This contribution can be licensed under either the softfloat-2a or -2b
>> license.
>>
>> V2: Added softfloat license statement.
>>
>> V3: Modified to meet QEMU coding conventions.
>>
>> V4: Fixed incorrect handling of small negatives, which, if rounded
>> up to zero should not set the inexact flag.
>>
>> Signed-off-by: Tom Musta <tommusta@gmail.com>
>> ---
>> fpu/softfloat.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++-----
>> 1 files changed, 89 insertions(+), 9 deletions(-)
>>
>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
>> index dbda61b..ec23908 100644
>> --- a/fpu/softfloat.c
>> +++ b/fpu/softfloat.c
>> @@ -161,7 +161,6 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
>> | exception is raised and the largest positive or negative integer is
>> | returned.
>> *----------------------------------------------------------------------------*/
>> -
>> static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
>> {
>> int8 roundingMode;
>> @@ -204,6 +203,56 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
>> }
>>
>> /*----------------------------------------------------------------------------
>> +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
>> +| `absZ1', with binary point between bits 63 and 64 (between the input words),
>> +| and returns the properly rounded 64-bit unsigned integer corresponding to the
>> +| input. Ordinarily, the fixed-point input is simply rounded to an integer,
>> +| with the inexact exception raised if the input cannot be represented exactly
>> +| as an integer. However, if the fixed-point input is too large, the invalid
>> +| exception is raised and the largest unsigned integer is returned.
>> +*----------------------------------------------------------------------------*/
>
> You should probably say in this comment what the behaviour is for
> negative inputs.
>
>> +uint64_t float64_to_uint64(float64 a STATUS_PARAM)
>> +{
>> + flag aSign;
>> + int_fast16_t aExp, shiftCount;
>> + uint64_t aSig, aSigExtra;
>> + a = float64_squash_input_denormal(a STATUS_VAR);
>>
>> - return v - INT64_MIN;
>> + aSig = extractFloat64Frac(a);
>> + aExp = extractFloat64Exp(a);
>> + aSign = extractFloat64Sign(a);
>> + if (aSign && (aExp > 1022)) {
>> + float_raise(float_flag_invalid STATUS_VAR);
>> + return 0;
>
> This incorrectly returns 0 rather than largest-positive-integer
> for NaNs with the sign bit set.
>
>> + }
>> + if (aExp) {
>> + aSig |= LIT64(0x0010000000000000);
>> + }
>> + shiftCount = 0x433 - aExp;
>> + if (shiftCount <= 0) {
>> + if (0x43E < aExp) {
>> + float_raise(float_flag_invalid STATUS_VAR);
>> + return LIT64(0xFFFFFFFFFFFFFFFF);
>> + }
>> + aSigExtra = 0;
>> + aSig <<= -shiftCount;
>> + } else {
>> + shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
>> + }
>> + return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
>> }
>
> Other than that, the code *looks* OK, but it's really easy for
> "not quite right" code to slip through here (especially on corner
> cases like NaNs, denormals and odd rounding modes). How much
> testing have you given this? I really recommend testing by firing a
> huge pile of random (and semi random) test vectors at whatever
> guest instruction you're implementing and comparing against
> results on reference hardware.
>
> thanks
> -- PMM
>
Peter:
I agree with the comments and also with the bug. I will fix.
I do test like you said ... random patterns with some biasing to try to get into corner
cases. The bug you found was masked in the PowerPC code that wrapped the call to
float64_to_uint64. I have constructed a variant of my test harness that invokes
float64_to_uint64 directly and it has uncovered the bug.
* [Qemu-devel] [V4 PATCH 02/22] softfloat: Add float32_to_uint64()
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 01/22] softfloat: Fix float64_to_uint64 Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-19 21:31 ` Peter Maydell
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 03/22] softfloat: Fix float64_to_uint64_round_to_zero Tom Musta
` (19 subsequent siblings)
21 siblings, 1 reply; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the float32_to_uint64() routine, which converts a
32-bit floating-point number to an unsigned 64-bit integer.
This contribution can be licensed under either the softfloat-2a or -2b
license.
V2: Reduced patch to just this single routine per feedback from Peter
Maydell.
V4: Now passing sign to roundAndPackUint64()
Signed-off-by: Tom Musta <tommusta@gmail.com>
---
fpu/softfloat.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
include/fpu/softfloat.h | 1 +
2 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index ec23908..1ff59d0 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1558,6 +1558,51 @@ int64 float32_to_int64( float32 a STATUS_PARAM )
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| unsigned integer is returned. Otherwise, if the conversion overflows, the
+| largest unsigned integer is returned. If the 'a' is negative, zero is
+| returned.
+*----------------------------------------------------------------------------*/
+
+uint64 float32_to_uint64(float32 a STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint32_t aSig;
+ uint64_t aSig64, aSigExtra;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac(a);
+ aExp = extractFloat32Exp(a);
+ aSign = extractFloat32Sign(a);
+ if (aSign) {
+ if (aExp) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ } else if (aSig) { /* negative denormalized */
+ float_raise(float_flag_inexact STATUS_VAR);
+ }
+ return 0;
+ }
+ shiftCount = 0xBE - aExp;
+ if (aExp) {
+ aSig |= 0x00800000;
+ }
+ if (shiftCount < 0) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return (int64_t)LIT64(0xFFFFFFFFFFFFFFFF);
+ }
+
+ aSig64 = aSig;
+ aSig64 <<= 40;
+ shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
+ return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
| `a' to the 64-bit two's complement integer format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic, except that the conversion is always rounded toward zero. If
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 2365274..080b36d 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -272,6 +272,7 @@ int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
uint32 float32_to_uint32( float32 STATUS_PARAM );
uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
int64 float32_to_int64( float32 STATUS_PARAM );
+uint64 float32_to_uint64(float32 STATUS_PARAM);
int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
float64 float32_to_float64( float32 STATUS_PARAM );
floatx80 float32_to_floatx80( float32 STATUS_PARAM );
--
1.7.1
* Re: [Qemu-devel] [V4 PATCH 02/22] softfloat: Add float32_to_uint64()
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 02/22] softfloat: Add float32_to_uint64() Tom Musta
@ 2013-12-19 21:31 ` Peter Maydell
2013-12-20 20:07 ` Tom Musta
0 siblings, 1 reply; 31+ messages in thread
From: Peter Maydell @ 2013-12-19 21:31 UTC
To: Tom Musta; +Cc: qemu-ppc@nongnu.org, QEMU Developers
On 18 December 2013 20:19, Tom Musta <tommusta@gmail.com> wrote:
> This patch adds the float32_to_uint64() routine, which converts a
> 32-bit floating point number to an unsigned 64 bit number.
>
> This contribution can be licensed under either the softfloat-2a or -2b
> license.
>
> V2: Reduced patch to just this single routine per feedback from Peter
> Maydell.
>
> V4: Now passing sign to roundAndPackUint64()
>
> Signed-off-by: Tom Musta <tommusta@gmail.com>
> ---
> fpu/softfloat.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> include/fpu/softfloat.h | 1 +
> 2 files changed, 46 insertions(+), 0 deletions(-)
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index ec23908..1ff59d0 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -1558,6 +1558,51 @@ int64 float32_to_int64( float32 a STATUS_PARAM )
>
> /*----------------------------------------------------------------------------
> | Returns the result of converting the single-precision floating-point value
> +| `a' to the 64-bit unsigned integer format. The conversion is
> +| performed according to the IEC/IEEE Standard for Binary Floating-Point
> +| Arithmetic---which means in particular that the conversion is rounded
> +| according to the current rounding mode. If `a' is a NaN, the largest
> +| unsigned integer is returned. Otherwise, if the conversion overflows, the
> +| largest unsigned integer is returned. If the 'a' is negative, zero is
> +| returned.
> +*----------------------------------------------------------------------------*/
> +
> +uint64 float32_to_uint64(float32 a STATUS_PARAM)
> +{
> + flag aSign;
> + int_fast16_t aExp, shiftCount;
> + uint32_t aSig;
> + uint64_t aSig64, aSigExtra;
> + a = float32_squash_input_denormal(a STATUS_VAR);
> +
> + aSig = extractFloat32Frac(a);
> + aExp = extractFloat32Exp(a);
> + aSign = extractFloat32Sign(a);
> + if (aSign) {
> + if (aExp) {
> + float_raise(float_flag_invalid STATUS_VAR);
NaNs with the sign bit set will wind up in this case and return 0
rather than largest-unsigned-integer.
Also it seems like this code says "negative inputs return
zero if they're denormal or signal Invalid and return 0
if they're not". Are you sure this does the right thing for
(a) values which are not denormal but are close enough
to zero to round to it and (b) different rounding modes?
> + } else if (aSig) { /* negative denormalized */
> + float_raise(float_flag_inexact STATUS_VAR);
> + }
> + return 0;
> + }
> + shiftCount = 0xBE - aExp;
> + if (aExp) {
> + aSig |= 0x00800000;
> + }
> + if (shiftCount < 0) {
> + float_raise(float_flag_invalid STATUS_VAR);
> + return (int64_t)LIT64(0xFFFFFFFFFFFFFFFF);
> + }
> +
> + aSig64 = aSig;
> + aSig64 <<= 40;
> + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
> + return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
> +}
thanks
-- PMM
* Re: [Qemu-devel] [V4 PATCH 02/22] softfloat: Add float32_to_uint64()
2013-12-19 21:31 ` Peter Maydell
@ 2013-12-20 20:07 ` Tom Musta
0 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-20 20:07 UTC
To: Peter Maydell; +Cc: qemu-ppc@nongnu.org, QEMU Developers
On 12/19/2013 3:31 PM, Peter Maydell wrote:
> On 18 December 2013 20:19, Tom Musta <tommusta@gmail.com> wrote:
>> This patch adds the float32_to_uint64() routine, which converts a
>> 32-bit floating point number to an unsigned 64 bit number.
>>
>> This contribution can be licensed under either the softfloat-2a or -2b
>> license.
>>
>> V2: Reduced patch to just this single routine per feedback from Peter
>> Maydell.
>>
>> V4: Now passing sign to roundAndPackUint64()
>>
>> Signed-off-by: Tom Musta <tommusta@gmail.com>
>> ---
>> fpu/softfloat.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>> include/fpu/softfloat.h | 1 +
>> 2 files changed, 46 insertions(+), 0 deletions(-)
>>
>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
>> index ec23908..1ff59d0 100644
>> --- a/fpu/softfloat.c
>> +++ b/fpu/softfloat.c
>> @@ -1558,6 +1558,51 @@ int64 float32_to_int64( float32 a STATUS_PARAM )
>>
>> /*----------------------------------------------------------------------------
>> | Returns the result of converting the single-precision floating-point value
>> +| `a' to the 64-bit unsigned integer format. The conversion is
>> +| performed according to the IEC/IEEE Standard for Binary Floating-Point
>> +| Arithmetic---which means in particular that the conversion is rounded
>> +| according to the current rounding mode. If `a' is a NaN, the largest
>> +| unsigned integer is returned. Otherwise, if the conversion overflows, the
>> +| largest unsigned integer is returned. If the 'a' is negative, zero is
>> +| returned.
>> +*----------------------------------------------------------------------------*/
>> +
>> +uint64 float32_to_uint64(float32 a STATUS_PARAM)
>> +{
>> + flag aSign;
>> + int_fast16_t aExp, shiftCount;
>> + uint32_t aSig;
>> + uint64_t aSig64, aSigExtra;
>> + a = float32_squash_input_denormal(a STATUS_VAR);
>> +
>> + aSig = extractFloat32Frac(a);
>> + aExp = extractFloat32Exp(a);
>> + aSign = extractFloat32Sign(a);
>> + if (aSign) {
>> + if (aExp) {
>> + float_raise(float_flag_invalid STATUS_VAR);
>
> NaNs with the sign bit set will wind up in this case and return 0
> rather than largest-unsigned-integer.
>
> Also it seems like this code says "negative inputs return
> zero if they're denormal or signal Invalid and return 0
> if they're not". Are you sure this does the right thing for
> (a) values which are not denormal but are close enough
> to zero to round to it and (b) different rounding modes?
>
>> + } else if (aSig) { /* negative denormalized */
>> + float_raise(float_flag_inexact STATUS_VAR);
>> + }
>> + return 0;
>> + }
>> + shiftCount = 0xBE - aExp;
>> + if (aExp) {
>> + aSig |= 0x00800000;
>> + }
>> + if (shiftCount < 0) {
>> + float_raise(float_flag_invalid STATUS_VAR);
>> + return (int64_t)LIT64(0xFFFFFFFFFFFFFFFF);
>> + }
>> +
>> + aSig64 = aSig;
>> + aSig64 <<= 40;
>> + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
>> + return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
>> +}
>
> thanks
> -- PMM
>
Peter: I agree ... this still isn't quite right.
* [Qemu-devel] [V4 PATCH 03/22] softfloat: Fix float64_to_uint64_round_to_zero
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 01/22] softfloat: Fix float64_to_uint64 Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 02/22] softfloat: Add float32_to_uint64() Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-19 21:43 ` Peter Maydell
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 04/22] softfloat: Fix float64_to_uint32 Tom Musta
` (18 subsequent siblings)
21 siblings, 1 reply; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
The float64_to_uint64_round_to_zero routine is incorrect.
For example, the following test pattern:
46697351FF4AEC29 / 0x1.97351ff4aec29p+103
(a value of roughly 1.6 x 2^103, far above the largest 64-bit unsigned
integer, so the conversion should saturate) currently produces
8000000000000000 instead of FFFFFFFFFFFFFFFF.
This patch re-implements the routine to temporarily force the
rounding mode and use the float64_to_uint64 routine.
This contribution can be licensed under either the softfloat-2a or -2b
license.
Signed-off-by: Tom Musta <tommusta@gmail.com>
---
fpu/softfloat.c | 12 +++++-------
1 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 1ff59d0..1b614ae 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -6675,13 +6675,11 @@ uint64_t float64_to_uint64(float64 a STATUS_PARAM)
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
- int64_t v;
-
- v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
- v += float64_val(a);
- v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
-
- return v - INT64_MIN;
+ signed char current_rounding_mode = STATUS(float_rounding_mode);
+ set_float_rounding_mode(float_round_to_zero STATUS_VAR);
+ int64_t v = float64_to_uint64(a STATUS_VAR);
+ set_float_rounding_mode(current_rounding_mode STATUS_VAR);
+ return v;
}
#define COMPARE(s, nan_exp) \
--
1.7.1
* Re: [Qemu-devel] [V4 PATCH 03/22] softfloat: Fix float64_to_uint64_round_to_zero
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 03/22] softfloat: Fix float64_to_uint64_round_to_zero Tom Musta
@ 2013-12-19 21:43 ` Peter Maydell
0 siblings, 0 replies; 31+ messages in thread
From: Peter Maydell @ 2013-12-19 21:43 UTC
To: Tom Musta; +Cc: qemu-ppc@nongnu.org, QEMU Developers
On 18 December 2013 20:19, Tom Musta <tommusta@gmail.com> wrote:
> The float64_to_uint64_round_to_zero routine is incorrect.
>
> For example, the following test pattern:
>
> 46697351FF4AEC29 / 0x1.97351ff4aec29p+103
>
> currently produces 8000000000000000 instead of FFFFFFFFFFFFFFFF.
>
> This patch re-implements the routine to temporarily force the
> rounding mode and use the float64_to_uint64 routine.
>
> This contribution can be licensed under either the softfloat-2a or -2b
> license.
>
> Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
The other approach would be to make all the conversion
functions explicitly take the rounding mode parameter
(and in some ways that might be closer to the set of
functionality IEEE 754-2008 describes), but this is a
simple change and saving and restoring rounding mode
is very cheap.
thanks
-- PMM
* [Qemu-devel] [V4 PATCH 04/22] softfloat: Fix float64_to_uint32
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (2 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 03/22] softfloat: Fix float64_to_uint64_round_to_zero Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-19 21:48 ` Peter Maydell
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 05/22] softfloat: Fix float64_to_uint32_round_to_zero Tom Musta
` (17 subsequent siblings)
21 siblings, 1 reply; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
The float64_to_uint32 routine has several flaws:
- for numbers between 2**32 and 2**64, the inexact exception flag
may get incorrectly set. In this case, only the invalid flag
should be set.
test pattern: 425F81378DC0CD1F / 0x1.f81378dc0cd1fp+38
- for numbers between 2**63 and 2**64, incorrect results may
be produced:
test pattern: 43EAAF73F1F0B8BD / 0x1.aaf73f1f0b8bdp+63
This patch re-implements float64_to_uint32 to re-use the
float64_to_uint64 routine (instead of float64_to_int64). For the
saturation case, the inexact bit is explicitly cleared before raising
the invalid flag, provided that it was not previously set.
V4: Fixed handling of stickiness of the inexact bit per comments from
Peter Maydell.
This contribution can be licensed under either the softfloat-2a or -2b
license.
Signed-off-by: Tom Musta <tommusta@gmail.com>
---
fpu/softfloat.c | 15 +++++++--------
1 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 1b614ae..6110e28 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -6578,19 +6578,18 @@ uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
uint32 float64_to_uint32( float64 a STATUS_PARAM )
{
- int64_t v;
+ uint64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
- v = float64_to_int64(a STATUS_VAR);
- if (v < 0) {
- res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
- } else if (v > 0xffffffff) {
+ v = float64_to_uint64(a STATUS_VAR);
+ if (v > 0xffffffff) {
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
--
1.7.1
* Re: [Qemu-devel] [V4 PATCH 04/22] softfloat: Fix float64_to_uint32
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 04/22] softfloat: Fix float64_to_uint32 Tom Musta
@ 2013-12-19 21:48 ` Peter Maydell
0 siblings, 0 replies; 31+ messages in thread
From: Peter Maydell @ 2013-12-19 21:48 UTC
To: Tom Musta; +Cc: qemu-ppc@nongnu.org, QEMU Developers
On 18 December 2013 20:19, Tom Musta <tommusta@gmail.com> wrote:
> The float64_to_uint32 has several flaws:
>
> - for numbers between 2**32 and 2**64, the inexact exception flag
> may get incorrectly set. In this case, only the invalid flag
> should be set.
>
> test pattern: 425F81378DC0CD1F / 0x1.f81378dc0cd1fp+38
>
> - for numbers between 2**63 and 2**64, incorrect results may
> be produced:
>
> test pattern: 43EAAF73F1F0B8BD / 0x1.aaf73f1f0b8bdp+63
>
> This patch re-implements float64_to_uint32 to re-use the
> float64_to_uint64 routine (instead of float64_to_int64). For the
> saturation case, the inexact bit is explicitly cleared before raising
> the invalid flag, provided that it was not previously set.
>
> V4: Fixed handling of stickiness of the inexact bit per comments from
> Peter Maydell.
>
> This contribution can be licensed under either the softfloat-2a or -2b
> license.
>
> Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
(though you might tidy up the commit message as per other
email).
I have a patch which fixes the remaining float-to-int conversion
functions (ie the ones you didn't deal with in this series) to not
raise Inexact when they raise Invalid for out of range, and one
which fixes a bug in scalbn. I'll send those out shortly.
thanks
-- PMM
* [Qemu-devel] [V4 PATCH 05/22] softfloat: Fix float64_to_uint32_round_to_zero
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (3 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 04/22] softfloat: Fix float64_to_uint32 Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-19 21:41 ` Peter Maydell
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 06/22] target-ppc: Add set_fprf Argument to fload_invalid_op_excp() Tom Musta
` (16 subsequent siblings)
21 siblings, 1 reply; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
The float64_to_uint32_round_to_zero routine is incorrect.
For example, the following test pattern:
425F81378DC0CD1F / 0x1.f81378dc0cd1fp+38
will erroneously set the inexact flag.
This patch re-implements the routine to use the float64_to_uint64_round_to_zero
routine. If saturation occurs and the inexact flag was not previously set, it
will be cleared.
This contribution can be licensed under either the softfloat-2a or -2b
license.
V4: Correct commit commentary. Corrected code to properly handle the
stickiness of the inexact flag.
Signed-off-by: Tom Musta <tommusta@gmail.com>
---
fpu/softfloat.c | 15 +++++++--------
1 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 6110e28..6112e2a 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -6595,19 +6595,18 @@ uint32 float64_to_uint32( float64 a STATUS_PARAM )
uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
- int64_t v;
+ uint64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
- v = float64_to_int64_round_to_zero(a STATUS_VAR);
- if (v < 0) {
- res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
- } else if (v > 0xffffffff) {
+ v = float64_to_uint64_round_to_zero(a STATUS_VAR);
+ if (v > 0xffffffff) {
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
--
1.7.1
* Re: [Qemu-devel] [V4 PATCH 05/22] softfloat: Fix float64_to_uint32_round_to_zero
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 05/22] softfloat: Fix float64_to_uint32_round_to_zero Tom Musta
@ 2013-12-19 21:41 ` Peter Maydell
0 siblings, 0 replies; 31+ messages in thread
From: Peter Maydell @ 2013-12-19 21:41 UTC
To: Tom Musta; +Cc: qemu-ppc@nongnu.org, QEMU Developers
On 18 December 2013 20:19, Tom Musta <tommusta@gmail.com> wrote:
> The float64_to_uint32_round_to_zero routine is incorrect.
>
> For example, the following test pattern:
>
> 425F81378DC0CD1F / 0x1.f81378dc0cd1fp+38
>
> will erroneously set the inexact flag.
>
> This patch re-implements the routine to use the float64_to_uint64_round_to_zero
> routine. If saturation occurs and the inexact flag was not previously set, it
> will be cleared.
Code is OK but you forgot to update this commit message.
>
> This contribution can be licensed under either the softfloat-2a or -2b
> license.
>
> V4: Correct commit commentary. Corrected code to properly handle the
> stickiness of the inexact flag.
These 'changes from previous version' comments should ideally go
below the "---" line, by the way. The idea is that the bit above the
line goes into the git commit history and should be a self-contained
description of the final patch.
> Signed-off-by: Tom Musta <tommusta@gmail.com>
If you fix the commit message you can add:
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
thanks
-- PMM
* [Qemu-devel] [V4 PATCH 06/22] target-ppc: Add set_fprf Argument to fload_invalid_op_excp()
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (4 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 05/22] softfloat: Fix float64_to_uint32_round_to_zero Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 07/22] target-ppc: General Support for VSX Helpers Tom Musta
` (15 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
The fload_invalid_op_excp() function sets assorted invalid
operation status bits. However, it also implicitly modifies
the FPRF field of the PowerPC FPSCR. Many VSX instructions
set invalid operation bits but do not alter FPRF. Thus the
function is more generally useful if the setting of the FPRF
field is made conditional via a parameter.
All invocations of this routine in existing instructions are
modified to pass 1 and thus retain their current behavior.
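For example (illustrative call sites, not an excerpt from the diff), a
scalar helper keeps its old behaviour by passing 1, while a VSX helper
that must leave FPRF/FPCC untouched passes 0:

    /* scalar helper: update FPRF/FPCC as before */
    fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);

    /* VSX helper: record the invalid-operation bit, leave FPRF/FPCC alone */
    fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);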
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 103 +++++++++++++++++++++++++----------------------
1 files changed, 55 insertions(+), 48 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 4f60218..f0b0a49 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -106,7 +106,8 @@ uint32_t helper_compute_fprf(CPUPPCState *env, uint64_t arg, uint32_t set_fprf)
}
/* Floating-point invalid operations exception */
-static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
+static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op,
+ int set_fpcc)
{
uint64_t ret = 0;
int ve;
@@ -138,8 +139,10 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
case POWERPC_EXCP_FP_VXVC:
/* Ordered comparison of NaN */
env->fpscr |= 1 << FPSCR_VXVC;
- env->fpscr &= ~(0xF << FPSCR_FPCC);
- env->fpscr |= 0x11 << FPSCR_FPCC;
+ if (set_fpcc) {
+ env->fpscr &= ~(0xF << FPSCR_FPCC);
+ env->fpscr |= 0x11 << FPSCR_FPCC;
+ }
/* We must update the target FPR before raising the exception */
if (ve != 0) {
env->exception_index = POWERPC_EXCP_PROGRAM;
@@ -158,8 +161,10 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
if (ve == 0) {
/* Set the result to quiet NaN */
ret = 0x7FF8000000000000ULL;
- env->fpscr &= ~(0xF << FPSCR_FPCC);
- env->fpscr |= 0x11 << FPSCR_FPCC;
+ if (set_fpcc) {
+ env->fpscr &= ~(0xF << FPSCR_FPCC);
+ env->fpscr |= 0x11 << FPSCR_FPCC;
+ }
}
break;
case POWERPC_EXCP_FP_VXCVI:
@@ -169,8 +174,10 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op)
if (ve == 0) {
/* Set the result to quiet NaN */
ret = 0x7FF8000000000000ULL;
- env->fpscr &= ~(0xF << FPSCR_FPCC);
- env->fpscr |= 0x11 << FPSCR_FPCC;
+ if (set_fpcc) {
+ env->fpscr &= ~(0xF << FPSCR_FPCC);
+ env->fpscr |= 0x11 << FPSCR_FPCC;
+ }
}
break;
}
@@ -505,12 +512,12 @@ uint64_t helper_fadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) &&
float64_is_neg(farg1.d) != float64_is_neg(farg2.d))) {
/* Magnitude subtraction of infinities */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d))) {
/* sNaN addition */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg1.d = float64_add(farg1.d, farg2.d, &env->fp_status);
}
@@ -529,12 +536,12 @@ uint64_t helper_fsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) &&
float64_is_neg(farg1.d) == float64_is_neg(farg2.d))) {
/* Magnitude subtraction of infinities */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d))) {
/* sNaN subtraction */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg1.d = float64_sub(farg1.d, farg2.d, &env->fp_status);
}
@@ -553,12 +560,12 @@ uint64_t helper_fmul(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
(float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
/* Multiplication of zero by infinity */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d))) {
/* sNaN multiplication */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg1.d = float64_mul(farg1.d, farg2.d, &env->fp_status);
}
@@ -577,15 +584,15 @@ uint64_t helper_fdiv(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
if (unlikely(float64_is_infinity(farg1.d) &&
float64_is_infinity(farg2.d))) {
/* Division of infinity by infinity */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, 1);
} else if (unlikely(float64_is_zero(farg1.d) && float64_is_zero(farg2.d))) {
/* Division of zero by zero */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d))) {
/* sNaN division */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg1.d = float64_div(farg1.d, farg2.d, &env->fp_status);
}
@@ -603,11 +610,11 @@ uint64_t helper_fctiw(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN conversion */
farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
- POWERPC_EXCP_FP_VXCVI);
+ POWERPC_EXCP_FP_VXCVI, 1);
} else if (unlikely(float64_is_quiet_nan(farg.d) ||
float64_is_infinity(farg.d))) {
/* qNan / infinity conversion */
- farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
+ farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1);
} else {
farg.ll = float64_to_int32(farg.d, &env->fp_status);
/* XXX: higher bits are not supposed to be significant.
@@ -628,11 +635,11 @@ uint64_t helper_fctiwz(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN conversion */
farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
- POWERPC_EXCP_FP_VXCVI);
+ POWERPC_EXCP_FP_VXCVI, 1);
} else if (unlikely(float64_is_quiet_nan(farg.d) ||
float64_is_infinity(farg.d))) {
/* qNan / infinity conversion */
- farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
+ farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1);
} else {
farg.ll = float64_to_int32_round_to_zero(farg.d, &env->fp_status);
/* XXX: higher bits are not supposed to be significant.
@@ -663,11 +670,11 @@ uint64_t helper_fctid(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN conversion */
farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
- POWERPC_EXCP_FP_VXCVI);
+ POWERPC_EXCP_FP_VXCVI, 1);
} else if (unlikely(float64_is_quiet_nan(farg.d) ||
float64_is_infinity(farg.d))) {
/* qNan / infinity conversion */
- farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
+ farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1);
} else {
farg.ll = float64_to_int64(farg.d, &env->fp_status);
}
@@ -684,11 +691,11 @@ uint64_t helper_fctidz(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN conversion */
farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
- POWERPC_EXCP_FP_VXCVI);
+ POWERPC_EXCP_FP_VXCVI, 1);
} else if (unlikely(float64_is_quiet_nan(farg.d) ||
float64_is_infinity(farg.d))) {
/* qNan / infinity conversion */
- farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
+ farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1);
} else {
farg.ll = float64_to_int64_round_to_zero(farg.d, &env->fp_status);
}
@@ -707,11 +714,11 @@ static inline uint64_t do_fri(CPUPPCState *env, uint64_t arg,
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN round */
farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
- POWERPC_EXCP_FP_VXCVI);
+ POWERPC_EXCP_FP_VXCVI, 1);
} else if (unlikely(float64_is_quiet_nan(farg.d) ||
float64_is_infinity(farg.d))) {
/* qNan / infinity round */
- farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI);
+ farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1);
} else {
set_float_rounding_mode(rounding_mode, &env->fp_status);
farg.ll = float64_round_to_int(farg.d, &env->fp_status);
@@ -754,13 +761,13 @@ uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
(float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
/* Multiplication of zero by infinity */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d) ||
float64_is_signaling_nan(farg3.d))) {
/* sNaN operation */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
/* This is the way the PowerPC specification defines it */
float128 ft0_128, ft1_128;
@@ -772,7 +779,7 @@ uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
float64_is_infinity(farg3.d) &&
float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
/* Magnitude subtraction of infinities */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
} else {
ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
@@ -797,13 +804,13 @@ uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
(float64_is_zero(farg1.d) &&
float64_is_infinity(farg2.d)))) {
/* Multiplication of zero by infinity */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d) ||
float64_is_signaling_nan(farg3.d))) {
/* sNaN operation */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
/* This is the way the PowerPC specification defines it */
float128 ft0_128, ft1_128;
@@ -815,7 +822,7 @@ uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
float64_is_infinity(farg3.d) &&
float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
/* Magnitude subtraction of infinities */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
} else {
ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
@@ -838,13 +845,13 @@ uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
(float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
/* Multiplication of zero by infinity */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d) ||
float64_is_signaling_nan(farg3.d))) {
/* sNaN operation */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
/* This is the way the PowerPC specification defines it */
float128 ft0_128, ft1_128;
@@ -856,7 +863,7 @@ uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
float64_is_infinity(farg3.d) &&
float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
/* Magnitude subtraction of infinities */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
} else {
ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
@@ -883,13 +890,13 @@ uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
(float64_is_zero(farg1.d) &&
float64_is_infinity(farg2.d)))) {
/* Multiplication of zero by infinity */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d) ||
float64_is_signaling_nan(farg3.d))) {
/* sNaN operation */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
/* This is the way the PowerPC specification defines it */
float128 ft0_128, ft1_128;
@@ -901,7 +908,7 @@ uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
float64_is_infinity(farg3.d) &&
float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
/* Magnitude subtraction of infinities */
- farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI);
+ farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
} else {
ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
@@ -924,7 +931,7 @@ uint64_t helper_frsp(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN square root */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
f32 = float64_to_float32(farg.d, &env->fp_status);
farg.d = float32_to_float64(f32, &env->fp_status);
@@ -941,11 +948,11 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
/* Square root of a negative nonzero number */
- farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT);
+ farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN square root */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg.d = float64_sqrt(farg.d, &env->fp_status);
}
@@ -961,7 +968,7 @@ uint64_t helper_fre(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN reciprocal */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg.d = float64_div(float64_one, farg.d, &env->fp_status);
return farg.d;
@@ -977,7 +984,7 @@ uint64_t helper_fres(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN reciprocal */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg.d = float64_div(float64_one, farg.d, &env->fp_status);
f32 = float64_to_float32(farg.d, &env->fp_status);
@@ -996,11 +1003,11 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t arg)
if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) {
/* Reciprocal square root of a negative nonzero number */
- farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT);
+ farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
} else {
if (unlikely(float64_is_signaling_nan(farg.d))) {
/* sNaN reciprocal square root */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
farg.d = float64_sqrt(farg.d, &env->fp_status);
farg.d = float64_div(float64_one, farg.d, &env->fp_status);
@@ -1053,7 +1060,7 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
&& (float64_is_signaling_nan(farg1.d) ||
float64_is_signaling_nan(farg2.d)))) {
/* sNaN comparison */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
}
}
@@ -1085,10 +1092,10 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
float64_is_signaling_nan(farg2.d)) {
/* sNaN comparison */
fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN |
- POWERPC_EXCP_FP_VXVC);
+ POWERPC_EXCP_FP_VXVC, 1);
} else {
/* qNaN comparison */
- fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC);
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 1);
}
}
}
--
1.7.1
* [Qemu-devel] [V4 PATCH 07/22] target-ppc: General Support for VSX Helpers
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (5 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 06/22] target-ppc: Add set_fprf Argument to fload_invalid_op_excp() Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 08/22] target-ppc: Add VSX ISA2.06 xadd/xsub Instructions Tom Musta
` (14 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds general support that will be used by the VSX helper
routines:
- a union describing the various VSR subfields.
- access routines to get and set VSRs
- VSX decoders
- a general routine to generate a handler that invokes a VSX
helper.
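As an illustration (not part of the patch), the split-field decoders
reassemble a 6-bit VSR number whose pieces the VSX encoding spreads
across the instruction word: for xT(), bits 21-25 of the opcode word
give the low five bits and bit 0 gives the high bit, so VSRs 32-63 map
onto the Altivec registers in getVSR()/putVSR(). A standalone check:

    #include <stdint.h>
    #include <stdio.h>

    /* Macros reproduced from the patch for a standalone test. */
    #define DECODE_SPLIT(opcode, shift1, nb1, shift2, nb2) \
        (((((opcode) >> (shift1)) & ((1 << (nb1)) - 1)) << nb2) | \
         (((opcode) >> (shift2)) & ((1 << (nb2)) - 1)))

    #define xT(opcode) DECODE_SPLIT(opcode, 0, 1, 21, 5)

    int main(void)
    {
        /* T field (bits 21..25) = 7, TX extension bit (bit 0) = 1 */
        uint32_t opcode = (7u << 21) | 1u;

        /* Prints 39: VSR 39, which getVSR() maps to Altivec register 7. */
        printf("xT = %d\n", xT(opcode));
        return 0;
    }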
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 41 +++++++++++++++++++++++++++++++++++++++++
target-ppc/translate.c | 14 ++++++++++++++
2 files changed, 55 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index f0b0a49..cea94ac 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1717,3 +1717,44 @@ uint32_t helper_efdcmpeq(CPUPPCState *env, uint64_t op1, uint64_t op2)
/* XXX: TODO: test special values (NaN, infinites, ...) */
return helper_efdtsteq(env, op1, op2);
}
+
+#define DECODE_SPLIT(opcode, shift1, nb1, shift2, nb2) \
+ (((((opcode) >> (shift1)) & ((1 << (nb1)) - 1)) << nb2) | \
+ (((opcode) >> (shift2)) & ((1 << (nb2)) - 1)))
+
+#define xT(opcode) DECODE_SPLIT(opcode, 0, 1, 21, 5)
+#define xA(opcode) DECODE_SPLIT(opcode, 2, 1, 16, 5)
+#define xB(opcode) DECODE_SPLIT(opcode, 1, 1, 11, 5)
+#define xC(opcode) DECODE_SPLIT(opcode, 3, 1, 6, 5)
+#define BF(opcode) (((opcode) >> (31-8)) & 7)
+
+typedef union _ppc_vsr_t {
+ uint64_t u64[2];
+ uint32_t u32[4];
+ float32 f32[4];
+ float64 f64[2];
+} ppc_vsr_t;
+
+static void getVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
+{
+ if (n < 32) {
+ vsr->f64[0] = env->fpr[n];
+ vsr->u64[1] = env->vsr[n];
+ } else {
+ vsr->u64[0] = env->avr[n-32].u64[0];
+ vsr->u64[1] = env->avr[n-32].u64[1];
+ }
+}
+
+static void putVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
+{
+ if (n < 32) {
+ env->fpr[n] = vsr->f64[0];
+ env->vsr[n] = vsr->u64[1];
+ } else {
+ env->avr[n-32].u64[0] = vsr->u64[0];
+ env->avr[n-32].u64[1] = vsr->u64[1];
+ }
+}
+
+#define float64_to_float64(x, env) x
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ce07a56..0453900 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7280,6 +7280,20 @@ VSX_VECTOR_MOVE(xvnabssp, OP_NABS, SGN_MASK_SP)
VSX_VECTOR_MOVE(xvnegsp, OP_NEG, SGN_MASK_SP)
VSX_VECTOR_MOVE(xvcpsgnsp, OP_CPSGN, SGN_MASK_SP)
+#define GEN_VSX_HELPER_2(name, op1, op2, inval, type) \
+static void gen_##name(DisasContext * ctx) \
+{ \
+ TCGv_i32 opc; \
+ if (unlikely(!ctx->vsx_enabled)) { \
+ gen_exception(ctx, POWERPC_EXCP_VSXU); \
+ return; \
+ } \
+ /* NIP cannot be restored if the memory exception comes from an helper */ \
+ gen_update_nip(ctx, ctx->nip - 4); \
+ opc = tcg_const_i32(ctx->opcode); \
+ gen_helper_##name(cpu_env, opc); \
+ tcg_temp_free_i32(opc); \
+}
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
--
1.7.1
* [Qemu-devel] [V4 PATCH 08/22] target-ppc: Add VSX ISA2.06 xadd/xsub Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (6 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 07/22] target-ppc: General Support for VSX Helpers Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 09/22] target-ppc: Add VSX ISA2.06 xmul Instructions Tom Musta
` (13 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the floating point addition and subtraction
instructions defined by V2.06 of the PowerPC ISA: xsadddp, xssubdp,
xvadddp, xvsubdp, xvaddsp and xvsubsp.
V2: re-implemented helper macro and combined add and subtract.
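For orientation, a minimal plain-C sketch of the element-wise arithmetic the vector forms perform (xvadddp shown, two double-precision lanes). NaN handling, exception flags and FPRF updates, which make up most of the real helper, are deliberately omitted.

    #include <stdio.h>

    /* xvadddp adds two 2-element vectors of doubles lane by lane;
     * xvsubdp is the same loop with a subtraction. */
    static void xvadddp_sketch(double xt[2], const double xa[2],
                               const double xb[2])
    {
        for (int i = 0; i < 2; i++) {
            xt[i] = xa[i] + xb[i];
        }
    }

    int main(void)
    {
        double a[2] = {1.5, -2.0}, b[2] = {0.25, 4.0}, t[2];
        xvadddp_sketch(t, a, b);
        printf("%g %g\n", t[0], t[1]);   /* prints: 1.75 2 */
        return 0;
    }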
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 9 ++++++++
target-ppc/translate.c | 18 ++++++++++++++++
3 files changed, 78 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index cea94ac..a577d28 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1758,3 +1758,54 @@ static void putVSR(int n, ppc_vsr_t *vsr, CPUPPCState *env)
}
#define float64_to_float64(x, env) x
+
+
+/* VSX_ADD_SUB - VSX floating point add/subtract
+ * name - instruction mnemonic
+ * op - operation (add or sub)
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * sfprf - set FPRF
+ */
+#define VSX_ADD_SUB(name, op, nels, tp, fld, sfprf) \
+void helper_##name(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xa, xb; \
+ int i; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ helper_reset_fpstatus(env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ float_status tstat = env->fp_status; \
+ set_float_exception_flags(0, &tstat); \
+ xt.fld[i] = tp##_##op(xa.fld[i], xb.fld[i], &tstat); \
+ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+ \
+ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
+ if (tp##_is_infinity(xa.fld[i]) && tp##_is_infinity(xb.fld[i])) {\
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \
+ } else if (tp##_is_signaling_nan(xa.fld[i]) || \
+ tp##_is_signaling_nan(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \
+ } \
+ } \
+ \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.fld[i], sfprf); \
+ } \
+ } \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_ADD_SUB(xsadddp, add, 1, float64, f64, 1)
+VSX_ADD_SUB(xvadddp, add, 2, float64, f64, 0)
+VSX_ADD_SUB(xvaddsp, add, 4, float32, f32, 0)
+VSX_ADD_SUB(xssubdp, sub, 1, float64, f64, 1)
+VSX_ADD_SUB(xvsubdp, sub, 2, float64, f64, 0)
+VSX_ADD_SUB(xvsubsp, sub, 4, float32, f32, 0)
+
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 6d282bb..966200d 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -251,6 +251,15 @@ DEF_HELPER_4(vcfsx, void, env, avr, avr, i32)
DEF_HELPER_4(vctuxs, void, env, avr, avr, i32)
DEF_HELPER_4(vctsxs, void, env, avr, avr, i32)
+DEF_HELPER_2(xsadddp, void, env, i32)
+DEF_HELPER_2(xssubdp, void, env, i32)
+
+DEF_HELPER_2(xvadddp, void, env, i32)
+DEF_HELPER_2(xvsubdp, void, env, i32)
+
+DEF_HELPER_2(xvaddsp, void, env, i32)
+DEF_HELPER_2(xvsubsp, void, env, i32)
+
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
DEF_HELPER_2(efscfuf, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0453900..d20b269 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7295,6 +7295,15 @@ static void gen_##name(DisasContext * ctx) \
tcg_temp_free_i32(opc); \
}
+GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX)
+
+GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
+
+GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
+
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
{ \
@@ -9977,6 +9986,15 @@ GEN_XX2FORM(xvnabssp, 0x12, 0x1A, PPC2_VSX),
GEN_XX2FORM(xvnegsp, 0x12, 0x1B, PPC2_VSX),
GEN_XX3FORM(xvcpsgnsp, 0x00, 0x1A, PPC2_VSX),
+GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX),
+GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX),
+
+GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
+GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
+
+GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
+GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
+
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
GEN_XX3FORM(name, opc2, opc3, fl2)
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 09/22] target-ppc: Add VSX ISA2.06 xmul Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (7 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 08/22] target-ppc: Add VSX ISA2.06 xadd/xsub Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 10/22] target-ppc: Add VSX ISA2.06 xdiv Instructions Tom Musta
` (12 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point multiply instructions defined
by V2.06 of the PowerPC ISA: xsmuldp, xvmuldp, xvmulsp.
V2: re-implemented VSX_MUL macro.
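A small stand-alone illustration, in plain C with <math.h> rather than the softfloat path the helper uses, of the special case the multiply helper must trap: infinity times zero raises the VXIMZ invalid-operation condition.

    #include <math.h>
    #include <stdio.h>

    /* Returns 1 when a*b is the invalid "infinity * zero" case (VXIMZ). */
    static int is_imz(double a, double b)
    {
        return (isinf(a) && b == 0.0) || (a == 0.0 && isinf(b));
    }

    int main(void)
    {
        printf("%d\n", is_imz(INFINITY, 0.0));  /* 1: would raise VXIMZ */
        printf("%d\n", is_imz(3.0, 2.0));       /* 0: ordinary multiply */
        return 0;
    }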
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 3 +++
target-ppc/translate.c | 6 ++++++
3 files changed, 55 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index a577d28..51ca589 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1809,3 +1809,49 @@ VSX_ADD_SUB(xssubdp, sub, 1, float64, f64, 1)
VSX_ADD_SUB(xvsubdp, sub, 2, float64, f64, 0)
VSX_ADD_SUB(xvsubsp, sub, 4, float32, f32, 0)
+/* VSX_MUL - VSX floating point multiply
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * sfprf - set FPRF
+ */
+#define VSX_MUL(op, nels, tp, fld, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xa, xb; \
+ int i; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ helper_reset_fpstatus(env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ float_status tstat = env->fp_status; \
+ set_float_exception_flags(0, &tstat); \
+ xt.fld[i] = tp##_mul(xa.fld[i], xb.fld[i], &tstat); \
+ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+ \
+ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
+ if ((tp##_is_infinity(xa.fld[i]) && tp##_is_zero(xb.fld[i])) || \
+ (tp##_is_infinity(xb.fld[i]) && tp##_is_zero(xa.fld[i]))) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, sfprf); \
+ } else if (tp##_is_signaling_nan(xa.fld[i]) || \
+ tp##_is_signaling_nan(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \
+ } \
+ } \
+ \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.fld[i], sfprf); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_MUL(xsmuldp, 1, float64, f64, 1)
+VSX_MUL(xvmuldp, 2, float64, f64, 0)
+VSX_MUL(xvmulsp, 4, float32, f32, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 966200d..ecb900f 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -253,12 +253,15 @@ DEF_HELPER_4(vctsxs, void, env, avr, avr, i32)
DEF_HELPER_2(xsadddp, void, env, i32)
DEF_HELPER_2(xssubdp, void, env, i32)
+DEF_HELPER_2(xsmuldp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
+DEF_HELPER_2(xvmuldp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
+DEF_HELPER_2(xvmulsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index d20b269..1fb21b7 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7297,12 +7297,15 @@ static void gen_##name(DisasContext * ctx) \
GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -9988,12 +9991,15 @@ GEN_XX3FORM(xvcpsgnsp, 0x00, 0x1A, PPC2_VSX),
GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX),
GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX),
+GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
+GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
+GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 10/22] target-ppc: Add VSX ISA2.06 xdiv Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (8 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 09/22] target-ppc: Add VSX ISA2.06 xmul Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 11/22] target-ppc: Add VSX ISA2.06 xre Instructions Tom Musta
` (11 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point divide instructions defined
by V2.06 of the PowerPC ISA: xsdivdp, xvdivdp, xvdivsp.
V2: re-implemented the VSX_DIV macro.
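A stand-alone sketch of the two invalid-operation cases the divide helper distinguishes, infinity/infinity (VXIDI) and zero/zero (VXZDZ), again in plain C rather than softfloat.

    #include <math.h>
    #include <stdio.h>

    /* Classify a/b: inf/inf -> VXIDI, 0/0 -> VXZDZ, anything else is ok.
     * Signaling-NaN detection (VXSNAN) is left out of this sketch. */
    static const char *classify_div(double a, double b)
    {
        if (isinf(a) && isinf(b)) {
            return "VXIDI";
        } else if (a == 0.0 && b == 0.0) {
            return "VXZDZ";
        }
        return "ok";
    }

    int main(void)
    {
        printf("%s\n", classify_div(INFINITY, INFINITY)); /* VXIDI */
        printf("%s\n", classify_div(0.0, 0.0));           /* VXZDZ */
        printf("%s\n", classify_div(1.0, 4.0));           /* ok    */
        return 0;
    }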
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 3 ++
target-ppc/translate.c | 6 +++++
3 files changed, 58 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 51ca589..c84f432 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1855,3 +1855,52 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_MUL(xsmuldp, 1, float64, f64, 1)
VSX_MUL(xvmuldp, 2, float64, f64, 0)
VSX_MUL(xvmulsp, 4, float32, f32, 0)
+
+/* VSX_DIV - VSX floating point divide
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * sfprf - set FPRF
+ */
+#define VSX_DIV(op, nels, tp, fld, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xa, xb; \
+ int i; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ helper_reset_fpstatus(env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ float_status tstat = env->fp_status; \
+ set_float_exception_flags(0, &tstat); \
+ xt.fld[i] = tp##_div(xa.fld[i], xb.fld[i], &tstat); \
+ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+ \
+ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
+ if (tp##_is_infinity(xa.fld[i]) && tp##_is_infinity(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, sfprf); \
+ } else if (tp##_is_zero(xa.fld[i]) && \
+ tp##_is_zero(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, sfprf); \
+ } else if (tp##_is_signaling_nan(xa.fld[i]) || \
+ tp##_is_signaling_nan(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \
+ } \
+ } \
+ \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.fld[i], sfprf); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_DIV(xsdivdp, 1, float64, f64, 1)
+VSX_DIV(xvdivdp, 2, float64, f64, 0)
+VSX_DIV(xvdivsp, 4, float32, f32, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index ecb900f..6ede7ea 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -254,14 +254,17 @@ DEF_HELPER_4(vctsxs, void, env, avr, avr, i32)
DEF_HELPER_2(xsadddp, void, env, i32)
DEF_HELPER_2(xssubdp, void, env, i32)
DEF_HELPER_2(xsmuldp, void, env, i32)
+DEF_HELPER_2(xsdivdp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
DEF_HELPER_2(xvmuldp, void, env, i32)
+DEF_HELPER_2(xvdivdp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
DEF_HELPER_2(xvmulsp, void, env, i32)
+DEF_HELPER_2(xvdivsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1fb21b7..e77fcde 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7298,14 +7298,17 @@ static void gen_##name(DisasContext * ctx) \
GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -9992,14 +9995,17 @@ GEN_XX3FORM(xvcpsgnsp, 0x00, 0x1A, PPC2_VSX),
GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX),
GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX),
GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX),
+GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX),
+GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX),
+GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 11/22] target-ppc: Add VSX ISA2.06 xre Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (9 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 10/22] target-ppc: Add VSX ISA2.06 xdiv Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 12/22] target-ppc: Add VSX ISA2.06 xsqrt Instructions Tom Musta
` (10 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point reciprocal estimate instructions
defined by V2.06 of the PowerPC ISA: xsredp, xvredp, xvresp.
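The estimate is implemented in the helper as a full-precision softfloat division of 1.0 by the operand rather than a reduced-precision table lookup. A plain-C equivalent of that per-element loop, with no FPSCR or FPRF side effects:

    #include <stdio.h>

    /* xvredp: per-element reciprocal estimate, computed (as in the helper)
     * as a full-precision 1.0/x.  NaN and exception handling omitted. */
    static void xvredp_sketch(double xt[2], const double xb[2])
    {
        for (int i = 0; i < 2; i++) {
            xt[i] = 1.0 / xb[i];
        }
    }

    int main(void)
    {
        double b[2] = {4.0, 0.5}, t[2];
        xvredp_sketch(t, b);
        printf("%g %g\n", t[0], t[1]);   /* prints: 0.25 2 */
        return 0;
    }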
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 35 +++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 3 +++
target-ppc/translate.c | 6 ++++++
3 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index c84f432..5908e41 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1904,3 +1904,38 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_DIV(xsdivdp, 1, float64, f64, 1)
VSX_DIV(xvdivdp, 2, float64, f64, 0)
VSX_DIV(xvdivsp, 4, float32, f32, 0)
+
+/* VSX_RE - VSX floating point reciprocal estimate
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * sfprf - set FPRF
+ */
+#define VSX_RE(op, nels, tp, fld, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xb; \
+ int i; \
+ \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ helper_reset_fpstatus(env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ if (unlikely(tp##_is_signaling_nan(xb.fld[i]))) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \
+ } \
+ xt.fld[i] = tp##_div(tp##_one, xb.fld[i], &env->fp_status); \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.fld[0], sfprf); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_RE(xsredp, 1, float64, f64, 1)
+VSX_RE(xvredp, 2, float64, f64, 0)
+VSX_RE(xvresp, 4, float32, f32, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 6ede7ea..fe5b61c 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -255,16 +255,19 @@ DEF_HELPER_2(xsadddp, void, env, i32)
DEF_HELPER_2(xssubdp, void, env, i32)
DEF_HELPER_2(xsmuldp, void, env, i32)
DEF_HELPER_2(xsdivdp, void, env, i32)
+DEF_HELPER_2(xsredp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
DEF_HELPER_2(xvmuldp, void, env, i32)
DEF_HELPER_2(xvdivdp, void, env, i32)
+DEF_HELPER_2(xvredp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
DEF_HELPER_2(xvmulsp, void, env, i32)
DEF_HELPER_2(xvdivsp, void, env, i32)
+DEF_HELPER_2(xvresp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index e77fcde..d2060b7 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7299,16 +7299,19 @@ GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -9996,16 +9999,19 @@ GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX),
GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX),
GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX),
GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX),
+GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX),
GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX),
GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 12/22] target-ppc: Add VSX ISA2.06 xsqrt Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (10 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 11/22] target-ppc: Add VSX ISA2.06 xre Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 13/22] target-ppc: Add VSX ISA2.06 xrsqrte Instructions Tom Musta
` (9 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point square root instructions
defined by V2.06 of the PowerPC ISA: xssqrtdp, xvsqrtdp, xvsqrtsp.
V2: re-implemented the VSX_SQRT macro.
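A tiny stand-alone check, approximating the float64_is_neg/float64_is_zero tests in the helper, of the operand condition that raises VXSQRT: a negative, non-zero input (NaN operands go down the VXSNAN path instead).

    #include <math.h>
    #include <stdio.h>

    /* VXSQRT: square root of a negative, non-zero, non-NaN operand. */
    static int is_vxsqrt(double b)
    {
        return signbit(b) && b != 0.0 && !isnan(b);
    }

    int main(void)
    {
        printf("%d\n", is_vxsqrt(-2.0));  /* 1: invalid square root  */
        printf("%d\n", is_vxsqrt(-0.0));  /* 0: sqrt(-0) is just -0  */
        printf("%d\n", is_vxsqrt(9.0));   /* 0: ordinary square root */
        return 0;
    }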
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 3 +++
target-ppc/translate.c | 6 ++++++
3 files changed, 53 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 5908e41..060e6a0 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1939,3 +1939,47 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_RE(xsredp, 1, float64, f64, 1)
VSX_RE(xvredp, 2, float64, f64, 0)
VSX_RE(xvresp, 4, float32, f32, 0)
+
+/* VSX_SQRT - VSX floating point square root
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * sfprf - set FPRF
+ */
+#define VSX_SQRT(op, nels, tp, fld, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xb; \
+ int i; \
+ \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ helper_reset_fpstatus(env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ float_status tstat = env->fp_status; \
+ set_float_exception_flags(0, &tstat); \
+ xt.fld[i] = tp##_sqrt(xb.fld[i], &tstat); \
+ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+ \
+ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
+ if (tp##_is_neg(xb.fld[i]) && !tp##_is_zero(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf); \
+ } else if (tp##_is_signaling_nan(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \
+ } \
+ } \
+ \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.fld[i], sfprf); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_SQRT(xssqrtdp, 1, float64, f64, 1)
+VSX_SQRT(xvsqrtdp, 2, float64, f64, 0)
+VSX_SQRT(xvsqrtsp, 4, float32, f32, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index fe5b61c..a6e7e62 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -256,18 +256,21 @@ DEF_HELPER_2(xssubdp, void, env, i32)
DEF_HELPER_2(xsmuldp, void, env, i32)
DEF_HELPER_2(xsdivdp, void, env, i32)
DEF_HELPER_2(xsredp, void, env, i32)
+DEF_HELPER_2(xssqrtdp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
DEF_HELPER_2(xvmuldp, void, env, i32)
DEF_HELPER_2(xvdivdp, void, env, i32)
DEF_HELPER_2(xvredp, void, env, i32)
+DEF_HELPER_2(xvsqrtdp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
DEF_HELPER_2(xvmulsp, void, env, i32)
DEF_HELPER_2(xvdivsp, void, env, i32)
DEF_HELPER_2(xvresp, void, env, i32)
+DEF_HELPER_2(xvsqrtsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index d2060b7..c5c97ba 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7300,18 +7300,21 @@ GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10000,18 +10003,21 @@ GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX),
GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX),
GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX),
GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX),
+GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX),
GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX),
GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX),
+GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX),
GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX),
GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
+GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 13/22] target-ppc: Add VSX ISA2.06 xrsqrte Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (11 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 12/22] target-ppc: Add VSX ISA2.06 xsqrt Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 14/22] target-ppc: Add VSX ISA2.06 xtdiv Instructions Tom Musta
` (8 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point reciprocal square root
estimate instructions defined by V2.06 of the PowerPC ISA: xsrsqrtedp,
xvrsqrtedp, xvrsqrtesp.
V2: re-implemented VSX_RSQRTE macro.
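The reciprocal square root estimate is likewise computed at full precision, as a softfloat square root followed by a softfloat division of 1.0 by the result. The plain-C arithmetic equivalent is simply:

    #include <math.h>
    #include <stdio.h>

    /* xsrsqrtedp arithmetic: 1.0 / sqrt(x), at full double precision. */
    static double rsqrte_sketch(double b)
    {
        return 1.0 / sqrt(b);
    }

    int main(void)
    {
        printf("%g\n", rsqrte_sketch(4.0));    /* 0.5 */
        printf("%g\n", rsqrte_sketch(0.25));   /* 2   */
        return 0;
    }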
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 3 +++
target-ppc/translate.c | 6 ++++++
3 files changed, 54 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 060e6a0..31669f1 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1983,3 +1983,48 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_SQRT(xssqrtdp, 1, float64, f64, 1)
VSX_SQRT(xvsqrtdp, 2, float64, f64, 0)
VSX_SQRT(xvsqrtsp, 4, float32, f32, 0)
+
+/* VSX_RSQRTE - VSX floating point reciprocal square root estimate
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * sfprf - set FPRF
+ */
+#define VSX_RSQRTE(op, nels, tp, fld, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xb; \
+ int i; \
+ \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ helper_reset_fpstatus(env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ float_status tstat = env->fp_status; \
+ set_float_exception_flags(0, &tstat); \
+ xt.fld[i] = tp##_sqrt(xb.fld[i], &tstat); \
+ xt.fld[i] = tp##_div(tp##_one, xt.fld[i], &tstat); \
+ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+ \
+ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
+ if (tp##_is_neg(xb.fld[i]) && !tp##_is_zero(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf); \
+ } else if (tp##_is_signaling_nan(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \
+ } \
+ } \
+ \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.fld[i], sfprf); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_RSQRTE(xsrsqrtedp, 1, float64, f64, 1)
+VSX_RSQRTE(xvrsqrtedp, 2, float64, f64, 0)
+VSX_RSQRTE(xvrsqrtesp, 4, float32, f32, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index a6e7e62..4d5e31b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -257,6 +257,7 @@ DEF_HELPER_2(xsmuldp, void, env, i32)
DEF_HELPER_2(xsdivdp, void, env, i32)
DEF_HELPER_2(xsredp, void, env, i32)
DEF_HELPER_2(xssqrtdp, void, env, i32)
+DEF_HELPER_2(xsrsqrtedp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -264,6 +265,7 @@ DEF_HELPER_2(xvmuldp, void, env, i32)
DEF_HELPER_2(xvdivdp, void, env, i32)
DEF_HELPER_2(xvredp, void, env, i32)
DEF_HELPER_2(xvsqrtdp, void, env, i32)
+DEF_HELPER_2(xvrsqrtedp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -271,6 +273,7 @@ DEF_HELPER_2(xvmulsp, void, env, i32)
DEF_HELPER_2(xvdivsp, void, env, i32)
DEF_HELPER_2(xvresp, void, env, i32)
DEF_HELPER_2(xvsqrtsp, void, env, i32)
+DEF_HELPER_2(xvrsqrtesp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c5c97ba..287b924 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7301,6 +7301,7 @@ GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7308,6 +7309,7 @@ GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7315,6 +7317,7 @@ GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10004,6 +10007,7 @@ GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX),
GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX),
GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX),
GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX),
+GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10011,6 +10015,7 @@ GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX),
GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX),
GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX),
GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX),
+GEN_XX2FORM(xvrsqrtedp, 0x14, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10018,6 +10023,7 @@ GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX),
GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX),
GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
+GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 14/22] target-ppc: Add VSX ISA2.06 xtdiv Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (12 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 13/22] target-ppc: Add VSX ISA2.06 xrsqrte Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 15/22] target-ppc: Add VSX ISA2.06 xtsqrt Instructions Tom Musta
` (7 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point test for software divide
instructions defined by V2.06 of the PowerPC ISA: xstdivdp, xvtdivdp,
and xvtdivsp.
V2: added ppc_float*_get_unbiased_exp() routines (pulled back from
softfloat). Eliminated dependency on float*_is_denormalized()
routines.
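The new ppc_float64_get_unbiased_exp() routine just extracts the 11-bit exponent field from the raw IEEE-754 bit pattern and subtracts the bias of 1023. A stand-alone version, using memcpy on a plain double instead of QEMU's float64 typedef:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Unbiased exponent of an IEEE-754 double: bits 62..52 minus 1023. */
    static int unbiased_exp(double d)
    {
        uint64_t bits;
        memcpy(&bits, &d, sizeof(bits));
        return (int)((bits >> 52) & 0x7FF) - 1023;
    }

    int main(void)
    {
        printf("%d\n", unbiased_exp(1.0));    /*  0 */
        printf("%d\n", unbiased_exp(8.0));    /*  3 */
        printf("%d\n", unbiased_exp(0.375));  /* -2 */
        return 0;
    }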
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 3 ++
target-ppc/translate.c | 6 ++++
3 files changed, 76 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 31669f1..ee03942 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2028,3 +2028,70 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_RSQRTE(xsrsqrtedp, 1, float64, f64, 1)
VSX_RSQRTE(xvrsqrtedp, 2, float64, f64, 0)
VSX_RSQRTE(xvrsqrtesp, 4, float32, f32, 0)
+
+static inline int ppc_float32_get_unbiased_exp(float32 f)
+{
+ return ((f >> 23) & 0xFF) - 127;
+}
+
+static inline int ppc_float64_get_unbiased_exp(float64 f)
+{
+ return ((f >> 52) & 0x7FF) - 1023;
+}
+
+/* VSX_TDIV - VSX floating point test for divide
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * emin - minimum unbiased exponent
+ * emax - maximum unbiased exponent
+ * nbits - number of fraction bits
+ */
+#define VSX_TDIV(op, nels, tp, fld, emin, emax, nbits) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xa, xb; \
+ int i; \
+ int fe_flag = 0; \
+ int fg_flag = 0; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ if (unlikely(tp##_is_infinity(xa.fld[i]) || \
+ tp##_is_infinity(xb.fld[i]) || \
+ tp##_is_zero(xb.fld[i]))) { \
+ fe_flag = 1; \
+ fg_flag = 1; \
+ } else { \
+ int e_a = ppc_##tp##_get_unbiased_exp(xa.fld[i]); \
+ int e_b = ppc_##tp##_get_unbiased_exp(xb.fld[i]); \
+ \
+ if (unlikely(tp##_is_any_nan(xa.fld[i]) || \
+ tp##_is_any_nan(xb.fld[i]))) { \
+ fe_flag = 1; \
+ } else if ((e_b <= emin) || (e_b >= (emax-2))) { \
+ fe_flag = 1; \
+ } else if (!tp##_is_zero(xa.fld[i]) && \
+ (((e_a - e_b) >= emax) || \
+ ((e_a - e_b) <= (emin+1)) || \
+ (e_a <= (emin+nbits)))) { \
+ fe_flag = 1; \
+ } \
+ \
+ if (unlikely(tp##_is_zero_or_denormal(xb.fld[i]))) { \
+ /* XB is not zero because of the above check and */ \
+ /* so must be denormalized. */ \
+ fg_flag = 1; \
+ } \
+ } \
+ } \
+ \
+ env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+}
+
+VSX_TDIV(xstdivdp, 1, float64, f64, -1022, 1023, 52)
+VSX_TDIV(xvtdivdp, 2, float64, f64, -1022, 1023, 52)
+VSX_TDIV(xvtdivsp, 4, float32, f32, -126, 127, 23)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 4d5e31b..80cffc9 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -258,6 +258,7 @@ DEF_HELPER_2(xsdivdp, void, env, i32)
DEF_HELPER_2(xsredp, void, env, i32)
DEF_HELPER_2(xssqrtdp, void, env, i32)
DEF_HELPER_2(xsrsqrtedp, void, env, i32)
+DEF_HELPER_2(xstdivdp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -266,6 +267,7 @@ DEF_HELPER_2(xvdivdp, void, env, i32)
DEF_HELPER_2(xvredp, void, env, i32)
DEF_HELPER_2(xvsqrtdp, void, env, i32)
DEF_HELPER_2(xvrsqrtedp, void, env, i32)
+DEF_HELPER_2(xvtdivdp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -274,6 +276,7 @@ DEF_HELPER_2(xvdivsp, void, env, i32)
DEF_HELPER_2(xvresp, void, env, i32)
DEF_HELPER_2(xvsqrtsp, void, env, i32)
DEF_HELPER_2(xvrsqrtesp, void, env, i32)
+DEF_HELPER_2(xvtdivsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 287b924..9a0b36c 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7302,6 +7302,7 @@ GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xstdivdp, 0x14, 0x07, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7310,6 +7311,7 @@ GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7318,6 +7320,7 @@ GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10008,6 +10011,7 @@ GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX),
GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX),
GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX),
GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX),
+GEN_XX3FORM(xstdivdp, 0x14, 0x07, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10016,6 +10020,7 @@ GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX),
GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX),
GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX),
GEN_XX2FORM(xvrsqrtedp, 0x14, 0x0C, PPC2_VSX),
+GEN_XX3FORM(xvtdivdp, 0x14, 0x0F, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10024,6 +10029,7 @@ GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX),
GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX),
+GEN_XX3FORM(xvtdivsp, 0x14, 0x0B, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 15/22] target-ppc: Add VSX ISA2.06 xtsqrt Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (13 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 14/22] target-ppc: Add VSX ISA2.06 xtdiv Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 16/22] target-ppc: Add VSX ISA2.06 Multiply Add Instructions Tom Musta
` (6 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point test for software square
root instructions defined by V2.06 of the PowerPC ISA: xstsqrtdp,
xvtsqrtdp, xvtsqrtsp.
V2: (a) using locally implemented ppc_float*_get_unbiased_exp
routines (b) eliminated dependency on float*_is_denormal().
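Both test instructions (this one and xtdiv from the previous patch) fold their result into a CR field the same way: bit value 8 is always set, fg maps onto 4, and fe onto 2. A tiny sketch of that packing:

    #include <stdio.h>

    /* CR field written by xstsqrtdp/xstdivdp: 0x8 | fg<<2 | fe<<1. */
    static unsigned pack_crf(int fg_flag, int fe_flag)
    {
        return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
    }

    int main(void)
    {
        printf("%x\n", pack_crf(0, 0));  /* 8 */
        printf("%x\n", pack_crf(1, 1));  /* e */
        return 0;
    }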
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 3 ++
target-ppc/translate.c | 6 +++++
3 files changed, 63 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index ee03942..73227b7 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2095,3 +2095,57 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_TDIV(xstdivdp, 1, float64, f64, -1022, 1023, 52)
VSX_TDIV(xvtdivdp, 2, float64, f64, -1022, 1023, 52)
VSX_TDIV(xvtdivsp, 4, float32, f32, -126, 127, 23)
+
+/* VSX_TSQRT - VSX floating point test for square root
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * emin - minimum unbiased exponent
+ * emax - maximum unbiased exponent
+ * nbits - number of fraction bits
+ */
+#define VSX_TSQRT(op, nels, tp, fld, emin, nbits) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xa, xb; \
+ int i; \
+ int fe_flag = 0; \
+ int fg_flag = 0; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ if (unlikely(tp##_is_infinity(xb.fld[i]) || \
+ tp##_is_zero(xb.fld[i]))) { \
+ fe_flag = 1; \
+ fg_flag = 1; \
+ } else { \
+ int e_b = ppc_##tp##_get_unbiased_exp(xb.fld[i]); \
+ \
+ if (unlikely(tp##_is_any_nan(xb.fld[i]))) { \
+ fe_flag = 1; \
+ } else if (unlikely(tp##_is_zero(xb.fld[i]))) { \
+ fe_flag = 1; \
+ } else if (unlikely(tp##_is_neg(xb.fld[i]))) { \
+ fe_flag = 1; \
+ } else if (!tp##_is_zero(xb.fld[i]) && \
+ (e_b <= (emin+nbits))) { \
+ fe_flag = 1; \
+ } \
+ \
+ if (unlikely(tp##_is_zero_or_denormal(xb.fld[i]))) { \
+ /* XB is not zero because of the above check and */ \
+ /* therefore must be denormalized. */ \
+ fg_flag = 1; \
+ } \
+ } \
+ } \
+ \
+ env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+}
+
+VSX_TSQRT(xstsqrtdp, 1, float64, f64, -1022, 52)
+VSX_TSQRT(xvtsqrtdp, 2, float64, f64, -1022, 52)
+VSX_TSQRT(xvtsqrtsp, 4, float32, f32, -126, 23)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 80cffc9..c413c98 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -259,6 +259,7 @@ DEF_HELPER_2(xsredp, void, env, i32)
DEF_HELPER_2(xssqrtdp, void, env, i32)
DEF_HELPER_2(xsrsqrtedp, void, env, i32)
DEF_HELPER_2(xstdivdp, void, env, i32)
+DEF_HELPER_2(xstsqrtdp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -268,6 +269,7 @@ DEF_HELPER_2(xvredp, void, env, i32)
DEF_HELPER_2(xvsqrtdp, void, env, i32)
DEF_HELPER_2(xvrsqrtedp, void, env, i32)
DEF_HELPER_2(xvtdivdp, void, env, i32)
+DEF_HELPER_2(xvtsqrtdp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -277,6 +279,7 @@ DEF_HELPER_2(xvresp, void, env, i32)
DEF_HELPER_2(xvsqrtsp, void, env, i32)
DEF_HELPER_2(xvrsqrtesp, void, env, i32)
DEF_HELPER_2(xvtdivsp, void, env, i32)
+DEF_HELPER_2(xvtsqrtsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 9a0b36c..f1935c7 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7303,6 +7303,7 @@ GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xstdivdp, 0x14, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xstsqrtdp, 0x14, 0x06, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7312,6 +7313,7 @@ GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7321,6 +7323,7 @@ GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10012,6 +10015,7 @@ GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX),
GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX),
GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX),
GEN_XX3FORM(xstdivdp, 0x14, 0x07, PPC2_VSX),
+GEN_XX2FORM(xstsqrtdp, 0x14, 0x06, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10021,6 +10025,7 @@ GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX),
GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX),
GEN_XX2FORM(xvrsqrtedp, 0x14, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvtdivdp, 0x14, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvtsqrtdp, 0x14, 0x0E, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10030,6 +10035,7 @@ GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX),
GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX),
GEN_XX3FORM(xvtdivsp, 0x14, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvtsqrtsp, 0x14, 0x0A, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 16/22] target-ppc: Add VSX ISA2.06 Multiply Add Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (14 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 15/22] target-ppc: Add VSX ISA2.06 xtsqrt Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 17/22] target-ppc: Add VSX xscmp*dp Instructions Tom Musta
` (5 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point multiply/add instructions
defined by V2.06 of the PowerPC ISA:
- xsmaddadp, xvmaddadp, xvmaddasp
- xsmaddmdp, xvmaddmdp, xvmaddmsp
- xsmsubadp, xvmsubadp, xvmsubasp
- xsmsubmdp, xvmsubmdp, xvmsubmsp
- xsnmaddadp, xvnmaddadp, xvnmaddasp
- xsnmaddmdp, xvnmaddmdp, xvnmaddmsp
- xsnmsubadp, xvnmsubadp, xvnmsubasp
- xsnmsubmdp, xvnmsubmdp, xvnmsubmsp
V2: reworked implementation per comments from Richard Henderson and
Peter Maydell.
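All eight scalar and sixteen vector forms reduce to a fused multiply-add with two optional negations: negate the addend (msub), negate the result (nmadd), or both (nmsub), which is what the MADD_FLGS/MSUB_FLGS/NMADD_FLGS/NMSUB_FLGS combinations select. A plain-C sketch using fma(), which, like the softfloat muladd, rounds only once:

    #include <math.h>
    #include <stdio.h>

    /* negate_c flips the sign of the addend, negate_result flips the sign
     * of the fused result: the same two knobs the helper macro passes to
     * float64_muladd()/float32_muladd(). */
    static double muladd_sketch(double a, double b, double c,
                                int negate_c, int negate_result)
    {
        double r = fma(a, b, negate_c ? -c : c);
        return negate_result ? -r : r;
    }

    int main(void)
    {
        printf("%g\n", muladd_sketch(2.0, 3.0, 1.0, 0, 0)); /* madd:   7 */
        printf("%g\n", muladd_sketch(2.0, 3.0, 1.0, 1, 0)); /* msub:   5 */
        printf("%g\n", muladd_sketch(2.0, 3.0, 1.0, 0, 1)); /* nmadd: -7 */
        printf("%g\n", muladd_sketch(2.0, 3.0, 1.0, 1, 1)); /* nmsub: -5 */
        return 0;
    }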
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 24 +++++++++++
target-ppc/translate.c | 48 ++++++++++++++++++++++
3 files changed, 172 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 73227b7..54c47c8 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2149,3 +2149,103 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_TSQRT(xstsqrtdp, 1, float64, f64, -1022, 52)
VSX_TSQRT(xvtsqrtdp, 2, float64, f64, -1022, 52)
VSX_TSQRT(xvtsqrtsp, 4, float32, f32, -126, 23)
+
+/* VSX_MADD - VSX floating point multiply/add variations
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * maddflgs - flags for the float*muladd routine that control the
+ * various forms (madd, msub, nmadd, nmsub)
+ * afrm - A form (1=A, 0=M)
+ * sfprf - set FPRF
+ */
+#define VSX_MADD(op, nels, tp, fld, maddflgs, afrm, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt_in, xa, xb, xt_out; \
+ ppc_vsr_t *b, *c; \
+ int i; \
+ \
+ if (afrm) { /* AxB + T */ \
+ b = &xb; \
+ c = &xt_in; \
+ } else { /* AxT + B */ \
+ b = &xt_in; \
+ c = &xb; \
+ } \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt_in, env); \
+ \
+ xt_out = xt_in; \
+ \
+ helper_reset_fpstatus(env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ float_status tstat = env->fp_status; \
+ set_float_exception_flags(0, &tstat); \
+ xt_out.fld[i] = tp##_muladd(xa.fld[i], b->fld[i], c->fld[i], \
+ maddflgs, &tstat); \
+ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
+ \
+ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
+ if (tp##_is_signaling_nan(xa.fld[i]) || \
+ tp##_is_signaling_nan(b->fld[i]) || \
+ tp##_is_signaling_nan(c->fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \
+ tstat.float_exception_flags &= ~float_flag_invalid; \
+ } \
+ if ((tp##_is_infinity(xa.fld[i]) && tp##_is_zero(b->fld[i])) || \
+ (tp##_is_zero(xa.fld[i]) && tp##_is_infinity(b->fld[i]))) { \
+ xt_out.fld[i] = float64_to_##tp(fload_invalid_op_excp(env, \
+ POWERPC_EXCP_FP_VXIMZ, sfprf), &env->fp_status); \
+ tstat.float_exception_flags &= ~float_flag_invalid; \
+ } \
+ if ((tstat.float_exception_flags & float_flag_invalid) && \
+ ((tp##_is_infinity(xa.fld[i]) || \
+ tp##_is_infinity(b->fld[i])) && \
+ tp##_is_infinity(c->fld[i]))) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \
+ } \
+ } \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt_out.fld[i], sfprf); \
+ } \
+ } \
+ putVSR(xT(opcode), &xt_out, env); \
+ helper_float_check_status(env); \
+}
+
+#define MADD_FLGS 0
+#define MSUB_FLGS float_muladd_negate_c
+#define NMADD_FLGS float_muladd_negate_result
+#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
+
+VSX_MADD(xsmaddadp, 1, float64, f64, MADD_FLGS, 1, 1)
+VSX_MADD(xsmaddmdp, 1, float64, f64, MADD_FLGS, 0, 1)
+VSX_MADD(xsmsubadp, 1, float64, f64, MSUB_FLGS, 1, 1)
+VSX_MADD(xsmsubmdp, 1, float64, f64, MSUB_FLGS, 0, 1)
+VSX_MADD(xsnmaddadp, 1, float64, f64, NMADD_FLGS, 1, 1)
+VSX_MADD(xsnmaddmdp, 1, float64, f64, NMADD_FLGS, 0, 1)
+VSX_MADD(xsnmsubadp, 1, float64, f64, NMSUB_FLGS, 1, 1)
+VSX_MADD(xsnmsubmdp, 1, float64, f64, NMSUB_FLGS, 0, 1)
+
+VSX_MADD(xvmaddadp, 2, float64, f64, MADD_FLGS, 1, 0)
+VSX_MADD(xvmaddmdp, 2, float64, f64, MADD_FLGS, 0, 0)
+VSX_MADD(xvmsubadp, 2, float64, f64, MSUB_FLGS, 1, 0)
+VSX_MADD(xvmsubmdp, 2, float64, f64, MSUB_FLGS, 0, 0)
+VSX_MADD(xvnmaddadp, 2, float64, f64, NMADD_FLGS, 1, 0)
+VSX_MADD(xvnmaddmdp, 2, float64, f64, NMADD_FLGS, 0, 0)
+VSX_MADD(xvnmsubadp, 2, float64, f64, NMSUB_FLGS, 1, 0)
+VSX_MADD(xvnmsubmdp, 2, float64, f64, NMSUB_FLGS, 0, 0)
+
+VSX_MADD(xvmaddasp, 4, float32, f32, MADD_FLGS, 1, 0)
+VSX_MADD(xvmaddmsp, 4, float32, f32, MADD_FLGS, 0, 0)
+VSX_MADD(xvmsubasp, 4, float32, f32, MSUB_FLGS, 1, 0)
+VSX_MADD(xvmsubmsp, 4, float32, f32, MSUB_FLGS, 0, 0)
+VSX_MADD(xvnmaddasp, 4, float32, f32, NMADD_FLGS, 1, 0)
+VSX_MADD(xvnmaddmsp, 4, float32, f32, NMADD_FLGS, 0, 0)
+VSX_MADD(xvnmsubasp, 4, float32, f32, NMSUB_FLGS, 1, 0)
+VSX_MADD(xvnmsubmsp, 4, float32, f32, NMSUB_FLGS, 0, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index c413c98..7368908 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -260,6 +260,14 @@ DEF_HELPER_2(xssqrtdp, void, env, i32)
DEF_HELPER_2(xsrsqrtedp, void, env, i32)
DEF_HELPER_2(xstdivdp, void, env, i32)
DEF_HELPER_2(xstsqrtdp, void, env, i32)
+DEF_HELPER_2(xsmaddadp, void, env, i32)
+DEF_HELPER_2(xsmaddmdp, void, env, i32)
+DEF_HELPER_2(xsmsubadp, void, env, i32)
+DEF_HELPER_2(xsmsubmdp, void, env, i32)
+DEF_HELPER_2(xsnmaddadp, void, env, i32)
+DEF_HELPER_2(xsnmaddmdp, void, env, i32)
+DEF_HELPER_2(xsnmsubadp, void, env, i32)
+DEF_HELPER_2(xsnmsubmdp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -270,6 +278,14 @@ DEF_HELPER_2(xvsqrtdp, void, env, i32)
DEF_HELPER_2(xvrsqrtedp, void, env, i32)
DEF_HELPER_2(xvtdivdp, void, env, i32)
DEF_HELPER_2(xvtsqrtdp, void, env, i32)
+DEF_HELPER_2(xvmaddadp, void, env, i32)
+DEF_HELPER_2(xvmaddmdp, void, env, i32)
+DEF_HELPER_2(xvmsubadp, void, env, i32)
+DEF_HELPER_2(xvmsubmdp, void, env, i32)
+DEF_HELPER_2(xvnmaddadp, void, env, i32)
+DEF_HELPER_2(xvnmaddmdp, void, env, i32)
+DEF_HELPER_2(xvnmsubadp, void, env, i32)
+DEF_HELPER_2(xvnmsubmdp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -280,6 +296,14 @@ DEF_HELPER_2(xvsqrtsp, void, env, i32)
DEF_HELPER_2(xvrsqrtesp, void, env, i32)
DEF_HELPER_2(xvtdivsp, void, env, i32)
DEF_HELPER_2(xvtsqrtsp, void, env, i32)
+DEF_HELPER_2(xvmaddasp, void, env, i32)
+DEF_HELPER_2(xvmaddmsp, void, env, i32)
+DEF_HELPER_2(xvmsubasp, void, env, i32)
+DEF_HELPER_2(xvmsubmsp, void, env, i32)
+DEF_HELPER_2(xvnmaddasp, void, env, i32)
+DEF_HELPER_2(xvnmaddmsp, void, env, i32)
+DEF_HELPER_2(xvnmsubasp, void, env, i32)
+DEF_HELPER_2(xvnmsubmsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index f1935c7..3a62125 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7304,6 +7304,14 @@ GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xstdivdp, 0x14, 0x07, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xstsqrtdp, 0x14, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmaddadp, 0x04, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmaddmdp, 0x04, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmsubadp, 0x04, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmsubmdp, 0x04, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmaddadp, 0x04, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmaddmdp, 0x04, 0x15, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmsubadp, 0x04, 0x16, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsnmsubmdp, 0x04, 0x17, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7314,6 +7322,14 @@ GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddadp, 0x04, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddmdp, 0x04, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubadp, 0x04, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubmdp, 0x04, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddadp, 0x04, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddmdp, 0x04, 0x1D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubadp, 0x04, 0x1E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubmdp, 0x04, 0x1F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7324,6 +7340,14 @@ GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddasp, 0x04, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaddmsp, 0x04, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubasp, 0x04, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmsubmsp, 0x04, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddasp, 0x04, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmaddmsp, 0x04, 0x19, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubasp, 0x04, 0x1A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvnmsubmsp, 0x04, 0x1B, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10016,6 +10040,14 @@ GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX),
GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX),
GEN_XX3FORM(xstdivdp, 0x14, 0x07, PPC2_VSX),
GEN_XX2FORM(xstsqrtdp, 0x14, 0x06, PPC2_VSX),
+GEN_XX3FORM(xsmaddadp, 0x04, 0x04, PPC2_VSX),
+GEN_XX3FORM(xsmaddmdp, 0x04, 0x05, PPC2_VSX),
+GEN_XX3FORM(xsmsubadp, 0x04, 0x06, PPC2_VSX),
+GEN_XX3FORM(xsmsubmdp, 0x04, 0x07, PPC2_VSX),
+GEN_XX3FORM(xsnmaddadp, 0x04, 0x14, PPC2_VSX),
+GEN_XX3FORM(xsnmaddmdp, 0x04, 0x15, PPC2_VSX),
+GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX),
+GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10026,6 +10058,14 @@ GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX),
GEN_XX2FORM(xvrsqrtedp, 0x14, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvtdivdp, 0x14, 0x0F, PPC2_VSX),
GEN_XX2FORM(xvtsqrtdp, 0x14, 0x0E, PPC2_VSX),
+GEN_XX3FORM(xvmaddadp, 0x04, 0x0C, PPC2_VSX),
+GEN_XX3FORM(xvmaddmdp, 0x04, 0x0D, PPC2_VSX),
+GEN_XX3FORM(xvmsubadp, 0x04, 0x0E, PPC2_VSX),
+GEN_XX3FORM(xvmsubmdp, 0x04, 0x0F, PPC2_VSX),
+GEN_XX3FORM(xvnmaddadp, 0x04, 0x1C, PPC2_VSX),
+GEN_XX3FORM(xvnmaddmdp, 0x04, 0x1D, PPC2_VSX),
+GEN_XX3FORM(xvnmsubadp, 0x04, 0x1E, PPC2_VSX),
+GEN_XX3FORM(xvnmsubmdp, 0x04, 0x1F, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10036,6 +10076,14 @@ GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX),
GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX),
GEN_XX3FORM(xvtdivsp, 0x14, 0x0B, PPC2_VSX),
GEN_XX2FORM(xvtsqrtsp, 0x14, 0x0A, PPC2_VSX),
+GEN_XX3FORM(xvmaddasp, 0x04, 0x08, PPC2_VSX),
+GEN_XX3FORM(xvmaddmsp, 0x04, 0x09, PPC2_VSX),
+GEN_XX3FORM(xvmsubasp, 0x04, 0x0A, PPC2_VSX),
+GEN_XX3FORM(xvmsubmsp, 0x04, 0x0B, PPC2_VSX),
+GEN_XX3FORM(xvnmaddasp, 0x04, 0x18, PPC2_VSX),
+GEN_XX3FORM(xvnmaddmsp, 0x04, 0x19, PPC2_VSX),
+GEN_XX3FORM(xvnmsubasp, 0x04, 0x1A, PPC2_VSX),
+GEN_XX3FORM(xvnmsubmsp, 0x04, 0x1B, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [Qemu-devel] [V4 PATCH 17/22] target-ppc: Add VSX xscmp*dp Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (15 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 16/22] target-ppc: Add VSX ISA2.06 Multiply Add Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 18/22] target-ppc: Add VSX xmax/xmin Instructions Tom Musta
` (4 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX scalar floating point compare ordered
and unordered instructions.
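The helper packs the comparison outcome into a 4-bit code (8 = less
than, 4 = greater than, 2 = equal, 1 = unordered) and writes it both to
FPSCR[FPRF] and to the CR field selected by BF. A purely illustrative
decode of that code (the function below is hypothetical and not part of
this patch):
/* Hypothetical helper: interpret the 4-bit result written by
 * helper_xscmpodp/helper_xscmpudp as a readable description. */
static const char *vsx_scalar_cmp_result(unsigned cc)
{
    if (cc & 8) {
        return "less than";
    }
    if (cc & 4) {
        return "greater than";
    }
    if (cc & 2) {
        return "equal";
    }
    return "unordered (at least one operand is a NaN)";
}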
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 39 +++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 2 ++
target-ppc/translate.c | 4 ++++
3 files changed, 45 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 54c47c8..eb5d878 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2249,3 +2249,42 @@ VSX_MADD(xvnmaddasp, 4, float32, f32, NMADD_FLGS, 1, 0)
VSX_MADD(xvnmaddmsp, 4, float32, f32, NMADD_FLGS, 0, 0)
VSX_MADD(xvnmsubasp, 4, float32, f32, NMSUB_FLGS, 1, 0)
VSX_MADD(xvnmsubmsp, 4, float32, f32, NMSUB_FLGS, 0, 0)
+
+#define VSX_SCALAR_CMP(op, ordered) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xa, xb; \
+ uint32_t cc = 0; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ \
+ if (unlikely(float64_is_any_nan(xa.f64[0]) || \
+ float64_is_any_nan(xb.f64[0]))) { \
+ if (float64_is_signaling_nan(xa.f64[0]) || \
+ float64_is_signaling_nan(xb.f64[0])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+ } \
+ if (ordered) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0); \
+ } \
+ cc = 1; \
+ } else { \
+ if (float64_lt(xa.f64[0], xb.f64[0], &env->fp_status)) { \
+ cc = 8; \
+ } else if (!float64_le(xa.f64[0], xb.f64[0], &env->fp_status)) { \
+ cc = 4; \
+ } else { \
+ cc = 2; \
+ } \
+ } \
+ \
+ env->fpscr &= ~(0x0F << FPSCR_FPRF); \
+ env->fpscr |= cc << FPSCR_FPRF; \
+ env->crf[BF(opcode)] = cc; \
+ \
+ helper_float_check_status(env); \
+}
+
+VSX_SCALAR_CMP(xscmpodp, 1)
+VSX_SCALAR_CMP(xscmpudp, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 7368908..cd72388 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -268,6 +268,8 @@ DEF_HELPER_2(xsnmaddadp, void, env, i32)
DEF_HELPER_2(xsnmaddmdp, void, env, i32)
DEF_HELPER_2(xsnmsubadp, void, env, i32)
DEF_HELPER_2(xsnmsubmdp, void, env, i32)
+DEF_HELPER_2(xscmpodp, void, env, i32)
+DEF_HELPER_2(xscmpudp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 3a62125..a2a4e2d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7312,6 +7312,8 @@ GEN_VSX_HELPER_2(xsnmaddadp, 0x04, 0x14, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsnmaddmdp, 0x04, 0x15, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsnmsubadp, 0x04, 0x16, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsnmsubmdp, 0x04, 0x17, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -10048,6 +10050,8 @@ GEN_XX3FORM(xsnmaddadp, 0x04, 0x14, PPC2_VSX),
GEN_XX3FORM(xsnmaddmdp, 0x04, 0x15, PPC2_VSX),
GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX),
GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX),
+GEN_XX2FORM(xscmpodp, 0x0C, 0x05, PPC2_VSX),
+GEN_XX2FORM(xscmpudp, 0x0C, 0x04, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
--
1.7.1
* [Qemu-devel] [V4 PATCH 18/22] target-ppc: Add VSX xmax/xmin Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (16 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 17/22] target-ppc: Add VSX xscmp*dp Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-24 16:23 ` Richard Henderson
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 19/22] target-ppc: Add VSX Vector Compare Instructions Tom Musta
` (3 subsequent siblings)
21 siblings, 1 reply; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point maximum and minimum
instructions:
- xsmaxdp, xvmaxdp, xvmaxsp
- xsmindp, xvmindp, xvminsp
Because of the Power ISA definitions of maximum and minimum
for boundary cases such as NaN operands, the standard softfloat
comparison routines (e.g. float64_lt) are not sufficient by
themselves. Therefore, specific routines for comparing 64- and
32-bit floating-point numbers are implemented in the PowerPC
helper code.
V2: consolidated into a single macro, using the softfloat
float*_max/float*_min routines.
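To make the boundary-case concern concrete: a naive maximum built only
on an ordered comparison misbehaves for NaN inputs. The sketch below is
illustrative only and not code from this patch; float64_lt() returns
false when either operand is a NaN, so the naive version silently picks
one operand without quieting a signaling NaN or setting the PowerPC
VXSNAN flag, which is why the macro in the diff special-cases NaN
operands before deferring to the softfloat max/min routines.
/* Naive maximum, for illustration only.  float64_lt(NaN, x) and
 * float64_lt(x, NaN) both return 0, so NaN inputs fall through to
 * 'a' with no SNaN quieting and no VXSNAN reporting. */
static float64 naive_maxdp(float64 a, float64 b, float_status *s)
{
    return float64_lt(a, b, s) ? b : a;
}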
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 6 +++++
target-ppc/translate.c | 12 +++++++++++
3 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index eb5d878..b90541c 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2288,3 +2288,53 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \
VSX_SCALAR_CMP(xscmpodp, 1)
VSX_SCALAR_CMP(xscmpudp, 0)
+
+#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ul)
+#define float32_snan_to_qnan(x) ((x) | 0x00400000)
+
+/* VSX_MAX_MIN - VSX floating point maximum/minimum
+ * name - instruction mnemonic
+ * op - operation (max or min)
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ */
+#define VSX_MAX_MIN(name, op, nels, tp, fld) \
+void helper_##name(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xa, xb; \
+ int i; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ if (unlikely(tp##_is_any_nan(xa.fld[i]) || \
+ tp##_is_any_nan(xb.fld[i]))) { \
+ if (tp##_is_signaling_nan(xa.fld[i])) { \
+ xt.fld[i] = tp##_snan_to_qnan(xa.fld[i]); \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+ } else if (tp##_is_signaling_nan(xb.fld[i])) { \
+ xt.fld[i] = tp##_snan_to_qnan(xb.fld[i]); \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+ } else if (tp##_is_quiet_nan(xb.fld[i])) { \
+ xt.fld[i] = xa.fld[i]; \
+ } else { /* XA is QNaN */ \
+ xt.fld[i] = xb.fld[i]; \
+ } \
+ } else { \
+ xt.fld[i] = tp##_##op(xa.fld[i], xb.fld[i], &env->fp_status); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_MAX_MIN(xsmaxdp, max, 1, float64, f64)
+VSX_MAX_MIN(xvmaxdp, max, 2, float64, f64)
+VSX_MAX_MIN(xvmaxsp, max, 4, float32, f32)
+VSX_MAX_MIN(xsmindp, min, 1, float64, f64)
+VSX_MAX_MIN(xvmindp, min, 2, float64, f64)
+VSX_MAX_MIN(xvminsp, min, 4, float32, f32)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index cd72388..4a65d39 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -270,6 +270,8 @@ DEF_HELPER_2(xsnmsubadp, void, env, i32)
DEF_HELPER_2(xsnmsubmdp, void, env, i32)
DEF_HELPER_2(xscmpodp, void, env, i32)
DEF_HELPER_2(xscmpudp, void, env, i32)
+DEF_HELPER_2(xsmaxdp, void, env, i32)
+DEF_HELPER_2(xsmindp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -288,6 +290,8 @@ DEF_HELPER_2(xvnmaddadp, void, env, i32)
DEF_HELPER_2(xvnmaddmdp, void, env, i32)
DEF_HELPER_2(xvnmsubadp, void, env, i32)
DEF_HELPER_2(xvnmsubmdp, void, env, i32)
+DEF_HELPER_2(xvmaxdp, void, env, i32)
+DEF_HELPER_2(xvmindp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -306,6 +310,8 @@ DEF_HELPER_2(xvnmaddasp, void, env, i32)
DEF_HELPER_2(xvnmaddmsp, void, env, i32)
DEF_HELPER_2(xvnmsubasp, void, env, i32)
DEF_HELPER_2(xvnmsubmsp, void, env, i32)
+DEF_HELPER_2(xvmaxsp, void, env, i32)
+DEF_HELPER_2(xvminsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a2a4e2d..10c238a 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7314,6 +7314,8 @@ GEN_VSX_HELPER_2(xsnmsubadp, 0x04, 0x16, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsnmsubmdp, 0x04, 0x17, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7332,6 +7334,8 @@ GEN_VSX_HELPER_2(xvnmaddadp, 0x04, 0x1C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmaddmdp, 0x04, 0x1D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmsubadp, 0x04, 0x1E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmsubmdp, 0x04, 0x1F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmindp, 0x00, 0x1D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7350,6 +7354,8 @@ GEN_VSX_HELPER_2(xvnmaddasp, 0x04, 0x18, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmaddmsp, 0x04, 0x19, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmsubasp, 0x04, 0x1A, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmsubmsp, 0x04, 0x1B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvminsp, 0x00, 0x19, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10052,6 +10058,8 @@ GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX),
GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX),
GEN_XX2FORM(xscmpodp, 0x0C, 0x05, PPC2_VSX),
GEN_XX2FORM(xscmpudp, 0x0C, 0x04, PPC2_VSX),
+GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX),
+GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10070,6 +10078,8 @@ GEN_XX3FORM(xvnmaddadp, 0x04, 0x1C, PPC2_VSX),
GEN_XX3FORM(xvnmaddmdp, 0x04, 0x1D, PPC2_VSX),
GEN_XX3FORM(xvnmsubadp, 0x04, 0x1E, PPC2_VSX),
GEN_XX3FORM(xvnmsubmdp, 0x04, 0x1F, PPC2_VSX),
+GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX),
+GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10088,6 +10098,8 @@ GEN_XX3FORM(xvnmaddasp, 0x04, 0x18, PPC2_VSX),
GEN_XX3FORM(xvnmaddmsp, 0x04, 0x19, PPC2_VSX),
GEN_XX3FORM(xvnmsubasp, 0x04, 0x1A, PPC2_VSX),
GEN_XX3FORM(xvnmsubmsp, 0x04, 0x1B, PPC2_VSX),
+GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX),
+GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
* Re: [Qemu-devel] [V4 PATCH 18/22] target-ppc: Add VSX xmax/xmin Instructions
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 18/22] target-ppc: Add VSX xmax/xmin Instructions Tom Musta
@ 2013-12-24 16:23 ` Richard Henderson
0 siblings, 0 replies; 31+ messages in thread
From: Richard Henderson @ 2013-12-24 16:23 UTC (permalink / raw)
To: Tom Musta, qemu-devel; +Cc: qemu-ppc
On 12/18/2013 12:19 PM, Tom Musta wrote:
> + if (unlikely(tp##_is_any_nan(xa.fld[i]) || \
> + tp##_is_any_nan(xb.fld[i]))) { \
> + if (tp##_is_signaling_nan(xa.fld[i])) { \
> + xt.fld[i] = tp##_snan_to_qnan(xa.fld[i]); \
> + fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
> + } else if (tp##_is_signaling_nan(xb.fld[i])) { \
> + xt.fld[i] = tp##_snan_to_qnan(xb.fld[i]); \
> + fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
> + } else if (tp##_is_quiet_nan(xb.fld[i])) { \
> + xt.fld[i] = xa.fld[i]; \
> + } else { /* XA is QNaN */ \
> + xt.fld[i] = xb.fld[i]; \
> + } \
> + } else { \
> + xt.fld[i] = tp##_##op(xa.fld[i], xb.fld[i], &env->fp_status); \
> + } \
We have minnum/maxnum now.
r~
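For context: the softfloat minnum/maxnum helpers implement the IEEE
754-2008 minNum/maxNum rule, where a quiet NaN operand loses to a
number, so the quiet-NaN branches of VSX_MAX_MIN could collapse onto
them. A rough sketch of the idea follows, assuming float64_maxnum()
behaves as described; this is not the actual follow-up patch, and the
signaling-NaN quieting and VXSNAN reporting are kept explicit:
/* Sketch only: per-element double-precision maximum that leaves the
 * quiet-NaN handling to float64_maxnum(). */
static float64 vsx_maxdp_elem(CPUPPCState *env, float64 a, float64 b)
{
    if (float64_is_signaling_nan(a)) {
        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);
        return float64_snan_to_qnan(a);
    }
    if (float64_is_signaling_nan(b)) {
        fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);
        return float64_snan_to_qnan(b);
    }
    return float64_maxnum(a, b, &env->fp_status);
}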
* [Qemu-devel] [V4 PATCH 19/22] target-ppc: Add VSX Vector Compare Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (17 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 18/22] target-ppc: Add VSX xmax/xmin Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 20/22] target-ppc: Add VSX Floating Point to Floating Point Conversion Instructions Tom Musta
` (2 subsequent siblings)
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX floating point compare vector instructions:
- xvcmpeqdp[.], xvcmpgedp[.], xvcmpgtdp[.]
- xvcmpeqsp[.], xvcmpgesp[.], xvcmpgtsp[.]
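The record ('.') forms additionally summarize the per-element results
in CR field 6: bit 0x8 is set when every element compared true and bit
0x2 when every element compared false. The helpers below are
hypothetical and shown only to illustrate how that CR6 value decodes:
#include <stdbool.h>
/* Hypothetical decode of the CR6 value written by the record forms. */
static bool vsx_cmp_all_true(unsigned cr6)
{
    return (cr6 & 0x8) != 0;
}
static bool vsx_cmp_all_false(unsigned cr6)
{
    return (cr6 & 0x2) != 0;
}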
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 6 +++++
target-ppc/translate.c | 23 +++++++++++++++++++
3 files changed, 86 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index b90541c..ec1df4f 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2338,3 +2338,60 @@ VSX_MAX_MIN(xvmaxsp, max, 4, float32, f32)
VSX_MAX_MIN(xsmindp, min, 1, float64, f64)
VSX_MAX_MIN(xvmindp, min, 2, float64, f64)
VSX_MAX_MIN(xvminsp, min, 4, float32, f32)
+
+/* VSX_CMP - VSX floating point compare
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * cmp - comparison operation
+ * svxvc - set VXVC bit
+ */
+#define VSX_CMP(op, nels, tp, fld, cmp, svxvc) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xa, xb; \
+ int i; \
+ int all_true = 1; \
+ int all_false = 1; \
+ \
+ getVSR(xA(opcode), &xa, env); \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ if (unlikely(tp##_is_any_nan(xa.fld[i]) || \
+ tp##_is_any_nan(xb.fld[i]))) { \
+ if (tp##_is_signaling_nan(xa.fld[i]) || \
+ tp##_is_signaling_nan(xb.fld[i])) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+ } \
+ if (svxvc) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0); \
+ } \
+ xt.fld[i] = 0; \
+ all_true = 0; \
+ } else { \
+ if (tp##_##cmp(xb.fld[i], xa.fld[i], &env->fp_status) == 1) { \
+ xt.fld[i] = -1; \
+ all_false = 0; \
+ } else { \
+ xt.fld[i] = 0; \
+ all_true = 0; \
+ } \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ if ((opcode >> (31-21)) & 1) { \
+ env->crf[6] = (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0); \
+ } \
+ helper_float_check_status(env); \
+ }
+
+VSX_CMP(xvcmpeqdp, 2, float64, f64, eq, 0)
+VSX_CMP(xvcmpgedp, 2, float64, f64, le, 1)
+VSX_CMP(xvcmpgtdp, 2, float64, f64, lt, 1)
+VSX_CMP(xvcmpeqsp, 4, float32, f32, eq, 0)
+VSX_CMP(xvcmpgesp, 4, float32, f32, le, 1)
+VSX_CMP(xvcmpgtsp, 4, float32, f32, lt, 1)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 4a65d39..35389c5 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -292,6 +292,9 @@ DEF_HELPER_2(xvnmsubadp, void, env, i32)
DEF_HELPER_2(xvnmsubmdp, void, env, i32)
DEF_HELPER_2(xvmaxdp, void, env, i32)
DEF_HELPER_2(xvmindp, void, env, i32)
+DEF_HELPER_2(xvcmpeqdp, void, env, i32)
+DEF_HELPER_2(xvcmpgedp, void, env, i32)
+DEF_HELPER_2(xvcmpgtdp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -312,6 +315,9 @@ DEF_HELPER_2(xvnmsubasp, void, env, i32)
DEF_HELPER_2(xvnmsubmsp, void, env, i32)
DEF_HELPER_2(xvmaxsp, void, env, i32)
DEF_HELPER_2(xvminsp, void, env, i32)
+DEF_HELPER_2(xvcmpeqsp, void, env, i32)
+DEF_HELPER_2(xvcmpgesp, void, env, i32)
+DEF_HELPER_2(xvcmpgtsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 10c238a..377a482 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7336,6 +7336,9 @@ GEN_VSX_HELPER_2(xvnmsubadp, 0x04, 0x1E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmsubmdp, 0x04, 0x1F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmindp, 0x00, 0x1D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7356,6 +7359,9 @@ GEN_VSX_HELPER_2(xvnmsubasp, 0x04, 0x1A, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvnmsubmsp, 0x04, 0x1B, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvminsp, 0x00, 0x19, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10006,6 +10012,17 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \
GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 0, PPC_NONE, fl2), \
GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 0, PPC_NONE, fl2)
+#undef GEN_XX3_RC_FORM
+#define GEN_XX3_RC_FORM(name, opc2, opc3, fl2) \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x00, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x10, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x10, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x10, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x10, 0, PPC_NONE, fl2)
+
#undef GEN_XX3FORM_DM
#define GEN_XX3FORM_DM(name, opc2, opc3) \
GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\
@@ -10080,6 +10097,9 @@ GEN_XX3FORM(xvnmsubadp, 0x04, 0x1E, PPC2_VSX),
GEN_XX3FORM(xvnmsubmdp, 0x04, 0x1F, PPC2_VSX),
GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX),
GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10100,6 +10120,9 @@ GEN_XX3FORM(xvnmsubasp, 0x04, 0x1A, PPC2_VSX),
GEN_XX3FORM(xvnmsubmsp, 0x04, 0x1B, PPC2_VSX),
GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX),
GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX),
+GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
* [Qemu-devel] [V4 PATCH 20/22] target-ppc: Add VSX Floating Point to Floating Point Conversion Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (18 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 19/22] target-ppc: Add VSX Vector Compare Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 21/22] target-ppc: Add VSX ISA2.06 Integer " Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 22/22] target-ppc: Add VSX Rounding Instructions Tom Musta
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX instructions that convert between floating
point formats: xscvdpsp, xscvspdp, xvcvdpsp, xvcvspdp.
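Single-precision values occupy the architecturally first (most
significant) word of each doubleword in a VSR, so the helpers index the
f32 array with an endian-dependent offset. The sketch below mirrors the
JOFFSET definition introduced by the patch; the wrapper function is
only illustrative:
#if defined(HOST_WORDS_BIGENDIAN)
#define JOFFSET 0
#else
#define JOFFSET 1
#endif
/* Illustrative only: host-side index of the word in f32[] that pairs
 * with double-precision element i in f64[]. */
static int vsr_sp_word(int i)
{
    return 2 * i + JOFFSET;
}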
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 4 ++++
target-ppc/translate.c | 8 ++++++++
3 files changed, 58 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index ec1df4f..c9fb59f 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2395,3 +2395,49 @@ VSX_CMP(xvcmpgtdp, 2, float64, f64, lt, 1)
VSX_CMP(xvcmpeqsp, 4, float32, f32, eq, 0)
VSX_CMP(xvcmpgesp, 4, float32, f32, le, 1)
VSX_CMP(xvcmpgtsp, 4, float32, f32, lt, 1)
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define JOFFSET 0
+#else
+#define JOFFSET 1
+#endif
+
+/* VSX_CVT_FP_TO_FP - VSX floating point/floating point conversion
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * stp - source type (float32 or float64)
+ * ttp - target type (float32 or float64)
+ * sfld - source vsr_t field
+ * tfld - target vsr_t field (f32 or f64)
+ * sfprf - set FPRF
+ */
+#define VSX_CVT_FP_TO_FP(op, nels, stp, ttp, sfld, tfld, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xb; \
+ int i; \
+ \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ int j = 2*i + JOFFSET; \
+ xt.tfld = stp##_to_##ttp(xb.sfld, &env->fp_status); \
+ if (unlikely(stp##_is_signaling_nan(xb.sfld))) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+ xt.tfld = ttp##_snan_to_qnan(xt.tfld); \
+ } \
+ if (sfprf) { \
+ helper_compute_fprf(env, ttp##_to_float64(xt.tfld, \
+ &env->fp_status), sfprf); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_CVT_FP_TO_FP(xscvdpsp, 1, float64, float32, f64[i], f32[j], 1)
+VSX_CVT_FP_TO_FP(xscvspdp, 1, float32, float64, f32[j], f64[i], 1)
+VSX_CVT_FP_TO_FP(xvcvdpsp, 2, float64, float32, f64[i], f32[j], 0)
+VSX_CVT_FP_TO_FP(xvcvspdp, 2, float32, float64, f32[j], f64[i], 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 35389c5..dd9518c 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -272,6 +272,8 @@ DEF_HELPER_2(xscmpodp, void, env, i32)
DEF_HELPER_2(xscmpudp, void, env, i32)
DEF_HELPER_2(xsmaxdp, void, env, i32)
DEF_HELPER_2(xsmindp, void, env, i32)
+DEF_HELPER_2(xscvdpsp, void, env, i32)
+DEF_HELPER_2(xscvspdp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -295,6 +297,7 @@ DEF_HELPER_2(xvmindp, void, env, i32)
DEF_HELPER_2(xvcmpeqdp, void, env, i32)
DEF_HELPER_2(xvcmpgedp, void, env, i32)
DEF_HELPER_2(xvcmpgtdp, void, env, i32)
+DEF_HELPER_2(xvcvdpsp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -318,6 +321,7 @@ DEF_HELPER_2(xvminsp, void, env, i32)
DEF_HELPER_2(xvcmpeqsp, void, env, i32)
DEF_HELPER_2(xvcmpgesp, void, env, i32)
DEF_HELPER_2(xvcmpgtsp, void, env, i32)
+DEF_HELPER_2(xvcvspdp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 377a482..1366ced 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7316,6 +7316,8 @@ GEN_VSX_HELPER_2(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7339,6 +7341,7 @@ GEN_VSX_HELPER_2(xvmindp, 0x00, 0x1D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpsp, 0x12, 0x18, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7362,6 +7365,7 @@ GEN_VSX_HELPER_2(xvminsp, 0x00, 0x19, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspdp, 0x12, 0x1C, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10077,6 +10081,8 @@ GEN_XX2FORM(xscmpodp, 0x0C, 0x05, PPC2_VSX),
GEN_XX2FORM(xscmpudp, 0x0C, 0x04, PPC2_VSX),
GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX),
GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX),
+GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX),
+GEN_XX2FORM(xscvspdp, 0x12, 0x14, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10100,6 +10106,7 @@ GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvcvdpsp, 0x12, 0x18, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10123,6 +10130,7 @@ GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvcvspdp, 0x12, 0x1C, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
* [Qemu-devel] [V4 PATCH 21/22] target-ppc: Add VSX ISA2.06 Integer Conversion Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (19 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 20/22] target-ppc: Add VSX Floating Point to Floating Point Conversion Instructions Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 22/22] target-ppc: Add VSX Rounding Instructions Tom Musta
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX Integer Conversion instructions defined by
V2.06 of the PowerPC ISA:
- xscvdpsxds, xscvdpsxws, xscvdpuxds, xscvdpuxws
- xvcvdpsxds, xvcvdpsxws, xvcvdpuxds, xvcvdpuxws
- xvcvspsxds, xvcvspsxws, xvcvspuxds, xvcvspuxws
- xscvsxddp, xscvuxddp
- xvcvsxddp, xvcvsxwdp, xvcvuxddp, xvcvuxwdp
- xvcvsxdsp, xvcvsxwsp, xvcvuxdsp, xvcvuxwsp
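When the source is a NaN, the helpers raise VXCVI (plus VXSNAN for a
signaling NaN) and substitute a fixed result, passed to the macro as
the 'rnan' argument: the most negative integer for signed targets and
zero for unsigned targets. A small illustrative table (not part of the
patch):
#include <stdint.h>
/* Illustrative only: the substitute results used for NaN inputs. */
static const struct {
    const char *insn;
    uint64_t nan_result;
} vsx_cvt_nan_results[] = {
    { "xscvdpsxds", 0x8000000000000000ull }, /* signed doubleword */
    { "xscvdpsxws", 0x80000000ull },         /* signed word */
    { "xscvdpuxds", 0 },                     /* unsigned doubleword */
    { "xscvdpuxws", 0 },                     /* unsigned word */
};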
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 22 ++++++++++
target-ppc/translate.c | 44 +++++++++++++++++++
3 files changed, 173 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index c9fb59f..f913ad7 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2441,3 +2441,110 @@ VSX_CVT_FP_TO_FP(xscvdpsp, 1, float64, float32, f64[i], f32[j], 1)
VSX_CVT_FP_TO_FP(xscvspdp, 1, float32, float64, f32[j], f64[i], 1)
VSX_CVT_FP_TO_FP(xvcvdpsp, 2, float64, float32, f64[i], f32[j], 0)
VSX_CVT_FP_TO_FP(xvcvspdp, 2, float32, float64, f32[j], f64[i], 0)
+
+/* VSX_CVT_FP_TO_INT - VSX floating point to integer conversion
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * stp - source type (float32 or float64)
+ * ttp - target type (int32, uint32, int64 or uint64)
+ * sfld - source vsr_t field
+ * tfld - target vsr_t field
+ * jdef - definition of the j index (i or 2*i)
+ * rnan - resulting NaN
+ */
+#define VSX_CVT_FP_TO_INT(op, nels, stp, ttp, sfld, tfld, jdef, rnan) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xb; \
+ int i; \
+ \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ int j = jdef; \
+ if (unlikely(stp##_is_any_nan(xb.sfld))) { \
+ if (stp##_is_signaling_nan(xb.sfld)) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+ } \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0); \
+ xt.tfld = rnan; \
+ } else { \
+ xt.tfld = stp##_to_##ttp(xb.sfld, &env->fp_status); \
+ if (env->fp_status.float_exception_flags & float_flag_invalid) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0); \
+ } \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_CVT_FP_TO_INT(xscvdpsxds, 1, float64, int64, f64[j], u64[i], i, \
+ 0x8000000000000000ul)
+VSX_CVT_FP_TO_INT(xscvdpsxws, 1, float64, int32, f64[i], u32[j], \
+ 2*i + JOFFSET, 0x80000000l)
+VSX_CVT_FP_TO_INT(xscvdpuxds, 1, float64, uint64, f64[j], u64[i], i, 0ul)
+VSX_CVT_FP_TO_INT(xscvdpuxws, 1, float64, uint32, f64[i], u32[j], \
+ 2*i + JOFFSET, 0)
+VSX_CVT_FP_TO_INT(xvcvdpsxds, 2, float64, int64, f64[j], u64[i], i, \
+ 0x8000000000000000ul)
+VSX_CVT_FP_TO_INT(xvcvdpsxws, 2, float64, int32, f64[i], u32[j], \
+ 2*i + JOFFSET, 0x80000000l)
+VSX_CVT_FP_TO_INT(xvcvdpuxds, 2, float64, uint64, f64[j], u64[i], i, 0ul)
+VSX_CVT_FP_TO_INT(xvcvdpuxws, 2, float64, uint32, f64[i], u32[j], \
+ 2*i + JOFFSET, 0)
+VSX_CVT_FP_TO_INT(xvcvspsxds, 2, float32, int64, f32[j], u64[i], \
+ 2*i + JOFFSET, 0x8000000000000000ul)
+VSX_CVT_FP_TO_INT(xvcvspsxws, 4, float32, int32, f32[j], u32[j], i, \
+ 0x80000000l)
+VSX_CVT_FP_TO_INT(xvcvspuxds, 2, float32, uint64, f32[j], u64[i], \
+ 2*i + JOFFSET, 0ul)
+VSX_CVT_FP_TO_INT(xvcvspuxws, 4, float32, uint32, f32[j], u32[i], i, 0)
+
+/* VSX_CVT_INT_TO_FP - VSX integer to floating point conversion
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * stp - source type (int32, uint32, int64 or uint64)
+ * ttp - target type (float32 or float64)
+ * sfld - source vsr_t field
+ * tfld - target vsr_t field
+ * jdef - definition of the j index (i or 2*i)
+ * sfprf - set FPRF
+ */
+#define VSX_CVT_INT_TO_FP(op, nels, stp, ttp, sfld, tfld, jdef, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xb; \
+ int i; \
+ \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ \
+ for (i = 0; i < nels; i++) { \
+ int j = jdef; \
+ xt.tfld = stp##_to_##ttp(xb.sfld, &env->fp_status); \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.tfld, sfprf); \
+ } \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_CVT_INT_TO_FP(xscvsxddp, 1, int64, float64, u64[j], f64[i], i, 1)
+VSX_CVT_INT_TO_FP(xscvuxddp, 1, uint64, float64, u64[j], f64[i], i, 1)
+VSX_CVT_INT_TO_FP(xvcvsxddp, 2, int64, float64, u64[j], f64[i], i, 0)
+VSX_CVT_INT_TO_FP(xvcvuxddp, 2, uint64, float64, u64[j], f64[i], i, 0)
+VSX_CVT_INT_TO_FP(xvcvsxwdp, 2, int32, float64, u32[j], f64[i], \
+ 2*i + JOFFSET, 0)
+VSX_CVT_INT_TO_FP(xvcvuxwdp, 2, uint64, float64, u32[j], f64[i], \
+ 2*i + JOFFSET, 0)
+VSX_CVT_INT_TO_FP(xvcvsxdsp, 2, int64, float32, u64[i], f32[j], \
+ 2*i + JOFFSET, 0)
+VSX_CVT_INT_TO_FP(xvcvuxdsp, 2, uint64, float32, u64[i], f32[j], \
+ 2*i + JOFFSET, 0)
+VSX_CVT_INT_TO_FP(xvcvsxwsp, 4, int32, float32, u32[j], f32[i], i, 0)
+VSX_CVT_INT_TO_FP(xvcvuxwsp, 4, uint32, float32, u32[j], f32[i], i, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index dd9518c..de46b6f 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -274,6 +274,12 @@ DEF_HELPER_2(xsmaxdp, void, env, i32)
DEF_HELPER_2(xsmindp, void, env, i32)
DEF_HELPER_2(xscvdpsp, void, env, i32)
DEF_HELPER_2(xscvspdp, void, env, i32)
+DEF_HELPER_2(xscvdpsxds, void, env, i32)
+DEF_HELPER_2(xscvdpsxws, void, env, i32)
+DEF_HELPER_2(xscvdpuxds, void, env, i32)
+DEF_HELPER_2(xscvdpuxws, void, env, i32)
+DEF_HELPER_2(xscvsxddp, void, env, i32)
+DEF_HELPER_2(xscvuxddp, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -298,6 +304,14 @@ DEF_HELPER_2(xvcmpeqdp, void, env, i32)
DEF_HELPER_2(xvcmpgedp, void, env, i32)
DEF_HELPER_2(xvcmpgtdp, void, env, i32)
DEF_HELPER_2(xvcvdpsp, void, env, i32)
+DEF_HELPER_2(xvcvdpsxds, void, env, i32)
+DEF_HELPER_2(xvcvdpsxws, void, env, i32)
+DEF_HELPER_2(xvcvdpuxds, void, env, i32)
+DEF_HELPER_2(xvcvdpuxws, void, env, i32)
+DEF_HELPER_2(xvcvsxddp, void, env, i32)
+DEF_HELPER_2(xvcvuxddp, void, env, i32)
+DEF_HELPER_2(xvcvsxwdp, void, env, i32)
+DEF_HELPER_2(xvcvuxwdp, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -322,6 +336,14 @@ DEF_HELPER_2(xvcmpeqsp, void, env, i32)
DEF_HELPER_2(xvcmpgesp, void, env, i32)
DEF_HELPER_2(xvcmpgtsp, void, env, i32)
DEF_HELPER_2(xvcvspdp, void, env, i32)
+DEF_HELPER_2(xvcvspsxds, void, env, i32)
+DEF_HELPER_2(xvcvspsxws, void, env, i32)
+DEF_HELPER_2(xvcvspuxds, void, env, i32)
+DEF_HELPER_2(xvcvspuxws, void, env, i32)
+DEF_HELPER_2(xvcvsxdsp, void, env, i32)
+DEF_HELPER_2(xvcvuxdsp, void, env, i32)
+DEF_HELPER_2(xvcvsxwsp, void, env, i32)
+DEF_HELPER_2(xvcvuxwsp, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1366ced..bb36e8f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7318,6 +7318,12 @@ GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpsxds, 0x10, 0x15, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpsxws, 0x10, 0x05, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpuxds, 0x10, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvdpuxws, 0x10, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvsxddp, 0x10, 0x17, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xscvuxddp, 0x10, 0x16, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7342,6 +7348,14 @@ GEN_VSX_HELPER_2(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvdpsp, 0x12, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpsxds, 0x10, 0x1D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpsxws, 0x10, 0x0D, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpuxds, 0x10, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvdpuxws, 0x10, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxddp, 0x10, 0x1F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxddp, 0x10, 0x1E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxwdp, 0x10, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxwdp, 0x10, 0x0E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7366,6 +7380,14 @@ GEN_VSX_HELPER_2(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvspdp, 0x12, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspsxds, 0x10, 0x19, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspsxws, 0x10, 0x09, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspuxds, 0x10, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvspuxws, 0x10, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxdsp, 0x10, 0x1B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxdsp, 0x10, 0x1A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvsxwsp, 0x10, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvcvuxwsp, 0x10, 0x0A, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10083,6 +10105,12 @@ GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX),
GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX),
GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX),
GEN_XX2FORM(xscvspdp, 0x12, 0x14, PPC2_VSX),
+GEN_XX2FORM(xscvdpsxds, 0x10, 0x15, PPC2_VSX),
+GEN_XX2FORM(xscvdpsxws, 0x10, 0x05, PPC2_VSX),
+GEN_XX2FORM(xscvdpuxds, 0x10, 0x14, PPC2_VSX),
+GEN_XX2FORM(xscvdpuxws, 0x10, 0x04, PPC2_VSX),
+GEN_XX2FORM(xscvsxddp, 0x10, 0x17, PPC2_VSX),
+GEN_XX2FORM(xscvuxddp, 0x10, 0x16, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10107,6 +10135,14 @@ GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX),
GEN_XX2FORM(xvcvdpsp, 0x12, 0x18, PPC2_VSX),
+GEN_XX2FORM(xvcvdpsxds, 0x10, 0x1D, PPC2_VSX),
+GEN_XX2FORM(xvcvdpsxws, 0x10, 0x0D, PPC2_VSX),
+GEN_XX2FORM(xvcvdpuxds, 0x10, 0x1C, PPC2_VSX),
+GEN_XX2FORM(xvcvdpuxws, 0x10, 0x0C, PPC2_VSX),
+GEN_XX2FORM(xvcvsxddp, 0x10, 0x1F, PPC2_VSX),
+GEN_XX2FORM(xvcvuxddp, 0x10, 0x1E, PPC2_VSX),
+GEN_XX2FORM(xvcvsxwdp, 0x10, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvcvuxwdp, 0x10, 0x0E, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10131,6 +10167,14 @@ GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX),
GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX),
GEN_XX2FORM(xvcvspdp, 0x12, 0x1C, PPC2_VSX),
+GEN_XX2FORM(xvcvspsxds, 0x10, 0x19, PPC2_VSX),
+GEN_XX2FORM(xvcvspsxws, 0x10, 0x09, PPC2_VSX),
+GEN_XX2FORM(xvcvspuxds, 0x10, 0x18, PPC2_VSX),
+GEN_XX2FORM(xvcvspuxws, 0x10, 0x08, PPC2_VSX),
+GEN_XX2FORM(xvcvsxdsp, 0x10, 0x1B, PPC2_VSX),
+GEN_XX2FORM(xvcvuxdsp, 0x10, 0x1A, PPC2_VSX),
+GEN_XX2FORM(xvcvsxwsp, 0x10, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvcvuxwsp, 0x10, 0x0A, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1
* [Qemu-devel] [V4 PATCH 22/22] target-ppc: Add VSX Rounding Instructions
2013-12-18 20:18 [Qemu-devel] [V4 PATCH 00/22] PowerPC VSX Stage 3 Tom Musta
` (20 preceding siblings ...)
2013-12-18 20:19 ` [Qemu-devel] [V4 PATCH 21/22] target-ppc: Add VSX ISA2.06 Integer " Tom Musta
@ 2013-12-18 20:19 ` Tom Musta
21 siblings, 0 replies; 31+ messages in thread
From: Tom Musta @ 2013-12-18 20:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Tom Musta, qemu-ppc
This patch adds the VSX Round to Floating Point Integer instructions:
- xsrdpi, xsrdpic, xsrdpim, xsrdpip, xsrdpiz
- xvrdpi, xvrdpic, xvrdpim, xvrdpip, xvrdpiz
- xvrspi, xvrspic, xvrspim, xvrspip, xvrspiz
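For the forms with an explicit rounding mode, the helpers temporarily
switch the softfloat rounding mode, perform the rounding, then restore
the mode programmed in the FPSCR and suppress the inexact flag. A
generic sketch of that save/override/restore pattern, illustrative only
and not the macro itself:
/* Sketch of the pattern used by the VSX_ROUND helpers: round a value
 * under an explicit mode, then restore the FPSCR-programmed mode. */
static float64 round_with_mode(CPUPPCState *env, float64 x, int rmode)
{
    float64 r;

    set_float_rounding_mode(rmode, &env->fp_status);
    r = float64_round_to_int(x, &env->fp_status);
    fpscr_set_rounding_mode(env);
    return r;
}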
Signed-off-by: Tom Musta <tommusta@gmail.com>
Reviewed-by: Richard Henderson <address@hidden>
---
target-ppc/fpu_helper.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++
target-ppc/helper.h | 15 ++++++++++
target-ppc/translate.c | 30 ++++++++++++++++++++
3 files changed, 113 insertions(+), 0 deletions(-)
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index f913ad7..f3d02cc 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2548,3 +2548,71 @@ VSX_CVT_INT_TO_FP(xvcvuxdsp, 2, uint64, float32, u64[i], f32[j], \
2*i + JOFFSET, 0)
VSX_CVT_INT_TO_FP(xvcvsxwsp, 4, int32, float32, u32[j], f32[i], i, 0)
VSX_CVT_INT_TO_FP(xvcvuxwsp, 4, uint32, float32, u32[j], f32[i], i, 0)
+
+/* For "use current rounding mode", define a value that will not be one of
+ * the existing rounding mode enums.
+ */
+#define FLOAT_ROUND_CURRENT (float_round_nearest_even + float_round_down + \
+ float_round_up + float_round_to_zero)
+
+/* VSX_ROUND - VSX floating point round
+ * op - instruction mnemonic
+ * nels - number of elements (1, 2 or 4)
+ * tp - type (float32 or float64)
+ * fld - vsr_t field (f32 or f64)
+ * rmode - rounding mode
+ * sfprf - set FPRF
+ */
+#define VSX_ROUND(op, nels, tp, fld, rmode, sfprf) \
+void helper_##op(CPUPPCState *env, uint32_t opcode) \
+{ \
+ ppc_vsr_t xt, xb; \
+ int i; \
+ getVSR(xB(opcode), &xb, env); \
+ getVSR(xT(opcode), &xt, env); \
+ \
+ if (rmode != FLOAT_ROUND_CURRENT) { \
+ set_float_rounding_mode(rmode, &env->fp_status); \
+ } \
+ \
+ for (i = 0; i < nels; i++) { \
+ if (unlikely(tp##_is_signaling_nan(xb.fld[i]))) { \
+ fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \
+ xt.fld[i] = tp##_snan_to_qnan(xb.fld[i]); \
+ } else { \
+ xt.fld[i] = tp##_round_to_int(xb.fld[i], &env->fp_status); \
+ } \
+ if (sfprf) { \
+ helper_compute_fprf(env, xt.fld[i], sfprf); \
+ } \
+ } \
+ \
+ /* If this is not a "use current rounding mode" instruction, \
+ * then inhibit setting of the XX bit and restore rounding \
+ * mode from FPSCR */ \
+ if (rmode != FLOAT_ROUND_CURRENT) { \
+ fpscr_set_rounding_mode(env); \
+ env->fp_status.float_exception_flags &= ~float_flag_inexact; \
+ } \
+ \
+ putVSR(xT(opcode), &xt, env); \
+ helper_float_check_status(env); \
+}
+
+VSX_ROUND(xsrdpi, 1, float64, f64, float_round_nearest_even, 1)
+VSX_ROUND(xsrdpic, 1, float64, f64, FLOAT_ROUND_CURRENT, 1)
+VSX_ROUND(xsrdpim, 1, float64, f64, float_round_down, 1)
+VSX_ROUND(xsrdpip, 1, float64, f64, float_round_up, 1)
+VSX_ROUND(xsrdpiz, 1, float64, f64, float_round_to_zero, 1)
+
+VSX_ROUND(xvrdpi, 2, float64, f64, float_round_nearest_even, 0)
+VSX_ROUND(xvrdpic, 2, float64, f64, FLOAT_ROUND_CURRENT, 0)
+VSX_ROUND(xvrdpim, 2, float64, f64, float_round_down, 0)
+VSX_ROUND(xvrdpip, 2, float64, f64, float_round_up, 0)
+VSX_ROUND(xvrdpiz, 2, float64, f64, float_round_to_zero, 0)
+
+VSX_ROUND(xvrspi, 4, float32, f32, float_round_nearest_even, 0)
+VSX_ROUND(xvrspic, 4, float32, f32, FLOAT_ROUND_CURRENT, 0)
+VSX_ROUND(xvrspim, 4, float32, f32, float_round_down, 0)
+VSX_ROUND(xvrspip, 4, float32, f32, float_round_up, 0)
+VSX_ROUND(xvrspiz, 4, float32, f32, float_round_to_zero, 0)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index de46b6f..0276b02 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -280,6 +280,11 @@ DEF_HELPER_2(xscvdpuxds, void, env, i32)
DEF_HELPER_2(xscvdpuxws, void, env, i32)
DEF_HELPER_2(xscvsxddp, void, env, i32)
DEF_HELPER_2(xscvuxddp, void, env, i32)
+DEF_HELPER_2(xsrdpi, void, env, i32)
+DEF_HELPER_2(xsrdpic, void, env, i32)
+DEF_HELPER_2(xsrdpim, void, env, i32)
+DEF_HELPER_2(xsrdpip, void, env, i32)
+DEF_HELPER_2(xsrdpiz, void, env, i32)
DEF_HELPER_2(xvadddp, void, env, i32)
DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -312,6 +317,11 @@ DEF_HELPER_2(xvcvsxddp, void, env, i32)
DEF_HELPER_2(xvcvuxddp, void, env, i32)
DEF_HELPER_2(xvcvsxwdp, void, env, i32)
DEF_HELPER_2(xvcvuxwdp, void, env, i32)
+DEF_HELPER_2(xvrdpi, void, env, i32)
+DEF_HELPER_2(xvrdpic, void, env, i32)
+DEF_HELPER_2(xvrdpim, void, env, i32)
+DEF_HELPER_2(xvrdpip, void, env, i32)
+DEF_HELPER_2(xvrdpiz, void, env, i32)
DEF_HELPER_2(xvaddsp, void, env, i32)
DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -344,6 +354,11 @@ DEF_HELPER_2(xvcvsxdsp, void, env, i32)
DEF_HELPER_2(xvcvuxdsp, void, env, i32)
DEF_HELPER_2(xvcvsxwsp, void, env, i32)
DEF_HELPER_2(xvcvuxwsp, void, env, i32)
+DEF_HELPER_2(xvrspi, void, env, i32)
+DEF_HELPER_2(xvrspic, void, env, i32)
+DEF_HELPER_2(xvrspim, void, env, i32)
+DEF_HELPER_2(xvrspip, void, env, i32)
+DEF_HELPER_2(xvrspiz, void, env, i32)
DEF_HELPER_2(efscfsi, i32, env, i32)
DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index bb36e8f..52d7165 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7324,6 +7324,11 @@ GEN_VSX_HELPER_2(xscvdpuxds, 0x10, 0x14, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscvdpuxws, 0x10, 0x04, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscvsxddp, 0x10, 0x17, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xscvuxddp, 0x10, 0x16, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpi, 0x12, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpic, 0x16, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpim, 0x12, 0x07, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpip, 0x12, 0x06, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsrdpiz, 0x12, 0x05, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7356,6 +7361,11 @@ GEN_VSX_HELPER_2(xvcvsxddp, 0x10, 0x1F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvuxddp, 0x10, 0x1E, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvsxwdp, 0x10, 0x0F, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvuxwdp, 0x10, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpi, 0x12, 0x0C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpic, 0x16, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpim, 0x12, 0x0F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpip, 0x12, 0x0E, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrdpiz, 0x12, 0x0D, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7388,6 +7398,11 @@ GEN_VSX_HELPER_2(xvcvsxdsp, 0x10, 0x1B, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvuxdsp, 0x10, 0x1A, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvsxwsp, 0x10, 0x0B, 0, PPC2_VSX)
GEN_VSX_HELPER_2(xvcvuxwsp, 0x10, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspi, 0x12, 0x08, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
#define VSX_LOGICAL(name, tcg_op) \
static void glue(gen_, name)(DisasContext * ctx) \
@@ -10111,6 +10126,11 @@ GEN_XX2FORM(xscvdpuxds, 0x10, 0x14, PPC2_VSX),
GEN_XX2FORM(xscvdpuxws, 0x10, 0x04, PPC2_VSX),
GEN_XX2FORM(xscvsxddp, 0x10, 0x17, PPC2_VSX),
GEN_XX2FORM(xscvuxddp, 0x10, 0x16, PPC2_VSX),
+GEN_XX2FORM(xsrdpi, 0x12, 0x04, PPC2_VSX),
+GEN_XX2FORM(xsrdpic, 0x16, 0x06, PPC2_VSX),
+GEN_XX2FORM(xsrdpim, 0x12, 0x07, PPC2_VSX),
+GEN_XX2FORM(xsrdpip, 0x12, 0x06, PPC2_VSX),
+GEN_XX2FORM(xsrdpiz, 0x12, 0x05, PPC2_VSX),
GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10143,6 +10163,11 @@ GEN_XX2FORM(xvcvsxddp, 0x10, 0x1F, PPC2_VSX),
GEN_XX2FORM(xvcvuxddp, 0x10, 0x1E, PPC2_VSX),
GEN_XX2FORM(xvcvsxwdp, 0x10, 0x0F, PPC2_VSX),
GEN_XX2FORM(xvcvuxwdp, 0x10, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpi, 0x12, 0x0C, PPC2_VSX),
+GEN_XX2FORM(xvrdpic, 0x16, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpim, 0x12, 0x0F, PPC2_VSX),
+GEN_XX2FORM(xvrdpip, 0x12, 0x0E, PPC2_VSX),
+GEN_XX2FORM(xvrdpiz, 0x12, 0x0D, PPC2_VSX),
GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10175,6 +10200,11 @@ GEN_XX2FORM(xvcvsxdsp, 0x10, 0x1B, PPC2_VSX),
GEN_XX2FORM(xvcvuxdsp, 0x10, 0x1A, PPC2_VSX),
GEN_XX2FORM(xvcvsxwsp, 0x10, 0x0B, PPC2_VSX),
GEN_XX2FORM(xvcvuxwsp, 0x10, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspi, 0x12, 0x08, PPC2_VSX),
+GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX),
+GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX),
+GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX),
#undef VSX_LOGICAL
#define VSX_LOGICAL(name, opc2, opc3, fl2) \
--
1.7.1