[Qemu-devel] [PATCH] target-arm: Don't overflow when calculating value for signed VABAL

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH] target-arm: Don't overflow when calculating value for signed VABAL
@ 2011-04-11 15:32 Peter Maydell
  2011-04-12 21:32 ` Aurelien Jarno
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Maydell @ 2011-04-11 15:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: patches

In the VABAL instruction we take the absolute difference of two
values of size x and store it in a result of size 2x. This means
we have to be careful to calculate the absolute difference using
a wide enough type that we don't accidentally overflow.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/neon_helper.c |   38 +++++++++++++++++++++-----------------
 1 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index c3ac96a..7df925a 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1514,9 +1514,13 @@ uint64_t HELPER(neon_addl_saturate_s64)(uint64_t a, uint64_t b)
     return result;
 }
 
-#define DO_ABD(dest, x, y, type) do { \
-    type tmp_x = x; \
-    type tmp_y = y; \
+/* We have to do the arithmetic in a larger type than
+ * the input type, because for example with a signed 32 bit
+ * op the absolute difference can overflow a signed 32 bit value.
+ */
+#define DO_ABD(dest, x, y, intype, arithtype) do {            \
+    arithtype tmp_x = (intype)(x);                            \
+    arithtype tmp_y = (intype)(y);                            \
     dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \
     } while(0)
 
@@ -1524,12 +1528,12 @@ uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
 {
     uint64_t tmp;
     uint64_t result;
-    DO_ABD(result, a, b, uint8_t);
-    DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
+    DO_ABD(result, a, b, uint8_t, uint32_t);
+    DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t);
     result |= tmp << 16;
-    DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
+    DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t);
     result |= tmp << 32;
-    DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
+    DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t);
     result |= tmp << 48;
     return result;
 }
@@ -1538,12 +1542,12 @@ uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
 {
     uint64_t tmp;
     uint64_t result;
-    DO_ABD(result, a, b, int8_t);
-    DO_ABD(tmp, a >> 8, b >> 8, int8_t);
+    DO_ABD(result, a, b, int8_t, int32_t);
+    DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t);
     result |= tmp << 16;
-    DO_ABD(tmp, a >> 16, b >> 16, int8_t);
+    DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t);
     result |= tmp << 32;
-    DO_ABD(tmp, a >> 24, b >> 24, int8_t);
+    DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t);
     result |= tmp << 48;
     return result;
 }
@@ -1552,8 +1556,8 @@ uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
 {
     uint64_t tmp;
     uint64_t result;
-    DO_ABD(result, a, b, uint16_t);
-    DO_ABD(tmp, a >> 16, b >> 16, uint16_t);
+    DO_ABD(result, a, b, uint16_t, uint32_t);
+    DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
     return result | (tmp << 32);
 }
 
@@ -1561,22 +1565,22 @@ uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
 {
     uint64_t tmp;
     uint64_t result;
-    DO_ABD(result, a, b, int16_t);
-    DO_ABD(tmp, a >> 16, b >> 16, int16_t);
+    DO_ABD(result, a, b, int16_t, int32_t);
+    DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t);
     return result | (tmp << 32);
 }
 
 uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
 {
     uint64_t result;
-    DO_ABD(result, a, b, uint32_t);
+    DO_ABD(result, a, b, uint32_t, uint64_t);
     return result;
 }
 
 uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
 {
     uint64_t result;
-    DO_ABD(result, a, b, int32_t);
+    DO_ABD(result, a, b, int32_t, int64_t);
     return result;
 }
 #undef DO_ABD
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Qemu-devel] [PATCH] target-arm: Don't overflow when calculating value for signed VABAL
  2011-04-11 15:32 [Qemu-devel] [PATCH] target-arm: Don't overflow when calculating value for signed VABAL Peter Maydell
@ 2011-04-12 21:32 ` Aurelien Jarno
  2011-04-12 22:31   ` Peter Maydell
  0 siblings, 1 reply; 4+ messages in thread
From: Aurelien Jarno @ 2011-04-12 21:32 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, patches

On Mon, Apr 11, 2011 at 04:32:08PM +0100, Peter Maydell wrote:
> In the VABAL instruction we take the absolute difference of two
> values of size x and store it in a result of size 2x. This means
> we have to be careful to calculate the absolute difference using
> a wide enough type that we don't accidentally overflow.
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  target-arm/neon_helper.c |   38 +++++++++++++++++++++-----------------
>  1 files changed, 21 insertions(+), 17 deletions(-)
> 
> diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
> index c3ac96a..7df925a 100644
> --- a/target-arm/neon_helper.c
> +++ b/target-arm/neon_helper.c
> @@ -1514,9 +1514,13 @@ uint64_t HELPER(neon_addl_saturate_s64)(uint64_t a, uint64_t b)
>      return result;
>  }
>  
> -#define DO_ABD(dest, x, y, type) do { \
> -    type tmp_x = x; \
> -    type tmp_y = y; \
> +/* We have to do the arithmetic in a larger type than
> + * the input type, because for example with a signed 32 bit
> + * op the absolute difference can overflow a signed 32 bit value.
> + */
> +#define DO_ABD(dest, x, y, intype, arithtype) do {            \
> +    arithtype tmp_x = (intype)(x);                            \
> +    arithtype tmp_y = (intype)(y);                            \
>      dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \
>      } while(0)
>  
> @@ -1524,12 +1528,12 @@ uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
>  {
>      uint64_t tmp;
>      uint64_t result;
> -    DO_ABD(result, a, b, uint8_t);
> -    DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
> +    DO_ABD(result, a, b, uint8_t, uint32_t);
> +    DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t);
>      result |= tmp << 16;
> -    DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
> +    DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t);
>      result |= tmp << 32;
> -    DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
> +    DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t);
>      result |= tmp << 48;
>      return result;
>  }

Do we really need a 32-bit type for the computation here?

> @@ -1538,12 +1542,12 @@ uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b)
>  {
>      uint64_t tmp;
>      uint64_t result;
> -    DO_ABD(result, a, b, int8_t);
> -    DO_ABD(tmp, a >> 8, b >> 8, int8_t);
> +    DO_ABD(result, a, b, int8_t, int32_t);
> +    DO_ABD(tmp, a >> 8, b >> 8, int8_t, int32_t);
>      result |= tmp << 16;
> -    DO_ABD(tmp, a >> 16, b >> 16, int8_t);
> +    DO_ABD(tmp, a >> 16, b >> 16, int8_t, int32_t);
>      result |= tmp << 32;
> -    DO_ABD(tmp, a >> 24, b >> 24, int8_t);
> +    DO_ABD(tmp, a >> 24, b >> 24, int8_t, int32_t);
>      result |= tmp << 48;
>      return result;
>  }

Ditto.

> @@ -1552,8 +1556,8 @@ uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b)
>  {
>      uint64_t tmp;
>      uint64_t result;
> -    DO_ABD(result, a, b, uint16_t);
> -    DO_ABD(tmp, a >> 16, b >> 16, uint16_t);
> +    DO_ABD(result, a, b, uint16_t, uint32_t);
> +    DO_ABD(tmp, a >> 16, b >> 16, uint16_t, uint32_t);
>      return result | (tmp << 32);
>  }
>
> @@ -1561,22 +1565,22 @@ uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b)
>  {
>      uint64_t tmp;
>      uint64_t result;
> -    DO_ABD(result, a, b, int16_t);
> -    DO_ABD(tmp, a >> 16, b >> 16, int16_t);
> +    DO_ABD(result, a, b, int16_t, int32_t);
> +    DO_ABD(tmp, a >> 16, b >> 16, int16_t, int32_t);
>      return result | (tmp << 32);
>  }
>  
>  uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b)
>  {
>      uint64_t result;
> -    DO_ABD(result, a, b, uint32_t);
> +    DO_ABD(result, a, b, uint32_t, uint64_t);
>      return result;
>  }
>  
>  uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b)
>  {
>      uint64_t result;
> -    DO_ABD(result, a, b, int32_t);
> +    DO_ABD(result, a, b, int32_t, int64_t);
>      return result;
>  }
>  #undef DO_ABD

All the others looks fine.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Qemu-devel] [PATCH] target-arm: Don't overflow when calculating value for signed VABAL
  2011-04-12 21:32 ` Aurelien Jarno
@ 2011-04-12 22:31   ` Peter Maydell
  2011-04-13 20:45     ` Aurelien Jarno
  0 siblings, 1 reply; 4+ messages in thread
From: Peter Maydell @ 2011-04-12 22:31 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel, patches

On 12 April 2011 22:32, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Mon, Apr 11, 2011 at 04:32:08PM +0100, Peter Maydell wrote:

>> @@ -1524,12 +1528,12 @@ uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
>>  {
>>      uint64_t tmp;
>>      uint64_t result;
>> -    DO_ABD(result, a, b, uint8_t);
>> -    DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
>> +    DO_ABD(result, a, b, uint8_t, uint32_t);
>> +    DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t);
>>      result |= tmp << 16;
>> -    DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
>> +    DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t);
>>      result |= tmp << 32;
>> -    DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
>> +    DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t);
>>      result |= tmp << 48;
>>      return result;
>>  }
>
> Do we really need a 32-bit type for the computation here?

No, anything wider than 8 will do, but my guess was that in
practice 32 bits would be fractionally more efficient than
unnecessarily forcing 16 bit arithmetic. For that matter I
guess we could just say "int" and "unsigned int" since C
guarantees us at least 16 bits there.

-- PMM

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Qemu-devel] [PATCH] target-arm: Don't overflow when calculating value for signed VABAL
  2011-04-12 22:31   ` Peter Maydell
@ 2011-04-13 20:45     ` Aurelien Jarno
  0 siblings, 0 replies; 4+ messages in thread
From: Aurelien Jarno @ 2011-04-13 20:45 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, patches

On Tue, Apr 12, 2011 at 11:31:20PM +0100, Peter Maydell wrote:
> On 12 April 2011 22:32, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Mon, Apr 11, 2011 at 04:32:08PM +0100, Peter Maydell wrote:
> 
> >> @@ -1524,12 +1528,12 @@ uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b)
> >>  {
> >>      uint64_t tmp;
> >>      uint64_t result;
> >> -    DO_ABD(result, a, b, uint8_t);
> >> -    DO_ABD(tmp, a >> 8, b >> 8, uint8_t);
> >> +    DO_ABD(result, a, b, uint8_t, uint32_t);
> >> +    DO_ABD(tmp, a >> 8, b >> 8, uint8_t, uint32_t);
> >>      result |= tmp << 16;
> >> -    DO_ABD(tmp, a >> 16, b >> 16, uint8_t);
> >> +    DO_ABD(tmp, a >> 16, b >> 16, uint8_t, uint32_t);
> >>      result |= tmp << 32;
> >> -    DO_ABD(tmp, a >> 24, b >> 24, uint8_t);
> >> +    DO_ABD(tmp, a >> 24, b >> 24, uint8_t, uint32_t);
> >>      result |= tmp << 48;
> >>      return result;
> >>  }
> >
> > Do we really need a 32-bit type for the computation here?
> 
> No, anything wider than 8 will do, but my guess was that in
> practice 32 bits would be fractionally more efficient than
> unnecessarily forcing 16 bit arithmetic. For that matter I
> guess we could just say "int" and "unsigned int" since C
> guarantees us at least 16 bits there.

My guess was that in 2011 a compiler can optimize that itself if it is
faster, and so it should be presented the size that is really needed.

It turns to be the case on x86_64, but not on arm or ia64. I have
therefore applied this patch as is.

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-04-13 20:45 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-04-11 15:32 [Qemu-devel] [PATCH] target-arm: Don't overflow when calculating value for signed VABAL Peter Maydell
2011-04-12 21:32 ` Aurelien Jarno
2011-04-12 22:31   ` Peter Maydell
2011-04-13 20:45     ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).