qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Cc: Christophe Lyon <christophe.lyon@st.com>, patches@linaro.org
Subject: [Qemu-devel] [PATCH 01/10] target-arm: Fix rounding constant addition for Neon shifts
Date: Tue, 15 Feb 2011 13:44:41 +0000	[thread overview]
Message-ID: <1297777490-5323-2-git-send-email-peter.maydell@linaro.org> (raw)
In-Reply-To: <1297777490-5323-1-git-send-email-peter.maydell@linaro.org>

From: Christophe Lyon <christophe.lyon@st.com>

Handle cases where adding the rounding constant could overflow in Neon
shift instructions: VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
[peter.maydell@linaro.org: fix handling of large shifts in rshl_s32,
calculate signed saturated value as other functions do.]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/neon_helper.c |  139 ++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 127 insertions(+), 12 deletions(-)

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index dc09968..cc63636 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -558,9 +558,28 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_s8, neon_s8, 4)
 NEON_VOP(rshl_s16, neon_s16, 2)
-NEON_VOP(rshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest;
+    int32_t val = (int32_t)valop;
+    int8_t shift = (int8_t)shiftop;
+    if ((shift >= 32) || (shift <= -32)) {
+        dest = 0;
+    } else if (shift < 0) {
+        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -574,7 +593,16 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val++;
         val >>= 1;
     } else if (shift < 0) {
-        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -596,9 +624,29 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_u8, neon_u8, 4)
 NEON_VOP(rshl_u16, neon_u16, 2)
-NEON_VOP(rshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
+{
+    uint32_t dest;
+    int8_t shift = (int8_t)shiftop;
+    if (shift >= 32 || shift < -32) {
+        dest = 0;
+    } else if (shift == -32) {
+        dest = val >> 31;
+    } else if (shift < 0) {
+        uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
@@ -607,9 +655,17 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
         val >>= 63;
-    } if (shift < 0) {
-        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
-        val >>= -shift;
+    } else if (shift < 0) {
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -784,14 +840,43 @@ uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
 NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop)
+{
+    uint32_t dest;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0) {
+        uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+        if ((dest >> shift) != val) {
+            SET_QC();
+            dest = ~0;
+        }
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else { \
         uint64_t tmp = val;
         val <<= shift;
@@ -817,22 +902,52 @@ uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
 NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator.  */
+uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest;
+    int32_t val = (int32_t)valop;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0) {
+        int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
+        dest = big_dest >> -shift;
+    } else {
+        dest = val << shift;
+        if ((dest >> shift) != val) {
+            SET_QC();
+            dest = (val >> 31) ^ ~SIGNBIT;
+        }
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
     int64_t val = valop;
 
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
-        int64_t tmp = val;;
+        int64_t tmp = val;
         val <<= shift;
         if ((val >> shift) != tmp) {
             SET_QC();
-            val = tmp >> 31;
+            val = (tmp >> 63) ^ ~SIGNBIT64;
         }
     }
     return val;
-- 
1.7.1

  reply	other threads:[~2011-02-15 13:45 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-15 13:44 [Qemu-devel] [PATCH 00/10] Fix Neon shift instructions Peter Maydell
2011-02-15 13:44 ` Peter Maydell [this message]
2011-02-15 13:44 ` [Qemu-devel] [PATCH 02/10] target-arm: Fix signed VRSHL by large shift counts Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 03/10] target-arm: Fix unsigned VRSHL.s8 and .s16 right shifts by type width Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 04/10] target-arm: fix unsigned 64 bit right shifts Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 05/10] target-arm: Fix saturated values for Neon " Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 06/10] target-arm: fix Neon VQSHRN and VSHRN Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 07/10] target-arm: fix decoding of Neon 64 bit shifts Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 08/10] target-arm: Fix signed VQRSHL by large shift counts Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 09/10] target-arm: Fix unsigned " Peter Maydell
2011-02-15 13:44 ` [Qemu-devel] [PATCH 10/10] target-arm: Fix shift by immediate and narrow where src, dest overlap Peter Maydell
2011-02-20 16:52   ` Aurelien Jarno
2011-02-15 16:35 ` [Qemu-devel] Re: [PATCH 00/10] Fix Neon shift instructions Christophe Lyon
2011-02-20 16:52 ` [Qemu-devel] " Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1297777490-5323-2-git-send-email-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=christophe.lyon@st.com \
    --cc=patches@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).