qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Philippe Mathieu-Daudé" <philmd@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org, "Peter Maydell" <peter.maydell@linaro.org>,
	"Richard Henderson" <richard.henderson@linaro.org>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>
Subject: [PATCH v3 12/34] target/arm/tcg: Move NEON helpers to neon_helper.c
Date: Mon, 19 Jun 2023 17:42:40 +0200	[thread overview]
Message-ID: <20230619154302.80350-13-philmd@linaro.org> (raw)
In-Reply-To: <20230619154302.80350-1-philmd@linaro.org>

Move various NEON helpers to the well named neon_helper.c.

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 target/arm/tcg/helper-a64.c  |  80 --------------------------
 target/arm/tcg/neon_helper.c | 106 +++++++++++++++++++++++++++++++++++
 target/arm/tcg/op_helper.c   |  22 --------
 3 files changed, 106 insertions(+), 102 deletions(-)

diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index c43f22e7d4..6312238676 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -93,25 +93,6 @@ void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
     arm_rebuild_hflags(env);
 }
 
-/* 64bit/double versions of the neon float compare functions */
-uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    return -float64_eq_quiet(a, b, fpst);
-}
-
-uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    return -float64_le(b, a, fpst);
-}
-
-uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    return -float64_lt(b, a, fpst);
-}
-
 /* Reciprocal step and sqrt step. Note that unlike the A32/T32
  * versions, these do a fully fused multiply-add or
  * multiply-add-and-halve.
@@ -207,67 +188,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
     return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
 }
 
-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
-    uint64_t nsignmask = 0x0080008000800080ULL;
-    uint64_t wsignmask = 0x8000800080008000ULL;
-    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
-    uint64_t tmp1, tmp2;
-    uint64_t res, signres;
-
-    /* Extract odd elements, sign extend each to a 16 bit field */
-    tmp1 = a & elementmask;
-    tmp1 ^= nsignmask;
-    tmp1 |= wsignmask;
-    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
-    /* Ditto for the even elements */
-    tmp2 = (a >> 8) & elementmask;
-    tmp2 ^= nsignmask;
-    tmp2 |= wsignmask;
-    tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-
-    /* calculate the result by summing bits 0..14, 16..22, etc,
-     * and then adjusting the sign bits 15, 23, etc manually.
-     * This ensures the addition can't overflow the 16 bit field.
-     */
-    signres = (tmp1 ^ tmp2) & wsignmask;
-    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
-    res ^= signres;
-
-    return res;
-}
-
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
-    uint64_t tmp;
-
-    tmp = a & 0x00ff00ff00ff00ffULL;
-    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
-    return tmp;
-}
-
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
-    int32_t reslo, reshi;
-
-    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
-    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-
-    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
-    uint64_t tmp;
-
-    tmp = a & 0x0000ffff0000ffffULL;
-    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
-    return tmp;
-}
-
 /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
 uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
 {
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
index bc6c4a54e9..cd668eb43a 100644
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -1738,3 +1738,109 @@ void HELPER(neon_zip16)(void *vd, void *vm)
     rm[0] = m0;
     rd[0] = d0;
 }
+
+uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc,
+                          uint64_t ireg, uint64_t def)
+{
+    uint64_t tmp, val = 0;
+    uint32_t maxindex = ((desc & 3) + 1) * 8;
+    uint32_t base_reg = desc >> 2;
+    uint32_t shift, index, reg;
+
+    for (shift = 0; shift < 64; shift += 8) {
+        index = (ireg >> shift) & 0xff;
+        if (index < maxindex) {
+            reg = base_reg + (index >> 3);
+            tmp = *aa32_vfp_dreg(env, reg);
+            tmp = ((tmp >> ((index & 7) << 3)) & 0xff) << shift;
+        } else {
+            tmp = def & (0xffull << shift);
+        }
+        val |= tmp;
+    }
+    return val;
+}
+
+#ifdef TARGET_AARCH64
+
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+    uint64_t nsignmask = 0x0080008000800080ULL;
+    uint64_t wsignmask = 0x8000800080008000ULL;
+    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+    uint64_t tmp1, tmp2;
+    uint64_t res, signres;
+
+    /* Extract odd elements, sign extend each to a 16 bit field */
+    tmp1 = a & elementmask;
+    tmp1 ^= nsignmask;
+    tmp1 |= wsignmask;
+    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+    /* Ditto for the even elements */
+    tmp2 = (a >> 8) & elementmask;
+    tmp2 ^= nsignmask;
+    tmp2 |= wsignmask;
+    tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+    /* calculate the result by summing bits 0..14, 16..22, etc,
+     * and then adjusting the sign bits 15, 23, etc manually.
+     * This ensures the addition can't overflow the 16 bit field.
+     */
+    signres = (tmp1 ^ tmp2) & wsignmask;
+    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+    res ^= signres;
+
+    return res;
+}
+
+uint64_t HELPER(neon_addlp_u8)(uint64_t a)
+{
+    uint64_t tmp;
+
+    tmp = a & 0x00ff00ff00ff00ffULL;
+    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
+    return tmp;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+    int32_t reslo, reshi;
+
+    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
+
+uint64_t HELPER(neon_addlp_u16)(uint64_t a)
+{
+    uint64_t tmp;
+
+    tmp = a & 0x0000ffff0000ffffULL;
+    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
+    return tmp;
+}
+
+/* 64bit/double versions of the neon float compare functions */
+uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return -float64_eq_quiet(a, b, fpst);
+}
+
+uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return -float64_le(b, a, fpst);
+}
+
+uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return -float64_lt(b, a, fpst);
+}
+
+#endif /* TARGET_AARCH64 */
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
index 3baf8004f6..70a9c37b74 100644
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -82,28 +82,6 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
     raise_exception(env, excp, syndrome, target_el);
 }
 
-uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc,
-                          uint64_t ireg, uint64_t def)
-{
-    uint64_t tmp, val = 0;
-    uint32_t maxindex = ((desc & 3) + 1) * 8;
-    uint32_t base_reg = desc >> 2;
-    uint32_t shift, index, reg;
-
-    for (shift = 0; shift < 64; shift += 8) {
-        index = (ireg >> shift) & 0xff;
-        if (index < maxindex) {
-            reg = base_reg + (index >> 3);
-            tmp = *aa32_vfp_dreg(env, reg);
-            tmp = ((tmp >> ((index & 7) << 3)) & 0xff) << shift;
-        } else {
-            tmp = def & (0xffull << shift);
-        }
-        val |= tmp;
-    }
-    return val;
-}
-
 void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue)
 {
     /*
-- 
2.38.1



  parent reply	other threads:[~2023-06-19 15:46 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-19 15:42 [PATCH v3 00/34] target/arm/tcg: Remove inclusions of 'exec/helper-[proto/gen].h' Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 01/34] target/arm/tcg: Remove CONFIG_TCG #ifdef'ry check Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 02/34] target/arm/tcg: Rename helper template headers as '.h.inc' Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 03/34] target/arm/tcg: Extract iwMMXt helpers from the generic 'helper.h' Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 04/34] target/arm/tcg: Fix iwmmxt-related code style Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 05/34] target/arm/tcg: Expose some iwmmxt methods in 'translate.h' Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 06/34] target/arm/tcg: Extract iwmmxt code to translate-iwmmxt.c Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 07/34] target/arm/tcg: Reduce 'helper-iwmmxt.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 08/34] target/arm/tcg: Un-inline VFP translation helpers Philippe Mathieu-Daudé
2023-06-20  9:06   ` Richard Henderson
2023-06-19 15:42 ` [PATCH v3 09/34] target/arm/tcg: Move VFP helpers from helper-a64.c to vfp_helper.c Philippe Mathieu-Daudé
2023-06-20 10:20   ` Richard Henderson
2023-06-19 15:42 ` [PATCH v3 10/34] target/arm/tcg: Extract VFP definitions to 'helper-vfp.h.inc' Philippe Mathieu-Daudé
2023-06-19 15:42 ` [PATCH v3 11/34] target/arm/tcg: Reduce 'helper-vfp.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-20 10:22   ` Richard Henderson
2023-06-19 15:42 ` Philippe Mathieu-Daudé [this message]
2023-06-19 15:54 ` [PATCH v3 13/34] target/arm/tcg: Extract NEON definitions to 'helper-neon.h.inc' Philippe Mathieu-Daudé
2023-06-19 15:54   ` [PATCH v3 14/34] target/arm/tcg: Reduce 'helper-neon.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-20 10:24     ` Richard Henderson
2023-06-19 15:54   ` [PATCH v3 15/34] target/arm/tcg: Export some generic vector helpers Philippe Mathieu-Daudé
2023-06-20 10:24     ` Richard Henderson
2023-06-19 15:54   ` [PATCH v3 16/34] target/arm/tcg: Extract generic vector helpers to translate-gvec.c Philippe Mathieu-Daudé
2023-06-20 10:26     ` Richard Henderson
2023-06-19 15:54   ` [PATCH v3 17/34] target/arm/tcg: Extract gvec definitions to 'helper-gvec.h.inc' Philippe Mathieu-Daudé
2023-06-20 10:26     ` Richard Henderson
2023-06-19 15:54   ` [PATCH v3 18/34] target/arm/tcg: Reduce 'helper-gvec.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-20 10:28     ` Richard Henderson
2023-06-19 15:54   ` [PATCH v3 19/34] target/arm/tcg: Extract SVE2 definitions to 'helper-sve.h.inc' Philippe Mathieu-Daudé
2023-06-19 15:54   ` [PATCH v3 20/34] target/arm/tcg: Reduce 'helper-sve.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-20 10:30     ` Richard Henderson
2023-06-19 15:54   ` [PATCH v3 21/34] target/arm/tcg: Extract crypto definitions to 'helper-crypto.h.inc' Philippe Mathieu-Daudé
2023-06-20 10:36     ` Richard Henderson
2023-06-19 15:54   ` [PATCH v3 22/34] target/arm/tcg: Reduce 'helper-crypto.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-20 10:38     ` Richard Henderson
2023-06-20 11:37       ` Philippe Mathieu-Daudé
2023-06-19 15:54   ` [PATCH v3 23/34] target/arm/tcg: Reduce 'helper-mve.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-20 10:39     ` Richard Henderson
2023-06-19 15:55   ` [PATCH v3 24/34] target/arm/tcg: Reduce 'helper-sme.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-20 10:39     ` Richard Henderson
2023-06-19 15:55   ` [PATCH v3 25/34] target/arm/tcg: Extract PAuth definitions to 'helper-pauth.h.inc' Philippe Mathieu-Daudé
2023-06-20 10:40     ` Richard Henderson
2023-06-19 15:55   ` [PATCH v3 26/34] target/arm/tcg: Extract MemTag definitions to 'helper-mte.h.inc' Philippe Mathieu-Daudé
2023-06-20 10:40     ` Richard Henderson
2023-06-19 15:55   ` [PATCH v3 27/34] target/arm/tcg: Reduce 'helper-a64.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-19 15:55   ` [PATCH v3 28/34] target/arm/tcg: Move v8m_stackcheck() from op_helper.c to m_helper.c Philippe Mathieu-Daudé
2023-06-20 10:41     ` Richard Henderson
2023-06-19 15:55   ` [PATCH v3 29/34] target/arm/tcg: Extract M-profile definitions to 'helper-m.h.inc' Philippe Mathieu-Daudé
2023-06-20 10:42     ` Richard Henderson
2023-06-19 15:55   ` [PATCH v3 30/34] target/arm/tcg: Reduce 'helper-m.h.inc' inclusion Philippe Mathieu-Daudé
2023-06-19 15:55   ` [PATCH v3 31/34] target/arm/tcg: Inline 'exec/helper-gen.h' Philippe Mathieu-Daudé
2023-06-19 15:55   ` [PATCH v3 32/34] target/arm/tcg: Inline 'exec/helper-proto.h' Philippe Mathieu-Daudé
2023-06-19 15:55   ` [PATCH v3 33/34] target/arm/tcg: Rename 'helper.h' -> 'tcg/helper.h.inc' Philippe Mathieu-Daudé
2023-06-20 10:44     ` Richard Henderson
2023-06-19 15:55   ` [PATCH v3 34/34] tests/tcg/aarch64: Rename bti-crt.inc.c -> bti-crt.c.inc Philippe Mathieu-Daudé

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230619154302.80350-13-philmd@linaro.org \
    --to=philmd@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).