[Qemu-devel] [PATCH 4/8] bswap: Rewrite all ld<type>_<endian>_p functions

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: Blue Swirl <blauwirbel@gmail.com>, Riku Voipio <riku.voipio@iki.fi>
Subject: [Qemu-devel] [PATCH 4/8] bswap: Rewrite all ld<type>_<endian>_p functions
Date: Fri,  4 Jan 2013 16:39:29 -0800	[thread overview]
Message-ID: <1357346373-13898-5-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1357346373-13898-1-git-send-email-rth@twiddle.net>

Use the new host endian unaligned access functions instead of
open coding byte-by-byte references.  Remove assembly special
cases for i386 and ppc -- we've now exposed the operation to
the compiler sufficiently for these to be optimized automatically.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/qemu/bswap.h | 300 ++++++---------------------------------------------
 1 file changed, 30 insertions(+), 270 deletions(-)

diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h
index 381554b..68cda6a 100644
--- a/include/qemu/bswap.h
+++ b/include/qemu/bswap.h
@@ -215,11 +215,6 @@ static inline void cpu_to_be64wu(uint64_t *p, uint64_t v)
 #define leul_to_cpu(v) (v)
 #endif
 
-#undef le_bswap
-#undef be_bswap
-#undef le_bswaps
-#undef be_bswaps
-
 /* len must be one of 1, 2, 4 */
 static inline uint32_t qemu_bswap_len(uint32_t value, int len)
 {
@@ -377,115 +372,61 @@ static inline void stq_p(void *ptr, uint64_t v)
     memcpy(ptr, &v, sizeof(v));
 }
 
-/* NOTE: on arm, putting 2 in /proc/sys/debug/alignment so that the
-   kernel handles unaligned load/stores may give better results, but
-   it is a system wide setting : bad */
-#if defined(HOST_WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)
-
-/* conservative code for little endian unaligned accesses */
 static inline int lduw_le_p(const void *ptr)
 {
-#ifdef _ARCH_PPC
-    int val;
-    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
-    return val;
-#else
-    const uint8_t *p = ptr;
-    return p[0] | (p[1] << 8);
-#endif
+    return (uint16_t)le_bswap(lduw_p(ptr), 16);
 }
 
 static inline int ldsw_le_p(const void *ptr)
 {
-#ifdef _ARCH_PPC
-    int val;
-    __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
-    return (int16_t)val;
-#else
-    const uint8_t *p = ptr;
-    return (int16_t)(p[0] | (p[1] << 8));
-#endif
+    return (int16_t)le_bswap(lduw_p(ptr), 16);
 }
 
 static inline int ldl_le_p(const void *ptr)
 {
-#ifdef _ARCH_PPC
-    int val;
-    __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
-    return val;
-#else
-    const uint8_t *p = ptr;
-    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
-#endif
+    return le_bswap(ldl_p(ptr), 32);
 }
 
 static inline uint64_t ldq_le_p(const void *ptr)
 {
-    const uint8_t *p = ptr;
-    uint32_t v1, v2;
-    v1 = ldl_le_p(p);
-    v2 = ldl_le_p(p + 4);
-    return v1 | ((uint64_t)v2 << 32);
+    return le_bswap(ldq_p(ptr), 64);
 }
 
 static inline void stw_le_p(void *ptr, int v)
 {
-#ifdef _ARCH_PPC
-    __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr));
-#else
-    uint8_t *p = ptr;
-    p[0] = v;
-    p[1] = v >> 8;
-#endif
+    stw_p(ptr, le_bswap(v, 16));
 }
 
 static inline void stl_le_p(void *ptr, int v)
 {
-#ifdef _ARCH_PPC
-    __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr));
-#else
-    uint8_t *p = ptr;
-    p[0] = v;
-    p[1] = v >> 8;
-    p[2] = v >> 16;
-    p[3] = v >> 24;
-#endif
+    stl_p(ptr, le_bswap(v, 32));
 }
 
 static inline void stq_le_p(void *ptr, uint64_t v)
 {
-    uint8_t *p = ptr;
-    stl_le_p(p, (uint32_t)v);
-    stl_le_p(p + 4, v >> 32);
+    stq_p(ptr, le_bswap(v, 64));
 }
 
 /* float access */
 
 static inline float32 ldfl_le_p(const void *ptr)
 {
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
-    u.i = ldl_le_p(ptr);
+    CPU_FloatU u;
+    u.l = ldl_le_p(ptr);
     return u.f;
 }
 
 static inline void stfl_le_p(void *ptr, float32 v)
 {
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
+    CPU_FloatU u;
     u.f = v;
-    stl_le_p(ptr, u.i);
+    stl_le_p(ptr, u.l);
 }
 
 static inline float64 ldfq_le_p(const void *ptr)
 {
     CPU_DoubleU u;
-    u.l.lower = ldl_le_p(ptr);
-    u.l.upper = ldl_le_p(ptr + 4);
+    u.ll = ldq_le_p(ptr);
     return u.d;
 }
 
@@ -493,188 +434,64 @@ static inline void stfq_le_p(void *ptr, float64 v)
 {
     CPU_DoubleU u;
     u.d = v;
-    stl_le_p(ptr, u.l.lower);
-    stl_le_p(ptr + 4, u.l.upper);
+    stq_le_p(ptr, u.ll);
 }
 
-#else
-
-static inline int lduw_le_p(const void *ptr)
-{
-    return *(uint16_t *)ptr;
-}
-
-static inline int ldsw_le_p(const void *ptr)
-{
-    return *(int16_t *)ptr;
-}
-
-static inline int ldl_le_p(const void *ptr)
-{
-    return *(uint32_t *)ptr;
-}
-
-static inline uint64_t ldq_le_p(const void *ptr)
-{
-    return *(uint64_t *)ptr;
-}
-
-static inline void stw_le_p(void *ptr, int v)
-{
-    *(uint16_t *)ptr = v;
-}
-
-static inline void stl_le_p(void *ptr, int v)
-{
-    *(uint32_t *)ptr = v;
-}
-
-static inline void stq_le_p(void *ptr, uint64_t v)
-{
-    *(uint64_t *)ptr = v;
-}
-
-/* float access */
-
-static inline float32 ldfl_le_p(const void *ptr)
-{
-    return *(float32 *)ptr;
-}
-
-static inline float64 ldfq_le_p(const void *ptr)
-{
-    return *(float64 *)ptr;
-}
-
-static inline void stfl_le_p(void *ptr, float32 v)
-{
-    *(float32 *)ptr = v;
-}
-
-static inline void stfq_le_p(void *ptr, float64 v)
-{
-    *(float64 *)ptr = v;
-}
-#endif
-
-#if !defined(HOST_WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)
-
 static inline int lduw_be_p(const void *ptr)
 {
-#if defined(__i386__)
-    int val;
-    asm volatile ("movzwl %1, %0\n"
-                  "xchgb %b0, %h0\n"
-                  : "=q" (val)
-                  : "m" (*(uint16_t *)ptr));
-    return val;
-#else
-    const uint8_t *b = ptr;
-    return ((b[0] << 8) | b[1]);
-#endif
+    return (uint16_t)be_bswap(lduw_p(ptr), 16);
 }
 
 static inline int ldsw_be_p(const void *ptr)
 {
-#if defined(__i386__)
-    int val;
-    asm volatile ("movzwl %1, %0\n"
-                  "xchgb %b0, %h0\n"
-                  : "=q" (val)
-                  : "m" (*(uint16_t *)ptr));
-    return (int16_t)val;
-#else
-    const uint8_t *b = ptr;
-    return (int16_t)((b[0] << 8) | b[1]);
-#endif
+    return (int16_t)be_bswap(lduw_p(ptr), 16);
 }
 
 static inline int ldl_be_p(const void *ptr)
 {
-#if defined(__i386__) || defined(__x86_64__)
-    int val;
-    asm volatile ("movl %1, %0\n"
-                  "bswap %0\n"
-                  : "=r" (val)
-                  : "m" (*(uint32_t *)ptr));
-    return val;
-#else
-    const uint8_t *b = ptr;
-    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
-#endif
+    return be_bswap(ldl_p(ptr), 32);
 }
 
 static inline uint64_t ldq_be_p(const void *ptr)
 {
-    uint32_t a,b;
-    a = ldl_be_p(ptr);
-    b = ldl_be_p((uint8_t *)ptr + 4);
-    return (((uint64_t)a<<32)|b);
+    return be_bswap(ldq_p(ptr), 64);
 }
 
 static inline void stw_be_p(void *ptr, int v)
 {
-#if defined(__i386__)
-    asm volatile ("xchgb %b0, %h0\n"
-                  "movw %w0, %1\n"
-                  : "=q" (v)
-                  : "m" (*(uint16_t *)ptr), "0" (v));
-#else
-    uint8_t *d = (uint8_t *) ptr;
-    d[0] = v >> 8;
-    d[1] = v;
-#endif
+    stw_p(ptr, be_bswap(v, 16));
 }
 
 static inline void stl_be_p(void *ptr, int v)
 {
-#if defined(__i386__) || defined(__x86_64__)
-    asm volatile ("bswap %0\n"
-                  "movl %0, %1\n"
-                  : "=r" (v)
-                  : "m" (*(uint32_t *)ptr), "0" (v));
-#else
-    uint8_t *d = (uint8_t *) ptr;
-    d[0] = v >> 24;
-    d[1] = v >> 16;
-    d[2] = v >> 8;
-    d[3] = v;
-#endif
+    stl_p(ptr, be_bswap(v, 32));
 }
 
 static inline void stq_be_p(void *ptr, uint64_t v)
 {
-    stl_be_p(ptr, v >> 32);
-    stl_be_p((uint8_t *)ptr + 4, v);
+    stq_p(ptr, be_bswap(v, 64));
 }
 
 /* float access */
 
 static inline float32 ldfl_be_p(const void *ptr)
 {
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
-    u.i = ldl_be_p(ptr);
+    CPU_FloatU u;
+    u.l = ldl_be_p(ptr);
     return u.f;
 }
 
 static inline void stfl_be_p(void *ptr, float32 v)
 {
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
+    CPU_FloatU u;
     u.f = v;
-    stl_be_p(ptr, u.i);
+    stl_be_p(ptr, u.l);
 }
 
 static inline float64 ldfq_be_p(const void *ptr)
 {
     CPU_DoubleU u;
-    u.l.upper = ldl_be_p(ptr);
-    u.l.lower = ldl_be_p((uint8_t *)ptr + 4);
+    u.ll = ldq_be_p(ptr);
     return u.d;
 }
 
@@ -682,69 +499,12 @@ static inline void stfq_be_p(void *ptr, float64 v)
 {
     CPU_DoubleU u;
     u.d = v;
-    stl_be_p(ptr, u.l.upper);
-    stl_be_p((uint8_t *)ptr + 4, u.l.lower);
-}
-
-#else
-
-static inline int lduw_be_p(const void *ptr)
-{
-    return *(uint16_t *)ptr;
-}
-
-static inline int ldsw_be_p(const void *ptr)
-{
-    return *(int16_t *)ptr;
-}
-
-static inline int ldl_be_p(const void *ptr)
-{
-    return *(uint32_t *)ptr;
-}
-
-static inline uint64_t ldq_be_p(const void *ptr)
-{
-    return *(uint64_t *)ptr;
-}
-
-static inline void stw_be_p(void *ptr, int v)
-{
-    *(uint16_t *)ptr = v;
-}
-
-static inline void stl_be_p(void *ptr, int v)
-{
-    *(uint32_t *)ptr = v;
-}
-
-static inline void stq_be_p(void *ptr, uint64_t v)
-{
-    *(uint64_t *)ptr = v;
-}
-
-/* float access */
-
-static inline float32 ldfl_be_p(const void *ptr)
-{
-    return *(float32 *)ptr;
-}
-
-static inline float64 ldfq_be_p(const void *ptr)
-{
-    return *(float64 *)ptr;
-}
-
-static inline void stfl_be_p(void *ptr, float32 v)
-{
-    *(float32 *)ptr = v;
-}
-
-static inline void stfq_be_p(void *ptr, float64 v)
-{
-    *(float64 *)ptr = v;
+    stq_be_p(ptr, u.ll);
 }
 
-#endif
+#undef le_bswap
+#undef be_bswap
+#undef le_bswaps
+#undef be_bswaps
 
 #endif /* BSWAP_H */
-- 
1.7.11.7

next prev parent reply	other threads:[~2013-01-05  0:39 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-05  0:39 [Qemu-devel] [PATCH v2 0/8] linux-user fixes Richard Henderson
2013-01-05  0:39 ` [Qemu-devel] [PATCH 1/8] fdt: Use bswapN instead of bswap_N Richard Henderson
2013-01-05  0:39 ` [Qemu-devel] [PATCH 2/8] bswap: Tidy base definitions of bswapN Richard Henderson
2013-01-05  0:39 ` [Qemu-devel] [PATCH 3/8] bswap: Add host endian unaligned access functions Richard Henderson
2013-01-05  0:39 ` Richard Henderson [this message]
2013-01-05  0:39 ` [Qemu-devel] [PATCH 5/8] bswap: Rewrite cpu_to_<endian><type>u with {ld, st}<type>_<endian>_p Richard Henderson
2013-01-05  0:39 ` [Qemu-devel] [PATCH 6/8] linux-user: Rewrite __get_user/__put_user with __builtin_choose_expr Richard Henderson
2013-01-05  0:39 ` [Qemu-devel] [PATCH 7/8] alpha-linux-user: Fix sigaction Richard Henderson
2013-01-05  0:39 ` [Qemu-devel] [PATCH 8/8] user: Consider symbolic links as possible directories Richard Henderson
2013-01-12 16:07 ` [Qemu-devel] [PATCH v2 0/8] linux-user fixes Blue Swirl

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:381554b dfblob:68cda6a )
 OR (
bs:"[Qemu-devel] [PATCH 4/8] bswap: Rewrite all ld<type>_<endian>_p functions" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1357346373-13898-5-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=blauwirbel@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=riku.voipio@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).