* [PATCH] powerpc: merge byteorder.h
From: Becky Bruce @ 2005-09-27 19:28 UTC
To: linuxppc64-dev, linuxppc-dev
powerpc: Merge byteorder.h
Essentially adopts the 64-bit version of this file. The 32-bit version
had been using unsigned ints for arguments/return values that were
actually only 16 bits wide - the new file uses __u16 for these items,
as in the 64-bit version of the header. The order of some of the asm
constraints in the 64-bit version was slightly different from the
32-bit version, but they produce identical code.
Signed-off-by: Becky Bruce <becky.bruce@freescale.com>
Signed-off-by: Kumar Gala <kumar.gala@freescale.com>
---
commit 01344596fdecbe2b97e122b6a50570a19218cd2f
tree ccdf20534198e69459d95f21b8f45aafc00e8238
parent d407c9f3f6f3c84d9daec257f9a2550aacbd2892
author Becky Bruce <becky.bruce@freescale.com> Tue, 27 Sep 2005 14:04:44 -0500
committer Becky Bruce <becky.bruce@freescale.com> Tue, 27 Sep 2005 14:04:44 -0500
include/asm-powerpc/byteorder.h | 89 +++++++++++++++++++++++++++++++++++++++
include/asm-ppc/byteorder.h | 76 ---------------------------------
include/asm-ppc64/byteorder.h | 86 --------------------------------------
3 files changed, 89 insertions(+), 162 deletions(-)
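As a usage sketch (a hypothetical driver fragment, not part of this
patch - the struct and function names are made up), the accessors keep
their existing semantics on both 32-bit and 64-bit:

	#include <asm/byteorder.h>

	struct widget_regs {			/* hypothetical device */
		volatile __u16 status;		/* little-endian on the bus */
		volatile __u16 control;
	};

	static __u16 widget_get_status(struct widget_regs *regs)
	{
		return ld_le16(&regs->status);	/* lhbrx: byte-reversed load */
	}

	static void widget_set_control(struct widget_regs *regs, __u16 v)
	{
		st_le16(&regs->control, v);	/* sthbrx: byte-reversed store */
	}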
diff --git a/include/asm-powerpc/byteorder.h b/include/asm-powerpc/byteorder.h
new file mode 100644
--- /dev/null
+++ b/include/asm-powerpc/byteorder.h
@@ -0,0 +1,89 @@
+#ifndef _ASM_POWERPC_BYTEORDER_H
+#define _ASM_POWERPC_BYTEORDER_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+#ifdef __GNUC__
+#ifdef __KERNEL__
+
+static __inline__ __u16 ld_le16(const volatile __u16 *addr)
+{
+	__u16 val;
+
+	__asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+
+static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
+{
+	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static __inline__ __u32 ld_le32(const volatile __u32 *addr)
+{
+	__u32 val;
+
+	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
+	return val;
+}
+
+static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
+{
+	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value)
+{
+	__u16 result;
+
+	__asm__("rlwimi %0,%1,8,16,23"
+	    : "=r" (result)
+	    : "r" (value), "0" (value >> 8));
+	return result;
+}
+
+static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value)
+{
+	__u32 result;
+
+	__asm__("rlwimi %0,%1,24,16,23\n\t"
+	    "rlwimi %0,%1,8,8,15\n\t"
+	    "rlwimi %0,%1,24,0,7"
+	    : "=r" (result)
+	    : "r" (value), "0" (value >> 24));
+	return result;
+}
+
+#define __arch__swab16(x) ___arch__swab16(x)
+#define __arch__swab32(x) ___arch__swab32(x)
+
+/* The same, but returns converted value from the location pointer by addr. */
+#define __arch__swab16p(addr) ld_le16(addr)
+#define __arch__swab32p(addr) ld_le32(addr)
+
+/* The same, but do the conversion in situ, ie. put the value back to addr. */
+#define __arch__swab16s(addr) st_le16(addr,*addr)
+#define __arch__swab32s(addr) st_le32(addr,*addr)
+
+#endif /* __KERNEL__ */
+
+#ifndef __STRICT_ANSI__
+#define __BYTEORDER_HAS_U64__
+#ifndef __powerpc64__
+#define __SWAB_64_THRU_32__
+#endif /* __powerpc64__ */
+#endif /* __STRICT_ANSI__ */
+
+#endif /* __GNUC__ */
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* _ASM_POWERPC_BYTEORDER_H */
diff --git a/include/asm-ppc/byteorder.h b/include/asm-ppc/byteorder.h
deleted file mode 100644
--- a/include/asm-ppc/byteorder.h
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _PPC_BYTEORDER_H
-#define _PPC_BYTEORDER_H
-
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#ifdef __GNUC__
-#ifdef __KERNEL__
-
-extern __inline__ unsigned ld_le16(const volatile unsigned short *addr)
-{
-	unsigned val;
-
-	__asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-
-extern __inline__ void st_le16(volatile unsigned short *addr, const unsigned val)
-{
-	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-extern __inline__ unsigned ld_le32(const volatile unsigned *addr)
-{
-	unsigned val;
-
-	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-
-extern __inline__ void st_le32(volatile unsigned *addr, const unsigned val)
-{
-	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value)
-{
-	__u16 result;
-
-	__asm__("rlwimi %0,%2,8,16,23" : "=&r" (result) : "0" (value >> 8), "r" (value));
-	return result;
-}
-
-static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value)
-{
-	__u32 result;
-
-	__asm__("rlwimi %0,%2,24,16,23" : "=&r" (result) : "0" (value>>24), "r" (value));
-	__asm__("rlwimi %0,%2,8,8,15" : "=&r" (result) : "0" (result), "r" (value));
-	__asm__("rlwimi %0,%2,24,0,7" : "=&r" (result) : "0" (result), "r" (value));
-
-	return result;
-}
-#define __arch__swab32(x) ___arch__swab32(x)
-#define __arch__swab16(x) ___arch__swab16(x)
-
-/* The same, but returns converted value from the location pointer by addr. */
-#define __arch__swab16p(addr) ld_le16(addr)
-#define __arch__swab32p(addr) ld_le32(addr)
-
-/* The same, but do the conversion in situ, ie. put the value back to addr. */
-#define __arch__swab16s(addr) st_le16(addr,*addr)
-#define __arch__swab32s(addr) st_le32(addr,*addr)
-
-#endif /* __KERNEL__ */
-
-#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
-# define __BYTEORDER_HAS_U64__
-# define __SWAB_64_THRU_32__
-#endif
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/big_endian.h>
-
-#endif /* _PPC_BYTEORDER_H */
diff --git a/include/asm-ppc64/byteorder.h b/include/asm-ppc64/byteorder.h
deleted file mode 100644
--- a/include/asm-ppc64/byteorder.h
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef _PPC64_BYTEORDER_H
-#define _PPC64_BYTEORDER_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/types.h>
-#include <linux/compiler.h>
-
-#ifdef __GNUC__
-#ifdef __KERNEL__
-
-static __inline__ __u16 ld_le16(const volatile __u16 *addr)
-{
-	__u16 val;
-
-	__asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-
-static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
-{
-	__asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static __inline__ __u32 ld_le32(const volatile __u32 *addr)
-{
-	__u32 val;
-
-	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
-	return val;
-}
-
-static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
-{
-	__asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value)
-{
-	__u16 result;
-
-	__asm__("rlwimi %0,%1,8,16,23"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 8));
-	return result;
-}
-
-static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value)
-{
-	__u32 result;
-
-	__asm__("rlwimi %0,%1,24,16,23\n\t"
-	    "rlwimi %0,%1,8,8,15\n\t"
-	    "rlwimi %0,%1,24,0,7"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 24));
-	return result;
-}
-
-#define __arch__swab16(x) ___arch__swab16(x)
-#define __arch__swab32(x) ___arch__swab32(x)
-
-/* The same, but returns converted value from the location pointer by addr. */
-#define __arch__swab16p(addr) ld_le16(addr)
-#define __arch__swab32p(addr) ld_le32(addr)
-
-/* The same, but do the conversion in situ, ie. put the value back to addr. */
-#define __arch__swab16s(addr) st_le16(addr,*addr)
-#define __arch__swab32s(addr) st_le32(addr,*addr)
-
-#endif /* __KERNEL__ */
-
-#ifndef __STRICT_ANSI__
-#define __BYTEORDER_HAS_U64__
-#endif
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/big_endian.h>
-
-#endif /* _PPC64_BYTEORDER_H */
* Re: [PATCH] powerpc: merge byteorder.h
From: Gabriel Paubert @ 2005-09-27 21:15 UTC
To: Becky Bruce; +Cc: linuxppc64-dev, linuxppc-dev
On Tue, Sep 27, 2005 at 02:28:56PM -0500, Becky Bruce wrote:
> powerpc: Merge byteorder.h
> [...]
> +static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value)
> +{
> +	__u16 result;
> +
> +	__asm__("rlwimi %0,%1,8,16,23"
> +	    : "=r" (result)
> +	    : "r" (value), "0" (value >> 8));
> +	return result;
> +}
This needs two registers where one is enough, and it often adds a
truncation of the result to 16 bits in the generated code. Consider
instead:

	__u32 result;

	__asm__("	rlwimi %0,%0,16,8,15	# ??12 -> ?212\n"
		: "=r" (result) : "0" (value));
	return (__u16)(result >> 8);

GCC will combine the result >> 8 with the truncation to the lower
sixteen bits, often saving one instruction. This will likely generate
worse code if you really need to keep both the original value and the
swapped one, but that should not be the common case.
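To spell the suggested sequence out (my own sketch, not part of the
patch), take value = 0x1234, zero-extended to 0x00001234 in a 32-bit
register:

	/*
	 * rlwimi %0,%0,16,8,15: rotl32(0x00001234,16) = 0x12340000;
	 * inserting its bits 8..15 (0x34, IBM bit numbering, bit 0 =
	 * MSB) into the register gives 0x00341234. Then
	 * (__u16)(result >> 8) = 0x3412, which is swab16(0x1234).
	 */

	/* Portable C reference the asm must agree with: */
	static __inline__ __u16 swab16_ref(__u16 value)
	{
		return (__u16)((value >> 8) | (value << 8));
	}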
> +
> +static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value)
> +{
> +	__u32 result;
> +
> +	__asm__("rlwimi %0,%1,24,16,23\n\t"
> +	    "rlwimi %0,%1,8,8,15\n\t"
> +	    "rlwimi %0,%1,24,0,7"
> +	    : "=r" (result)
> +	    : "r" (value), "0" (value >> 24));
> +	return result;
> +}
That one can be improved too; it only needs one rotlwi (rlwinm without
any masking) and two rlwimi:

	__asm__("	rotlwi %0,%1,8		# 1234 -> 2341\n"
		"	rlwimi %0,%1,24,0,7\n"
		"	rlwimi %0,%1,24,16,23\n"
		: "=&r" (result) : "r" (value));
Notes:
- reformat as you like, but I prefer a tab at the beginning of
  every line, otherwise the assembly output does not look right.
  I always add a \n at the end just in case (do all versions of
  GCC add one?).
- the earlyclobber (&) is really necessary, otherwise GCC might
  allocate the same register for the input and the output of
  ___arch__swab32.
What should really be implemented in GCC is a recognizer
for the manipulations that can easily be mapped to
a single rlwimi instruction. But it is too complex
for the standard combiner IIRC.
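Tracing the three instructions (again my own sketch) for
value = 0x12345678:

	/*
	 * rotlwi %0,%1,8        -> 0x34567812	(2341)
	 * rlwimi %0,%1,24,0,7   -> rotl32(value,24) = 0x78123456;
	 *                          bits 0..7 (0x78) replace the top
	 *                          byte: 0x78567812
	 * rlwimi %0,%1,24,16,23 -> bits 16..23 (0x34) replace byte 2:
	 *                          0x78563412	(4321)
	 */

	/* Portable C reference the asm must agree with: */
	static __inline__ __u32 swab32_ref(__u32 value)
	{
		return (value >> 24) | ((value >> 8) & 0xff00) |
		       ((value << 8) & 0xff0000) | (value << 24);
	}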
Regards,
Gabriel
* [PATCH] powerpc: improved byte swapping functions
From: Gabriel Paubert @ 2005-10-05 18:20 UTC
To: Becky Bruce; +Cc: linuxppc64-dev, linuxppc-dev
From: Gabriel Paubert <paubert@iram.es>
The previous versions of ___arch__swab16 and ___arch__swab32 were
not optimal. In most cases the code can be made shorter and faster
with this patch.
Signed-off-by: Gabriel Paubert <paubert@iram.es>
---
Additional notes:
1) For ___arch__swab16, the trick is to let the compiler
generate a single rlwinm instruction for the final right
shift and cast (sketched below, after these notes).
2) For ___arch__swab32, the rotated value passed as a parameter
already has two bytes in the right place, so only two rlwimi
instructions are necessary to complete the byte swap.
3) Edit if you don't like the formatting of the result.
4) I've been reading the thread about how to format patches,
and I hope I got it right. But I believe that the
diffstat output is overkill for such a small patch.
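To make note 1 concrete, a sketch of the expected code for the
16-bit swap (my guess at the generated assembly, assuming GCC puts
both the argument and the return value in r3, as the ABI suggests):

	# old ___arch__swab16: shift, insert, plus a truncation that
	# GCC often emits because it cannot see the output is clean
	srwi	r0,r3,8
	rlwimi	r0,r3,8,16,23
	rlwinm	r3,r0,0,16,31	# (__u16) truncation

	# new ___arch__swab16: insert, then one rotate-and-mask that
	# covers both the >> 8 and the (__u16) cast
	rlwimi	r3,r3,16,8,15
	rlwinm	r3,r3,24,16,31	# (__u16)(tmp >> 8)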
Regards,
Gabriel
diff --git a/include/asm-powerpc/byteorder.h b/include/asm-powerpc/byteorder.h
--- a/include/asm-powerpc/byteorder.h
+++ b/include/asm-powerpc/byteorder.h
@@ -42,23 +42,22 @@ static __inline__ void st_le32(volatile
 
 static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 value)
 {
-	__u16 result;
+	__u32 tmp;
 
-	__asm__("rlwimi %0,%1,8,16,23"
-	    : "=r" (result)
-	    : "r" (value), "0" (value >> 8));
-	return result;
+	__asm__("rlwimi %0,%0,16,8,15"
+	    : "=r" (tmp) : "0" (value));
+	return (__u16)(tmp>>8);
 }
 
 static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 value)
 {
 	__u32 result;
 
-	__asm__("rlwimi %0,%1,24,16,23\n\t"
-	    "rlwimi %0,%1,8,8,15\n\t"
-	    "rlwimi %0,%1,24,0,7"
+	__asm__(
+"	rlwimi %0,%1,24,16,23\n"
+"	rlwimi %0,%1,24,0,7\n"
 	    : "=r" (result)
-	    : "r" (value), "0" (value >> 24));
+	    : "r" (value), "0" ((value >> 24)|(value<<8)));
 	return result;
 }
 
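For anyone who wants to double-check the new sequences, a quick
standalone userspace test (my own sketch; needs GCC on a 32- or
64-bit PowerPC, and the file name and reference functions are
made up):

	/* swabtest.c - build with: gcc -O2 -o swabtest swabtest.c */
	#include <stdio.h>
	#include <stdint.h>

	static inline uint16_t arch_swab16(uint16_t value)
	{
		uint32_t tmp;

		__asm__("rlwimi %0,%0,16,8,15" : "=r" (tmp) : "0" (value));
		return (uint16_t)(tmp >> 8);
	}

	static inline uint32_t arch_swab32(uint32_t value)
	{
		uint32_t result;

		__asm__("rlwimi %0,%1,24,16,23\n"
			"rlwimi %0,%1,24,0,7\n"
			: "=r" (result)
			: "r" (value), "0" ((value >> 24) | (value << 8)));
		return result;
	}

	/* Portable references. */
	static uint16_t ref16(uint16_t v) { return (uint16_t)((v >> 8) | (v << 8)); }
	static uint32_t ref32(uint32_t v)
	{
		return (v >> 24) | ((v >> 8) & 0xff00) |
		       ((v << 8) & 0xff0000) | (v << 24);
	}

	int main(void)
	{
		static const uint32_t vec[] = {
			0x00000000, 0x12345678, 0xdeadbeef, 0xffffffff, 0x000000ff
		};
		unsigned int i, v, bad = 0;

		for (v = 0; v <= 0xffff; v++)	/* exhaustive 16-bit check */
			if (arch_swab16((uint16_t)v) != ref16((uint16_t)v))
				bad++;
		for (i = 0; i < sizeof(vec)/sizeof(vec[0]); i++)
			if (arch_swab32(vec[i]) != ref32(vec[i]))
				bad++;
		if (bad)
			printf("FAIL: %u mismatches\n", bad);
		else
			printf("all ok\n");
		return bad != 0;
	}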