linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Anton Blanchard <anton@samba.org>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 01/39] powerpc: Fix endian issues in VMX copy loops
Date: Mon, 23 Sep 2013 12:04:35 +1000	[thread overview]
Message-ID: <1379901913-5945-2-git-send-email-anton@samba.org> (raw)
In-Reply-To: <1379901913-5945-1-git-send-email-anton@samba.org>

Fix the permute loops for little endian.

Signed-off-by: Anton Blanchard <anton@samba.org>
---
 arch/powerpc/lib/copyuser_power7.S | 54 +++++++++++++++++++++----------------
 arch/powerpc/lib/memcpy_power7.S   | 55 ++++++++++++++++++++++----------------
 2 files changed, 63 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index d1f1179..e8e9c36 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -19,6 +19,14 @@
  */
 #include <asm/ppc_asm.h>
 
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
 	.macro err1
 100:
 	.section __ex_table,"a"
@@ -552,13 +560,13 @@ err3;	stw	r7,4(r3)
 	li	r10,32
 	li	r11,48
 
-	lvsl	vr16,0,r4	/* Setup permute control vector */
+	LVS(vr16,0,r4)		/* Setup permute control vector */
 err3;	lvx	vr0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 err3;	stvx	vr8,r0,r3
 	addi	r3,r3,16
@@ -566,9 +574,9 @@ err3;	stvx	vr8,r0,r3
 
 5:	bf	cr7*4+2,6f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 err3;	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -576,13 +584,13 @@ err3;	stvx	vr9,r3,r9
 
 6:	bf	cr7*4+1,7f
 err3;	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 err3;	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 err3;	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 err3;	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -611,21 +619,21 @@ err3;	stvx	vr11,r3,r11
 	.align	5
 8:
 err4;	lvx	vr7,r0,r4
-	vperm	vr8,vr0,vr7,vr16
+	VPERM(vr8,vr0,vr7,vr16)
 err4;	lvx	vr6,r4,r9
-	vperm	vr9,vr7,vr6,vr16
+	VPERM(vr9,vr7,vr6,vr16)
 err4;	lvx	vr5,r4,r10
-	vperm	vr10,vr6,vr5,vr16
+	VPERM(vr10,vr6,vr5,vr16)
 err4;	lvx	vr4,r4,r11
-	vperm	vr11,vr5,vr4,vr16
+	VPERM(vr11,vr5,vr4,vr16)
 err4;	lvx	vr3,r4,r12
-	vperm	vr12,vr4,vr3,vr16
+	VPERM(vr12,vr4,vr3,vr16)
 err4;	lvx	vr2,r4,r14
-	vperm	vr13,vr3,vr2,vr16
+	VPERM(vr13,vr3,vr2,vr16)
 err4;	lvx	vr1,r4,r15
-	vperm	vr14,vr2,vr1,vr16
+	VPERM(vr14,vr2,vr1,vr16)
 err4;	lvx	vr0,r4,r16
-	vperm	vr15,vr1,vr0,vr16
+	VPERM(vr15,vr1,vr0,vr16)
 	addi	r4,r4,128
 err4;	stvx	vr8,r0,r3
 err4;	stvx	vr9,r3,r9
@@ -649,13 +657,13 @@ err4;	stvx	vr15,r3,r16
 
 	bf	cr7*4+1,9f
 err3;	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 err3;	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 err3;	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 err3;	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -665,9 +673,9 @@ err3;	stvx	vr11,r3,r11
 
 9:	bf	cr7*4+2,10f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 err3;	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -675,7 +683,7 @@ err3;	stvx	vr9,r3,r9
 
 10:	bf	cr7*4+3,11f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 err3;	stvx	vr8,r0,r3
 	addi	r3,r3,16
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 0663630..e4177db 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -20,6 +20,15 @@
 #include <asm/ppc_asm.h>
 
 _GLOBAL(memcpy_power7)
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
 #ifdef CONFIG_ALTIVEC
 	cmpldi	r5,16
 	cmpldi	cr1,r5,4096
@@ -485,13 +494,13 @@ _GLOBAL(memcpy_power7)
 	li	r10,32
 	li	r11,48
 
-	lvsl	vr16,0,r4	/* Setup permute control vector */
+	LVS(vr16,0,r4)		/* Setup permute control vector */
 	lvx	vr0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 	stvx	vr8,r0,r3
 	addi	r3,r3,16
@@ -499,9 +508,9 @@ _GLOBAL(memcpy_power7)
 
 5:	bf	cr7*4+2,6f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -509,13 +518,13 @@ _GLOBAL(memcpy_power7)
 
 6:	bf	cr7*4+1,7f
 	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -544,21 +553,21 @@ _GLOBAL(memcpy_power7)
 	.align	5
 8:
 	lvx	vr7,r0,r4
-	vperm	vr8,vr0,vr7,vr16
+	VPERM(vr8,vr0,vr7,vr16)
 	lvx	vr6,r4,r9
-	vperm	vr9,vr7,vr6,vr16
+	VPERM(vr9,vr7,vr6,vr16)
 	lvx	vr5,r4,r10
-	vperm	vr10,vr6,vr5,vr16
+	VPERM(vr10,vr6,vr5,vr16)
 	lvx	vr4,r4,r11
-	vperm	vr11,vr5,vr4,vr16
+	VPERM(vr11,vr5,vr4,vr16)
 	lvx	vr3,r4,r12
-	vperm	vr12,vr4,vr3,vr16
+	VPERM(vr12,vr4,vr3,vr16)
 	lvx	vr2,r4,r14
-	vperm	vr13,vr3,vr2,vr16
+	VPERM(vr13,vr3,vr2,vr16)
 	lvx	vr1,r4,r15
-	vperm	vr14,vr2,vr1,vr16
+	VPERM(vr14,vr2,vr1,vr16)
 	lvx	vr0,r4,r16
-	vperm	vr15,vr1,vr0,vr16
+	VPERM(vr15,vr1,vr0,vr16)
 	addi	r4,r4,128
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -582,13 +591,13 @@ _GLOBAL(memcpy_power7)
 
 	bf	cr7*4+1,9f
 	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -598,9 +607,9 @@ _GLOBAL(memcpy_power7)
 
 9:	bf	cr7*4+2,10f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -608,7 +617,7 @@ _GLOBAL(memcpy_power7)
 
 10:	bf	cr7*4+3,11f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 	stvx	vr8,r0,r3
 	addi	r3,r3,16
-- 
1.8.1.2

  reply	other threads:[~2013-09-23  2:04 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-23  2:04 [PATCH 00/39] Second round of 64bit PowerPC little endian patches Anton Blanchard
2013-09-23  2:04 ` Anton Blanchard [this message]
2013-09-23  2:04 ` [PATCH 02/39] powerpc: Book 3S MMU little endian support Anton Blanchard
2013-09-23  2:04 ` [PATCH 03/39] powerpc: Fix offset of FPRs in VSX registers in little endian builds Anton Blanchard
2013-09-23  2:04 ` [PATCH 04/39] powerpc: PTRACE_PEEKUSR/PTRACE_POKEUSER of FPR " Anton Blanchard
2013-09-23  2:04 ` [PATCH 05/39] powerpc: Little endian builds double word swap VSX state during context save/restore Anton Blanchard
2013-09-23  2:04 ` [PATCH 06/39] powerpc: Support endian agnostic MMIO Anton Blanchard
2013-09-23  2:04 ` [PATCH 07/39] powerpc: Add little endian support for word-at-a-time functions Anton Blanchard
2013-09-23  2:04 ` [PATCH 08/39] powerpc: Set MSR_LE bit on little endian builds Anton Blanchard
2013-09-23  2:04 ` [PATCH 09/39] powerpc: Reset MSR_LE on signal entry Anton Blanchard
2013-09-23  2:04 ` [PATCH 10/39] powerpc: Include the appropriate endianness header Anton Blanchard
2013-09-23  2:04 ` [PATCH 11/39] powerpc: endian safe trampoline Anton Blanchard
2013-12-28  7:24   ` Olof Johansson
2013-12-28  7:58     ` Benjamin Herrenschmidt
2013-09-23  2:04 ` [PATCH 12/39] powerpc: Remove open coded byte swap macro in alignment handler Anton Blanchard
2013-09-23  2:04 ` [PATCH 13/39] powerpc: Remove hard coded FP offsets " Anton Blanchard
2013-09-23  2:04 ` [PATCH 14/39] powerpc: Alignment handler shouldn't access VSX registers with TS_FPR Anton Blanchard
2013-09-23  2:04 ` [PATCH 15/39] powerpc: Add little endian support to alignment handler Anton Blanchard
2013-09-23  2:04 ` [PATCH 16/39] powerpc: Handle VSX alignment faults in little endian mode Anton Blanchard
2013-09-23  2:04 ` [PATCH 17/39] powerpc: Use generic checksum code in little endian Anton Blanchard
2013-09-23  2:04 ` [PATCH 18/39] powerpc: Use generic memcpy " Anton Blanchard
2013-09-23  2:04 ` [PATCH 19/39] powerpc: uname should return ppc64le/ppcle on little endian builds Anton Blanchard
2013-09-23  2:04 ` [PATCH 20/39] powerpc: Little endian fixes for platforms/powernv/opal.c Anton Blanchard
2013-09-23  2:04 ` [PATCH 21/39] powerpc: Little endian fix for arch/powerpc/platforms/powernv/pci.c Anton Blanchard
2013-09-23  2:04 ` [PATCH 22/39] powerpc: Little endian fix for arch/powerpc/platforms/powernv/pci-p5ioc2.c Anton Blanchard
2013-09-23  2:04 ` [PATCH 23/39] powerpc: Little endian sparse clean up for arch/powerpc/platforms/powernv/pci-ioda.c Anton Blanchard
2013-09-23  2:04 ` [PATCH 24/39] powerpc/powernv: Fix endian issues in OPAL RTC driver Anton Blanchard
2013-09-23  2:04 ` [PATCH 25/39] powerpc/powernv: Fix endian issues in OPAL ICS backend Anton Blanchard
2013-09-23  2:05 ` [PATCH 26/39] powerpc/powernv: Make OPAL NVRAM device tree accesses endian safe Anton Blanchard
2013-09-23  2:05 ` [PATCH 27/39] powerpc/powernv: Fix endian issues in powernv PCI code Anton Blanchard
2013-09-23  2:05 ` [PATCH 28/39] powerpc/powernv: Fix endian issues in OPAL console and udbg backend Anton Blanchard
2013-09-23  2:05 ` [PATCH 29/39] powerpc/powernv: Fix OPAL entry and exit in little endian mode Anton Blanchard
2013-09-23  2:05 ` [PATCH 30/39] powerpc/powernv: Don't register exception handlers " Anton Blanchard
2013-09-23  2:05 ` [PATCH 31/39] powerpc/powernv: More little endian issues in OPAL RTC driver Anton Blanchard
2013-09-23  2:05 ` [PATCH 32/39] powerpc/powernv: Fix some PCI sparse errors and one LE bug Anton Blanchard
2013-09-23  2:05 ` [PATCH 33/39] powerpc/hvsi: Fix endian issues in HVSI driver Anton Blanchard
2013-09-23  2:05 ` [PATCH 34/39] tty/hvc_opal: powerpc: Make OPAL HVC device tree accesses endian safe Anton Blanchard
2013-09-23  2:05 ` [PATCH 35/39] KVM: PPC: Disable KVM on little endian builds Anton Blanchard
2013-09-23  2:05 ` [PATCH 36/39] powerpc/kvm/book3s_hv: Add little endian guest support Anton Blanchard
2013-09-25 12:10   ` [PATCH] powerpc/kvmbook3s_hv: propagate H_SET_MODE to the host Laurent Dufour
2013-09-25 12:27     ` Greg Kurz
2013-09-25 22:31     ` Paul Mackerras
2013-09-27  8:14       ` Laurent Dufour
2013-09-27 13:59       ` [PATCH V2] powerpc/kvm/book3s_hv: propagate H_SET_MODE_RESOURCE_LE " Laurent Dufour
2013-09-27 14:45         ` Greg Kurz
2013-09-30 18:40   ` [PATCH 36/39] powerpc/kvm/book3s_hv: Add little endian guest support Alexander Graf
2013-09-23  2:05 ` [PATCH 37/39] powerpc: Add ability to build little endian kernels Anton Blanchard
2013-09-23  2:05 ` [PATCH 38/39] powerpc: Don't set HAVE_EFFICIENT_UNALIGNED_ACCESS on little endian builds Anton Blanchard
2013-09-23  2:05 ` [PATCH 39/39] powerpc: Work around little endian gcc bug Anton Blanchard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1379901913-5945-2-git-send-email-anton@samba.org \
    --to=anton@samba.org \
    --cc=benh@kernel.crashing.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).