* [PATCH] powerpc: Adjust base and index registers in Altivec macros
@ 2009-08-20 23:21 Mike Wolf
2009-08-27 4:36 ` Michael Neuling
0 siblings, 1 reply; 2+ messages in thread
From: Mike Wolf @ 2009-08-20 23:21 UTC (permalink / raw)
To: linuxppc-dev
On POWER6 systems RA needs to be the base and RB the index.
If they are reversed you take a misdirect hit.
Signed-off-by: Mike Wolf <mjwolf@us.ibm.com>
----
--- altivec.orig/arch/powerpc/include/asm/ppc_asm.h 2009-08-17 15:39:52.000000000 -0500
+++ altivec/arch/powerpc/include/asm/ppc_asm.h 2009-08-20 18:08:30.000000000 -0500
@@ -98,13 +98,13 @@
#define REST_16FPRS(n, base) REST_8FPRS(n, base); REST_8FPRS(n+8, base)
#define REST_32FPRS(n, base) REST_16FPRS(n, base); REST_16FPRS(n+16, base)
-#define SAVE_VR(n,b,base) li b,THREAD_VR0+(16*(n)); stvx n,b,base
+#define SAVE_VR(n,b,base) li b,THREAD_VR0+(16*(n)); stvx n,base,b
#define SAVE_2VRS(n,b,base) SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
#define SAVE_4VRS(n,b,base) SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
#define SAVE_8VRS(n,b,base) SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
#define SAVE_16VRS(n,b,base) SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
#define SAVE_32VRS(n,b,base) SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
-#define REST_VR(n,b,base) li b,THREAD_VR0+(16*(n)); lvx n,b,base
+#define REST_VR(n,b,base) li b,THREAD_VR0+(16*(n)); lvx n,base,b
#define REST_2VRS(n,b,base) REST_VR(n,b,base); REST_VR(n+1,b,base)
#define REST_4VRS(n,b,base) REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
#define REST_8VRS(n,b,base) REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
@@ -112,26 +112,26 @@
#define REST_32VRS(n,b,base) REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
/* Save the lower 32 VSRs in the thread VSR region */
-#define SAVE_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); STXVD2X(n,b,base)
+#define SAVE_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); STXVD2X(n,base,b)
#define SAVE_2VSRS(n,b,base) SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
#define SAVE_4VSRS(n,b,base) SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
#define SAVE_8VSRS(n,b,base) SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
#define SAVE_16VSRS(n,b,base) SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
#define SAVE_32VSRS(n,b,base) SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
-#define REST_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); LXVD2X(n,b,base)
+#define REST_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); LXVD2X(n,base,b)
#define REST_2VSRS(n,b,base) REST_VSR(n,b,base); REST_VSR(n+1,b,base)
#define REST_4VSRS(n,b,base) REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
#define REST_8VSRS(n,b,base) REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
#define REST_16VSRS(n,b,base) REST_8VSRS(n,b,base); REST_8VSRS(n+8,b,base)
#define REST_32VSRS(n,b,base) REST_16VSRS(n,b,base); REST_16VSRS(n+16,b,base)
/* Save the upper 32 VSRs (32-63) in the thread VSX region (0-31) */
-#define SAVE_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); STXVD2X(n+32,b,base)
+#define SAVE_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); STXVD2X(n+32,base,b)
#define SAVE_2VSRSU(n,b,base) SAVE_VSRU(n,b,base); SAVE_VSRU(n+1,b,base)
#define SAVE_4VSRSU(n,b,base) SAVE_2VSRSU(n,b,base); SAVE_2VSRSU(n+2,b,base)
#define SAVE_8VSRSU(n,b,base) SAVE_4VSRSU(n,b,base); SAVE_4VSRSU(n+4,b,base)
#define SAVE_16VSRSU(n,b,base) SAVE_8VSRSU(n,b,base); SAVE_8VSRSU(n+8,b,base)
#define SAVE_32VSRSU(n,b,base) SAVE_16VSRSU(n,b,base); SAVE_16VSRSU(n+16,b,base)
-#define REST_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); LXVD2X(n+32,b,base)
+#define REST_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); LXVD2X(n+32,base,b)
#define REST_2VSRSU(n,b,base) REST_VSRU(n,b,base); REST_VSRU(n+1,b,base)
#define REST_4VSRSU(n,b,base) REST_2VSRSU(n,b,base); REST_2VSRSU(n+2,b,base)
#define REST_8VSRSU(n,b,base) REST_4VSRSU(n,b,base); REST_4VSRSU(n+4,b,base)
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] powerpc: Adjust base and index registers in Altivec macros
2009-08-20 23:21 [PATCH] powerpc: Adjust base and index registers in Altivec macros Mike Wolf
@ 2009-08-27 4:36 ` Michael Neuling
0 siblings, 0 replies; 2+ messages in thread
From: Michael Neuling @ 2009-08-27 4:36 UTC (permalink / raw)
To: mjw; +Cc: linuxppc-dev
> On POWER6 systems RA needs to be the base and RB the index.
> If they are reversed you take a misdirect hit.
>
> Signed-off-by: Mike Wolf <mjwolf@us.ibm.com>
Looks good.... thanks Mike!
Acked-by: Michael Neuling <mikey@neuling.org>
>
> ----
> --- altivec.orig/arch/powerpc/include/asm/ppc_asm.h 2009-08-17 15:39:52.000
000000 -0500
> +++ altivec/arch/powerpc/include/asm/ppc_asm.h 2009-08-20 18:08:30.000
000000 -0500
> @@ -98,13 +98,13 @@
> #define REST_16FPRS(n, base) REST_8FPRS(n, base); REST_8FPRS(n+8, base)
> #define REST_32FPRS(n, base) REST_16FPRS(n, base); REST_16FPRS(n+16, base)
>
> -#define SAVE_VR(n,b,base) li b,THREAD_VR0+(16*(n)); stvx n,b,base
> +#define SAVE_VR(n,b,base) li b,THREAD_VR0+(16*(n)); stvx n,base,b
> #define SAVE_2VRS(n,b,base) SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
> #define SAVE_4VRS(n,b,base) SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
> #define SAVE_8VRS(n,b,base) SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
> #define SAVE_16VRS(n,b,base) SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
> #define SAVE_32VRS(n,b,base) SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
> -#define REST_VR(n,b,base) li b,THREAD_VR0+(16*(n)); lvx n,b,base
> +#define REST_VR(n,b,base) li b,THREAD_VR0+(16*(n)); lvx n,base,b
> #define REST_2VRS(n,b,base) REST_VR(n,b,base); REST_VR(n+1,b,base)
> #define REST_4VRS(n,b,base) REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
> #define REST_8VRS(n,b,base) REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
> @@ -112,26 +112,26 @@
> #define REST_32VRS(n,b,base) REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
>
> /* Save the lower 32 VSRs in the thread VSR region */
> -#define SAVE_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); STXVD2X(n,b,base)
> +#define SAVE_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); STXVD2X(n,base,b)
> #define SAVE_2VSRS(n,b,base) SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
> #define SAVE_4VSRS(n,b,base) SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
> #define SAVE_8VSRS(n,b,base) SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
> #define SAVE_16VSRS(n,b,base) SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,
base)
> #define SAVE_32VSRS(n,b,base) SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16
,b,base)
> -#define REST_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); LXVD2X(n,b,base)
> +#define REST_VSR(n,b,base) li b,THREAD_VSR0+(16*(n)); LXVD2X(n,base,b)
> #define REST_2VSRS(n,b,base) REST_VSR(n,b,base); REST_VSR(n+1,b,base)
> #define REST_4VSRS(n,b,base) REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
> #define REST_8VSRS(n,b,base) REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
> #define REST_16VSRS(n,b,base) REST_8VSRS(n,b,base); REST_8VSRS(n+8,b,
base)
> #define REST_32VSRS(n,b,base) REST_16VSRS(n,b,base); REST_16VSRS(n+16
,b,base)
> /* Save the upper 32 VSRs (32-63) in the thread VSX region (0-31) */
> -#define SAVE_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); STXVD2X(n+32,b,base)
> +#define SAVE_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); STXVD2X(n+32,base,b)
> #define SAVE_2VSRSU(n,b,base) SAVE_VSRU(n,b,base); SAVE_VSRU(n+1,b,ba
se)
> #define SAVE_4VSRSU(n,b,base) SAVE_2VSRSU(n,b,base); SAVE_2VSRSU(n+2,
b,base)
> #define SAVE_8VSRSU(n,b,base) SAVE_4VSRSU(n,b,base); SAVE_4VSRSU(n+4,
b,base)
> #define SAVE_16VSRSU(n,b,base) SAVE_8VSRSU(n,b,base); SAVE_8VSRSU(n+8,
b,base)
> #define SAVE_32VSRSU(n,b,base) SAVE_16VSRSU(n,b,base); SAVE_16VSRSU(n+
16,b,base)
> -#define REST_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); LXVD2X(n+32,b,base)
> +#define REST_VSRU(n,b,base) li b,THREAD_VR0+(16*(n)); LXVD2X(n+32,base,b)
> #define REST_2VSRSU(n,b,base) REST_VSRU(n,b,base); REST_VSRU(n+1,b,ba
se)
> #define REST_4VSRSU(n,b,base) REST_2VSRSU(n,b,base); REST_2VSRSU(n+2,
b,base)
> #define REST_8VSRSU(n,b,base) REST_4VSRSU(n,b,base); REST_4VSRSU(n+4,
b,base)
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2009-08-27 4:36 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-08-20 23:21 [PATCH] powerpc: Adjust base and index registers in Altivec macros Mike Wolf
2009-08-27 4:36 ` Michael Neuling
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).