From: Michael Neuling <mikey@neuling.org>
To: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Cc: Linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH] powerpc: memcpy optimization for 64bit LE
Date: Wed, 06 Nov 2013 17:22:46 +1100 [thread overview]
Message-ID: <11438.1383718966@ale.ozlabs.ibm.com> (raw)
In-Reply-To: <1383640732-21449-1-git-send-email-felix@linux.vnet.ibm.com>
Philippe Bergheaud <felix@linux.vnet.ibm.com> wrote:
> Unaligned stores take alignment exceptions on POWER7 running in little-endian mode.
> This is a dumb byte-by-byte little-endian base memcpy that avoids unaligned stores.
> It is replaced by the VMX memcpy at boot.
Is this any faster than the generic version?
Mikey
>
> Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
> ---
> arch/powerpc/include/asm/string.h | 4 ----
> arch/powerpc/kernel/ppc_ksyms.c | 2 --
> arch/powerpc/lib/Makefile | 2 --
> arch/powerpc/lib/memcpy_64.S | 19 +++++++++++++++++++
> 4 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
> index 0dffad6..e40010a 100644
> --- a/arch/powerpc/include/asm/string.h
> +++ b/arch/powerpc/include/asm/string.h
> @@ -10,9 +10,7 @@
> #define __HAVE_ARCH_STRNCMP
> #define __HAVE_ARCH_STRCAT
> #define __HAVE_ARCH_MEMSET
> -#ifdef __BIG_ENDIAN__
> #define __HAVE_ARCH_MEMCPY
> -#endif
> #define __HAVE_ARCH_MEMMOVE
> #define __HAVE_ARCH_MEMCMP
> #define __HAVE_ARCH_MEMCHR
> @@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
> extern int strncmp(const char *, const char *, __kernel_size_t);
> extern char * strcat(char *, const char *);
> extern void * memset(void *,int,__kernel_size_t);
> -#ifdef __BIG_ENDIAN__
> extern void * memcpy(void *,const void *,__kernel_size_t);
> -#endif
> extern void * memmove(void *,const void *,__kernel_size_t);
> extern int memcmp(const void *,const void *,__kernel_size_t);
> extern void * memchr(const void *,int,__kernel_size_t);
> diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
> index 526ad5c..0c2dd60 100644
> --- a/arch/powerpc/kernel/ppc_ksyms.c
> +++ b/arch/powerpc/kernel/ppc_ksyms.c
> @@ -147,9 +147,7 @@ EXPORT_SYMBOL(__ucmpdi2);
> #endif
> long long __bswapdi2(long long);
> EXPORT_SYMBOL(__bswapdi2);
> -#ifdef __BIG_ENDIAN__
> EXPORT_SYMBOL(memcpy);
> -#endif
> EXPORT_SYMBOL(memset);
> EXPORT_SYMBOL(memmove);
> EXPORT_SYMBOL(memcmp);
> diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
> index 5310132..6670361 100644
> --- a/arch/powerpc/lib/Makefile
> +++ b/arch/powerpc/lib/Makefile
> @@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o
> obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
> endif
>
> -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
> obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
> -endif
>
> obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
>
> diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
> index d2bbbc8..358cf74 100644
> --- a/arch/powerpc/lib/memcpy_64.S
> +++ b/arch/powerpc/lib/memcpy_64.S
> @@ -12,10 +12,28 @@
> .align 7
> _GLOBAL(memcpy)
> BEGIN_FTR_SECTION
> +#ifdef __LITTLE_ENDIAN__
> + cmpdi cr7,r5,0 /* dumb little-endian memcpy */
> +#else
> std r3,48(r1) /* save destination pointer for return value */
> +#endif
> FTR_SECTION_ELSE
> b memcpy_power7
> ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
> +#ifdef __LITTLE_ENDIAN__
> + addi r5,r5,-1
> + addi r9,r3,-1
> + add r5,r3,r5
> + subf r5,r9,r5
> + addi r4,r4,-1
> + mtctr r5
> + beqlr cr7
> +1:
> + lbzu r10,1(r4)
> + stbu r10,1(r9)
> + bdnz 1b
> + blr
> +#else
> PPC_MTOCRF(0x01,r5)
> cmpldi cr1,r5,16
> neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
> @@ -201,3 +219,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
> stb r0,0(r3)
> 4: ld r3,48(r1) /* return dest pointer */
> blr
> +#endif
> --
> 1.7.10.4
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>
next prev parent reply other threads:[~2013-11-06 6:22 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-11-05 8:38 [PATCH] powerpc: memcpy optimization for 64bit LE Philippe Bergheaud
2013-11-06 6:22 ` Michael Neuling [this message]
2013-11-06 10:21 ` Philippe Bergheaud
2013-11-07 2:10 ` Michael Neuling
2013-11-07 13:01 ` [PATCH v2] " Philippe Bergheaud
2013-11-07 2:07 ` [PATCH] " Anton Blanchard
-- strict thread matches above, loose matches on Subject: below --
2014-04-29 23:10 Anton Blanchard
2014-04-29 23:12 ` Anton Blanchard
2014-05-05 12:56 ` Philippe Bergheaud
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=11438.1383718966@ale.ozlabs.ibm.com \
--to=mikey@neuling.org \
--cc=Linuxppc-dev@lists.ozlabs.org \
--cc=felix@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).