From mboxrd@z Thu Jan 1 00:00:00 1970 From: joonwoop@codeaurora.org (Joonwoo Park) Date: Tue, 29 Jul 2014 23:28:26 -0700 Subject: [PATCH] arm64: optimize memcpy_{from,to}io() and memset_io() Message-ID: <1406701706-12808-1-git-send-email-joonwoop@codeaurora.org> To: linux-arm-kernel@lists.infradead.org List-Id: linux-arm-kernel.lists.infradead.org Optimize memcpy_{from,to}io() and memset_io() by transferring 64 bits at a time as much as possible, with minimized barrier usage. This simple optimization gives higher throughput compared to the current byte-by-byte reads and writes with a barrier in the loop. The code skeleton is taken from powerpc. Signed-off-by: Joonwoo Park Acked-by: Trilok Soni --- arch/arm64/kernel/io.c | 72 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c index 7d37ead..c0e3ab1 100644 --- a/arch/arm64/kernel/io.c +++ b/arch/arm64/kernel/io.c @@ -20,18 +20,34 @@ #include #include +#define IO_CHECK_ALIGN(v, a) ((((unsigned long)(v)) & ((a) - 1)) == 0) + /* * Copy data from IO memory space to "real" memory space.
*/
 void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
 {
-	unsigned char *t = to;
-	while (count) {
+	while (count && (!IO_CHECK_ALIGN(from, 8) || !IO_CHECK_ALIGN(to, 8))) {
+		*(u8 *)to = readb_relaxed(from);
+		from++;
+		to++;
 		count--;
-		*t = readb(from);
-		t++;
+	}
+	/* 64-bit bulk copy; __raw accessor avoids a byte swap on big-endian. */
+	while (count >= 8) {
+		*(u64 *)to = __raw_readq(from);
+		from += 8;
+		to += 8;
+		count -= 8;
+	}
+
+	while (count) {
+		*(u8 *)to = readb_relaxed(from);
 		from++;
+		to++;
+		count--;
 	}
+	__iormb();
 }
 EXPORT_SYMBOL(__memcpy_fromio);
 
@@ -40,12 +56,28 @@ EXPORT_SYMBOL(__memcpy_fromio);
  */
 void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
 {
-	const unsigned char *f = from;
+	/* One barrier up front; the relaxed/raw accesses below carry none. */
+	__iowmb();
+	while (count && (!IO_CHECK_ALIGN(to, 8) || !IO_CHECK_ALIGN(from, 8))) {
+		writeb_relaxed(*(volatile u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+
+	/* 64-bit bulk copy; __raw accessor avoids a byte swap on big-endian. */
+	while (count >= 8) {
+		__raw_writeq(*(volatile u64 *)from, to);
+		from += 8;
+		to += 8;
+		count -= 8;
+	}
+
 	while (count) {
+		writeb_relaxed(*(volatile u8 *)from, to);
+		from++;
+		to++;
 		count--;
-		writeb(*f, to);
-		f++;
-		to++;
 	}
 }
 EXPORT_SYMBOL(__memcpy_toio);
@@ -55,10 +87,30 @@ EXPORT_SYMBOL(__memcpy_toio);
  */
 void __memset_io(volatile void __iomem *dst, int c, size_t count)
 {
+	/* Mask to one byte so a negative c cannot sign-extend into qc. */
+	u64 qc = (u8)c;
+
+	qc |= qc << 8;
+	qc |= qc << 16;
+	qc |= qc << 32;
+
+	__iowmb();
+	while (count && !IO_CHECK_ALIGN(dst, 8)) {
+		writeb_relaxed(c, dst);
+		dst++;
+		count--;
+	}
+
+	while (count >= 8) {
+		writeq_relaxed(qc, dst);
+		dst += 8;
+		count -= 8;
+	}
+
 	while (count) {
+		writeb_relaxed(c, dst);
+		dst++;
 		count--;
-		writeb(c, dst);
-		dst++;
 	}
 }
 EXPORT_SYMBOL(__memset_io);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, hosted by The Linux Foundation