From: Alexey Dobriyan <adobriyan@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, mingo@redhat.com,
hpa@zytor.com, Alexey Dobriyan <adobriyan@gmail.com>
Subject: [PATCH 3/5] -march=native: REP MOVSB support
Date: Fri, 8 Dec 2017 01:41:52 +0300 [thread overview]
Message-ID: <20171207224154.4687-3-adobriyan@gmail.com> (raw)
In-Reply-To: <20171207224154.4687-1-adobriyan@gmail.com>
If CPU advertises fast REP MOVSB, use it.
Inline copy_page() to use only 3 registers across function call
not whole shebang as required by ABI.
Also, tell gcc to use REP MOVSB for memcpy(), this saves terabytes of .text.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
Makefile | 3 +++
arch/x86/include/asm/page_64.h | 13 +++++++++++++
arch/x86/kernel/relocate_kernel_64.S | 15 +++++++++++++++
arch/x86/lib/Makefile | 5 ++++-
arch/x86/lib/memcpy_64.S | 13 +++++++++++++
arch/x86/xen/xen-pvh.S | 4 ++++
scripts/kconfig/cpuid.c | 9 +++++++++
scripts/march-native.sh | 1 +
8 files changed, 62 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index c1cc730b81a8..84abac4c181a 100644
--- a/Makefile
+++ b/Makefile
@@ -590,6 +590,9 @@ ifeq ($(dot-config),1)
ifdef CONFIG_MARCH_NATIVE
KBUILD_CFLAGS += -march=native
endif
+ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
+KBUILD_CFLAGS += -mmemcpy-strategy=rep_byte:-1:align
+endif
ifeq ($(KBUILD_EXTMOD),)
# Read in dependencies to all Kconfig* files, make sure to run
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..c2353661eaf1 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -50,7 +50,20 @@ static inline void clear_page(void *page)
: "memory", "rax", "rcx");
}
+#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
+static __always_inline void copy_page(void *to, void *from)
+{
+ uint32_t len = PAGE_SIZE;
+ asm volatile (
+ "rep movsb"
+ : "+D" (to), "+S" (from), "+c" (len)
+ :
+ : "memory"
+ );
+}
+#else
void copy_page(void *to, void *from);
+#endif
#ifdef CONFIG_X86_MCE
#define arch_unmap_kpfn arch_unmap_kpfn
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 307d3bac5f04..6ccfb9a63d5c 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -260,18 +260,33 @@ swap_pages:
movq %rsi, %rax
movq %r10, %rdi
+#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
+ mov $4096, %ecx
+ rep movsb
+#else
movl $512, %ecx
rep ; movsq
+#endif
movq %rax, %rdi
movq %rdx, %rsi
+#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
+ mov $4096, %ecx
+ rep movsb
+#else
movl $512, %ecx
rep ; movsq
+#endif
movq %rdx, %rdi
movq %r10, %rsi
+#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
+ mov $4096, %ecx
+ rep movsb
+#else
movl $512, %ecx
rep ; movsq
+#endif
lea PAGE_SIZE(%rax), %rsi
jmp 0b
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index c26ad76e7048..8f9460bef2ec 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -45,7 +45,10 @@ endif
else
obj-y += iomap_copy_64.o
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
- lib-y += clear_page_64.o copy_page_64.o
+ lib-y += clear_page_64.o
+ifneq ($(CONFIG_MARCH_NATIVE_REP_MOVSB),y)
+ lib-y += copy_page_64.o
+endif
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o
lib-y += cmpxchg16b_emu.o
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 9a53a06e5a3e..28a807eb9bd6 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -15,6 +15,18 @@
.weak memcpy
+#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
+ENTRY(__memcpy)
+ENTRY(memcpy)
+ mov %rdi, %rax
+ mov %rdx, %rcx
+ rep movsb
+ ret
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+#else
/*
* memcpy - Copy a memory block.
*
@@ -181,6 +193,7 @@ ENTRY(memcpy_orig)
.Lend:
retq
ENDPROC(memcpy_orig)
+#endif
#ifndef CONFIG_UML
/*
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index e1a5fbeae08d..1dc30f0dfdf7 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -68,9 +68,13 @@ ENTRY(pvh_start_xen)
mov $_pa(pvh_start_info), %edi
mov %ebx, %esi
mov _pa(pvh_start_info_sz), %ecx
+#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
+ rep movsb
+#else
shr $2,%ecx
rep
movsl
+#endif
mov $_pa(early_stack_end), %esp
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
index e565dd446bdf..4947f47e7728 100644
--- a/scripts/kconfig/cpuid.c
+++ b/scripts/kconfig/cpuid.c
@@ -43,6 +43,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
}
static bool popcnt = false;
+static bool rep_movsb = false;
static uint32_t eax0_max;
@@ -57,6 +58,13 @@ static void intel(void)
if (ecx & (1 << 23))
popcnt = true;
}
+ if (eax0_max >= 7) {
+ cpuid2(7, 0, &eax, &ecx, &edx, &ebx);
+// printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
+
+ if (ebx & (1 << 9))
+ rep_movsb = true;
+ }
}
int main(int argc, char *argv[])
@@ -76,6 +84,7 @@ int main(int argc, char *argv[])
#define _(x) if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
_(popcnt);
+ _(rep_movsb);
#undef _
return EXIT_FAILURE;
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
index 6641e356b646..eb52c20c56b4 100755
--- a/scripts/march-native.sh
+++ b/scripts/march-native.sh
@@ -31,6 +31,7 @@ option() {
if test -x "$CPUID"; then
"$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT"
+ "$CPUID" rep_movsb && option "CONFIG_MARCH_NATIVE_REP_MOVSB"
fi
if test ! -f "$AUTOCONF1" -o ! -f "$AUTOCONF2"; then
--
2.13.6
next prev parent reply other threads:[~2017-12-07 22:42 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-07 22:41 [PATCH v0 1/5] x86_64: march=native support Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 2/5] -march=native: POPCNT support Alexey Dobriyan
2017-12-07 23:07 ` H. Peter Anvin
2017-12-08 10:09 ` Alexey Dobriyan
2017-12-07 22:41 ` Alexey Dobriyan [this message]
2017-12-07 22:41 ` [PATCH 4/5] -march=native: REP STOSB Alexey Dobriyan
2017-12-08 19:08 ` Andi Kleen
2017-12-07 22:41 ` [PATCH 5/5] -march=native: MOVBE support Alexey Dobriyan
2017-12-07 23:32 ` [PATCH v0 1/5] x86_64: march=native support H. Peter Anvin
2017-12-08 9:57 ` Alexey Dobriyan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171207224154.4687-3-adobriyan@gmail.com \
--to=adobriyan@gmail.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.