From: Alexey Dobriyan <adobriyan@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, mingo@redhat.com,
hpa@zytor.com, Alexey Dobriyan <adobriyan@gmail.com>
Subject: [PATCH 4/5] -march=native: REP STOSB
Date: Fri, 8 Dec 2017 01:41:53 +0300 [thread overview]
Message-ID: <20171207224154.4687-4-adobriyan@gmail.com> (raw)
In-Reply-To: <20171207224154.4687-1-adobriyan@gmail.com>
If the CPU advertises fast REP STOSB, use it.
Inline clear_page() so that only 3 registers are clobbered across the call,
not the whole set of call-clobbered registers as required by the ABI.
Also, tell gcc to use REP STOSB for memset(); this saves terabytes of .text.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
Makefile | 3 +++
arch/x86/boot/compressed/head_64.S | 4 ++++
arch/x86/crypto/sha1_ssse3_asm.S | 7 ++++++-
arch/x86/include/asm/page_64.h | 13 +++++++++++++
arch/x86/lib/Makefile | 2 ++
arch/x86/lib/memset_64.S | 15 +++++++++++++++
scripts/kconfig/cpuid.c | 6 +++++-
scripts/march-native.sh | 1 +
8 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index 84abac4c181a..70d91d52ee60 100644
--- a/Makefile
+++ b/Makefile
@@ -593,6 +593,9 @@ endif
ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
KBUILD_CFLAGS += -mmemcpy-strategy=rep_byte:-1:align
endif
+ifdef CONFIG_MARCH_NATIVE_REP_STOSB
+KBUILD_CFLAGS += -mmemset-strategy=rep_byte:-1:align
+endif
ifeq ($(KBUILD_EXTMOD),)
# Read in dependencies to all Kconfig* files, make sure to run
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..a7913f5e18b6 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -447,8 +447,12 @@ relocated:
leaq _bss(%rip), %rdi
leaq _ebss(%rip), %rcx
subq %rdi, %rcx
+#ifdef CONFIG_MARCH_NATIVE_REP_STOSB
+ rep stosb
+#else
shrq $3, %rcx
rep stosq
+#endif
/*
* Adjust our own GOT
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 6204bd53528c..ffa41d7a582a 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -94,10 +94,15 @@
SHA1_PIPELINED_MAIN_BODY
# cleanup workspace
- mov $8, %ecx
mov %rsp, %rdi
xor %rax, %rax
+#ifdef CONFIG_MARCH_NATIVE_REP_STOSB
+ mov $64, %ecx
+ rep stosb
+#else
+ mov $8, %ecx
rep stosq
+#endif
mov %rbp, %rsp # deallocate workspace
pop %rbp
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index c2353661eaf1..b3d275b07624 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -36,6 +36,18 @@ extern unsigned long __phys_addr_symbol(unsigned long);
#define pfn_valid(pfn) ((pfn) < max_pfn)
#endif
+#ifdef CONFIG_MARCH_NATIVE_REP_STOSB
+static __always_inline void clear_page(void *page)
+{
+ uint32_t len = PAGE_SIZE;
+ asm volatile (
+ "rep stosb"
+ : "+D" (page), "+c" (len)
+ : "a" (0)
+ : "memory"
+ );
+}
+#else
void clear_page_orig(void *page);
void clear_page_rep(void *page);
void clear_page_erms(void *page);
@@ -49,6 +61,7 @@ static inline void clear_page(void *page)
"0" (page)
: "memory", "rax", "rcx");
}
+#endif
#ifdef CONFIG_MARCH_NATIVE_REP_MOVSB
static __always_inline void copy_page(void *to, void *from)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 8f9460bef2ec..6cb356408ebb 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -45,7 +45,9 @@ endif
else
obj-y += iomap_copy_64.o
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
+ifneq ($(CONFIG_MARCH_NATIVE_REP_STOSB),y)
lib-y += clear_page_64.o
+endif
ifneq ($(CONFIG_MARCH_NATIVE_REP_MOVSB),y)
lib-y += copy_page_64.o
endif
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 9bc861c71e75..7786d1a65423 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -8,6 +8,20 @@
.weak memset
+#ifdef CONFIG_MARCH_NATIVE_REP_STOSB
+ENTRY(memset)
+ENTRY(__memset)
+ mov %esi, %eax
+ mov %rdi, %rsi
+ mov %rdx, %rcx
+ rep stosb
+ mov %rsi, %rax
+ ret
+ENDPROC(memset)
+ENDPROC(__memset)
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+#else
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
@@ -140,3 +154,4 @@ ENTRY(memset_orig)
jmp .Lafter_bad_alignment
.Lfinal:
ENDPROC(memset_orig)
+#endif
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
index 4947f47e7728..ecb285183581 100644
--- a/scripts/kconfig/cpuid.c
+++ b/scripts/kconfig/cpuid.c
@@ -44,6 +44,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
static bool popcnt = false;
static bool rep_movsb = false;
+static bool rep_stosb = false;
static uint32_t eax0_max;
@@ -62,8 +63,10 @@ static void intel(void)
cpuid2(7, 0, &eax, &ecx, &edx, &ebx);
// printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
- if (ebx & (1 << 9))
+ if (ebx & (1 << 9)) {
rep_movsb = true;
+ rep_stosb = true;
+ }
}
}
@@ -85,6 +88,7 @@ int main(int argc, char *argv[])
#define _(x) if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
_(popcnt);
_(rep_movsb);
+ _(rep_stosb);
#undef _
return EXIT_FAILURE;
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
index eb52c20c56b4..d3adf0edb2be 100755
--- a/scripts/march-native.sh
+++ b/scripts/march-native.sh
@@ -32,6 +32,7 @@ option() {
if test -x "$CPUID"; then
"$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT"
"$CPUID" rep_movsb && option "CONFIG_MARCH_NATIVE_REP_MOVSB"
+ "$CPUID" rep_stosb && option "CONFIG_MARCH_NATIVE_REP_STOSB"
fi
if test ! -f "$AUTOCONF1" -o ! -f "$AUTOCONF2"; then
--
2.13.6
next prev parent reply other threads:[~2017-12-07 22:42 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-07 22:41 [PATCH v0 1/5] x86_64: march=native support Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 2/5] -march=native: POPCNT support Alexey Dobriyan
2017-12-07 23:07 ` H. Peter Anvin
2017-12-08 10:09 ` Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 3/5] -march=native: REP MOVSB support Alexey Dobriyan
2017-12-07 22:41 ` Alexey Dobriyan [this message]
2017-12-08 19:08 ` [PATCH 4/5] -march=native: REP STOSB Andi Kleen
2017-12-07 22:41 ` [PATCH 5/5] -march=native: MOVBE support Alexey Dobriyan
2017-12-07 23:32 ` [PATCH v0 1/5] x86_64: march=native support H. Peter Anvin
2017-12-08 9:57 ` Alexey Dobriyan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171207224154.4687-4-adobriyan@gmail.com \
--to=adobriyan@gmail.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.