From: tip-bot for Fenghua Yu <fenghua.yu@intel.com>
To: linux-tip-commits@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, hpa@zytor.com, mingo@redhat.com,
fenghua.yu@intel.com, tglx@linutronix.de, hpa@linux.intel.com
Subject: [tip:perf/core] x86, mem: copy_user_64.S: Support copy_to/from_user by enhanced REP MOVSB/STOSB
Date: Wed, 18 May 2011 20:42:16 GMT [thread overview]
Message-ID: <tip-4307bec9344aed83f8107c3eb4285bd9d218fc10@git.kernel.org> (raw)
In-Reply-To: <1305671358-14478-7-git-send-email-fenghua.yu@intel.com>
Commit-ID: 4307bec9344aed83f8107c3eb4285bd9d218fc10
Gitweb: http://git.kernel.org/tip/4307bec9344aed83f8107c3eb4285bd9d218fc10
Author: Fenghua Yu <fenghua.yu@intel.com>
AuthorDate: Tue, 17 May 2011 15:29:15 -0700
Committer: H. Peter Anvin <hpa@linux.intel.com>
CommitDate: Tue, 17 May 2011 15:40:28 -0700
x86, mem: copy_user_64.S: Support copy_to/from_user by enhanced REP MOVSB/STOSB
Support copy_to_user/copy_from_user() by enhanced REP MOVSB/STOSB.
On processors supporting enhanced REP MOVSB/STOSB, the alternative
copy_user_enhanced_fast_string function using enhanced rep movsb overrides the
original function and the fast string function.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-7-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
arch/x86/lib/copy_user_64.S | 65 ++++++++++++++++++++++++++++++++++++------
1 files changed, 55 insertions(+), 10 deletions(-)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e4826..d17a117 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
- .macro ALTERNATIVE_JUMP feature,orig,alt
+/*
+ * By placing the feature2 entry after the feature1 entry in the
+ * .altinstructions section, we logically implement:
+ * If CPU has feature2, jmp to alt2 is used
+ * else if CPU has feature1, jmp to alt1 is used
+ * else jmp to orig is used.
+ */
+ .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt-1b /* offset */ /* or alternatively to alt */
+ .long \alt1-1b /* offset from 1b, the end of the jump at 0b */ /* or alternatively to alt1 */
+3: .byte 0xe9 /* near jump with 32bit immediate */
+ .long \alt2-1b /* offset from 1b, the end of the jump at 0b */ /* or alternatively to alt2 */
.previous
+
.section .altinstructions,"a"
- .align 8
- .quad 0b
- .quad 2b
- .word \feature /* when feature is set */
- .byte 5
- .byte 5
+ altinstruction_entry 0b,2b,\feature1,5,5 /* when feature1 is set: jump at 2b replaces 0b */
+ altinstruction_entry 0b,3b,\feature2,5,5 /* when feature2 is set: jump at 3b replaces 0b */
.previous
.endm
@@ -73,7 +80,9 @@ ENTRY(_copy_to_user)
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
jae bad_to_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
+ copy_user_generic_unrolled,copy_user_generic_string, \
+ copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_to_user)
@@ -86,7 +95,9 @@ ENTRY(_copy_from_user)
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
jae bad_from_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
+ copy_user_generic_unrolled,copy_user_generic_string, \
+ copy_user_enhanced_fast_string
CFI_ENDPROC
ENDPROC(_copy_from_user)
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
.previous
CFI_ENDPROC
ENDPROC(copy_user_generic_string)
+
+/*
+ * Copy with a single REP MOVSB, for CPUs with enhanced REP MOVSB/STOSB.
+ * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count (only the low 32 bits, edx, are examined and used)
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ENTRY(copy_user_enhanced_fast_string)
+ CFI_STARTPROC
+ andl %edx,%edx /* sets ZF when the count is zero */
+ jz 2f /* nothing to copy: return 0 */
+ movl %edx,%ecx /* ecx = byte count for rep movsb */
+1: rep
+ movsb
+2: xorl %eax,%eax /* success: 0 uncopied bytes */
+ ret
+
+ .section .fixup,"ax"
+12: movl %ecx,%edx /* edx = bytes rep movsb left uncopied; ecx doubles as the zerorest flag */
+ jmp copy_user_handle_tail /* tail handler produces the return value (defined elsewhere) */
+ .previous
+
+ .section __ex_table,"a"
+ .align 8
+ .quad 1b,12b /* a fault in the rep movsb at 1b continues at fixup 12b */
+ .previous
+ CFI_ENDPROC
+ENDPROC(copy_user_enhanced_fast_string)
next prev parent reply other threads:[~2011-05-18 20:42 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-17 22:29 [PATCH 0/9] Optimize string operations by enhanced REP MOVSB/STOSB Fenghua Yu
2011-05-17 22:29 ` [PATCH 1/9] x86, cpu: Enable enhanced REP MOVSB/STOSB feature Fenghua Yu
2011-05-17 23:13 ` [tip:x86/cpufeature] x86, cpufeature: Add CPU feature bit for enhanced REP MOVSB/STOSB tip-bot for Fenghua Yu
2011-05-17 22:29 ` [PATCH 2/9] x86/kernel/cpu/intel.c: Initialize Enhanced REP MOVSB/STOSBenhanced Fenghua Yu
2011-05-18 2:46 ` Andi Kleen
2011-05-18 3:47 ` H. Peter Anvin
2011-05-18 20:40 ` [tip:perf/core] x86, mem, intel: Initialize Enhanced REP MOVSB/STOSB tip-bot for Fenghua Yu
2011-05-17 22:29 ` [PATCH 3/9] x86/kernel/alternative.c: Add comment for applying alternatives order Fenghua Yu
2011-05-18 20:40 ` [tip:perf/core] x86, alternative, doc: " tip-bot for Fenghua Yu
2011-05-17 22:29 ` [PATCH 4/9] x86, alternative-asm.h: Add altinstruction_entry macro Fenghua Yu
2011-05-18 20:41 ` [tip:perf/core] x86, alternative: " tip-bot for Fenghua Yu
2011-05-17 22:29 ` [PATCH 5/9] x86/lib/clear_page_64.S: Support clear_page() with enhanced REP MOVSB/STOSB Fenghua Yu
2011-05-18 20:41 ` [tip:perf/core] x86, mem: clear_page_64.S: " tip-bot for Fenghua Yu
2011-05-17 22:29 ` [PATCH 6/9] x86/lib/copy_user_64.S: Support copy_to_user/copy_from_user by " Fenghua Yu
2011-05-18 20:42 ` tip-bot for Fenghua Yu [this message]
2011-05-17 22:29 ` [PATCH 7/9] x86/lib/memcpy_64.S: Optimize memcpy " Fenghua Yu
2011-05-18 6:35 ` Ingo Molnar
2011-05-18 19:04 ` Yu, Fenghua
2011-05-18 20:42 ` [tip:perf/core] x86, mem: memcpy_64.S: " tip-bot for Fenghua Yu
2011-05-17 22:29 ` [PATCH 8/9] x86/lib/memmove_64.S: Optimize memmove " Fenghua Yu
2011-05-18 20:43 ` [tip:perf/core] x86, mem: memmove_64.S: " tip-bot for Fenghua Yu
2011-05-17 22:29 ` [PATCH 9/9] x86/lib/memset_64.S: Optimize memset " Fenghua Yu
2011-05-18 2:57 ` Andi Kleen
2011-05-18 3:09 ` Yu, Fenghua
2011-05-18 4:05 ` Andi Kleen
2011-05-18 18:33 ` Yu, Fenghua
2011-05-18 18:39 ` Andi Kleen
2011-05-18 18:47 ` Ingo Molnar
2011-05-18 18:49 ` Yu, Fenghua
2011-05-18 20:43 ` [tip:perf/core] x86, mem: memset_64.S: " tip-bot for Fenghua Yu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-4307bec9344aed83f8107c3eb4285bd9d218fc10@git.kernel.org \
--to=fenghua.yu@intel.com \
--cc=hpa@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.