From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: "mingo@elte.hu" <mingo@elte.hu>
Cc: "Ma, Ling" <ling.ma@intel.com>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"H. Peter Anvin" <hpa@zytor.com>
Subject: [PATCH -tip] Revert x86, mem: Optimize memcpy by avoiding memory false dependece
Date: Thu, 8 Jul 2010 02:27:33 -0400 [thread overview]
Message-ID: <20100708062733.GA24740@Krystal> (raw)
* Ma, Ling (ling.ma@intel.com) wrote:
> Hi Mathieu
>
> I did git revert as bellow
> Revert "x86, mem: Optimize memcpy by avoiding memory false dependece"
>
> This reverts commit 77ff5e49c20bb72a99a12bfb00dbf90b2302f087.
> And I found the machine will crash with your config file
>
> Could you revert the patch : 77ff5e49c20bb72a99a12bfb00dbf90b2302f087 on your
> machine, and check it again ?
Hi Ling,
I just successfully booted my machine 3 times in a row with a revert of commit
a1e5278e40f16a4611264f8da9e557c16cb6f6ed from -tip. This is actually the same as
reverting commit 77ff5e49c20bb72a99a12bfb00dbf90b2302f087, as
a1e5278e40f16a4611264f8da9e557c16cb6f6ed is simply a merge of that commit (the
diffs are exactly the same).
Without this revert, the machine gets to:
Loading Linux 2.6.35-rc4-tip-trace+ ...
and reboots all by itself. I've let it reboot 3 times in a row just to make
sure.
So I am therefore certain that the faulty commit is
"x86, mem: Optimize memcpy by avoiding memory false dependece"
identified by commit id 77ff5e49c20bb72a99a12bfb00dbf90b2302f087, merged by
commit a1e5278e40f16a4611264f8da9e557c16cb6f6ed into -tip.
For the sake of the commit log, the affected machine is an Intel Xeon E5405.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: "Ma, Ling" <ling.ma@intel.com>
CC: "mingo@elte.hu" <mingo@elte.hu>
CC: "H. Peter Anvin" <hpa@zytor.com>
---
arch/x86/lib/memcpy_64.S | 156 ++++++++++++++++-------------------------------
1 file changed, 54 insertions(+), 102 deletions(-)
Index: linux.trees.git/arch/x86/lib/memcpy_64.S
===================================================================
--- linux.trees.git.orig/arch/x86/lib/memcpy_64.S 2010-07-08 02:11:58.000000000 -0400
+++ linux.trees.git/arch/x86/lib/memcpy_64.S 2010-07-08 02:20:48.000000000 -0400
@@ -40,132 +40,84 @@
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC
- movq %rdi, %rax
/*
- * Use 32bit CMP here to avoid long NOP padding.
+ * Put the number of full 64-byte blocks into %ecx.
+ * Tail portion is handled at the end:
*/
- cmp $0x20, %edx
- jb .Lhandle_tail
+ movq %rdi, %rax
+ movl %edx, %ecx
+ shrl $6, %ecx
+ jz .Lhandle_tail
+ .p2align 4
+.Lloop_64:
/*
- * We check whether memory false dependece could occur,
- * then jump to corresponding copy mode.
+ * We decrement the loop index here - and the zero-flag is
+ * checked at the end of the loop (instructions inbetween do
+ * not change the zero flag):
*/
- cmp %dil, %sil
- jl .Lcopy_backward
- subl $0x20, %edx
-.Lcopy_forward_loop:
- subq $0x20, %rdx
+ decl %ecx
/*
- * Move in blocks of 4x8 bytes:
+ * Move in blocks of 4x16 bytes:
*/
- movq 0*8(%rsi), %r8
- movq 1*8(%rsi), %r9
- movq 2*8(%rsi), %r10
- movq 3*8(%rsi), %r11
- leaq 4*8(%rsi), %rsi
-
- movq %r8, 0*8(%rdi)
- movq %r9, 1*8(%rdi)
- movq %r10, 2*8(%rdi)
- movq %r11, 3*8(%rdi)
- leaq 4*8(%rdi), %rdi
- jae .Lcopy_forward_loop
- addq $0x20, %rdx
- jmp .Lhandle_tail
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r8
+ movq %r11, 0*8(%rdi)
+ movq %r8, 1*8(%rdi)
-.Lcopy_backward:
- /*
- * Calculate copy position to tail.
- */
- addq %rdx, %rsi
- addq %rdx, %rdi
- subq $0x20, %rdx
- /*
- * At most 3 ALU operations in one cycle,
- * so append NOPS in the same 16bytes trunk.
- */
- .p2align 4
-.Lcopy_backward_loop:
- subq $0x20, %rdx
- movq -1*8(%rsi), %r8
- movq -2*8(%rsi), %r9
- movq -3*8(%rsi), %r10
- movq -4*8(%rsi), %r11
- leaq -4*8(%rsi), %rsi
- movq %r8, -1*8(%rdi)
- movq %r9, -2*8(%rdi)
- movq %r10, -3*8(%rdi)
- movq %r11, -4*8(%rdi)
- leaq -4*8(%rdi), %rdi
- jae .Lcopy_backward_loop
+ movq 2*8(%rsi), %r9
+ movq 3*8(%rsi), %r10
+ movq %r9, 2*8(%rdi)
+ movq %r10, 3*8(%rdi)
+
+ movq 4*8(%rsi), %r11
+ movq 5*8(%rsi), %r8
+ movq %r11, 4*8(%rdi)
+ movq %r8, 5*8(%rdi)
+
+ movq 6*8(%rsi), %r9
+ movq 7*8(%rsi), %r10
+ movq %r9, 6*8(%rdi)
+ movq %r10, 7*8(%rdi)
+
+ leaq 64(%rsi), %rsi
+ leaq 64(%rdi), %rdi
+
+ jnz .Lloop_64
- /*
- * Calculate copy position to head.
- */
- addq $0x20, %rdx
- subq %rdx, %rsi
- subq %rdx, %rdi
.Lhandle_tail:
- cmpq $16, %rdx
- jb .Lless_16bytes
+ movl %edx, %ecx
+ andl $63, %ecx
+ shrl $3, %ecx
+ jz .Lhandle_7
- /*
- * Move data from 16 bytes to 31 bytes.
- */
- movq 0*8(%rsi), %r8
- movq 1*8(%rsi), %r9
- movq -2*8(%rsi, %rdx), %r10
- movq -1*8(%rsi, %rdx), %r11
- movq %r8, 0*8(%rdi)
- movq %r9, 1*8(%rdi)
- movq %r10, -2*8(%rdi, %rdx)
- movq %r11, -1*8(%rdi, %rdx)
- retq
.p2align 4
-.Lless_16bytes:
- cmpq $8, %rdx
- jb .Lless_8bytes
- /*
- * Move data from 8 bytes to 15 bytes.
- */
- movq 0*8(%rsi), %r8
- movq -1*8(%rsi, %rdx), %r9
- movq %r8, 0*8(%rdi)
- movq %r9, -1*8(%rdi, %rdx)
- retq
- .p2align 4
-.Lless_8bytes:
- cmpq $4, %rdx
- jb .Lless_3bytes
+.Lloop_8:
+ decl %ecx
+ movq (%rsi), %r8
+ movq %r8, (%rdi)
+ leaq 8(%rdi), %rdi
+ leaq 8(%rsi), %rsi
+ jnz .Lloop_8
+
+.Lhandle_7:
+ movl %edx, %ecx
+ andl $7, %ecx
+ jz .Lend
- /*
- * Move data from 4 bytes to 7 bytes.
- */
- movl (%rsi), %ecx
- movl -4(%rsi, %rdx), %r8d
- movl %ecx, (%rdi)
- movl %r8d, -4(%rdi, %rdx)
- retq
.p2align 4
-.Lless_3bytes:
- cmpl $0, %edx
- je .Lend
- /*
- * Move data from 1 bytes to 3 bytes.
- */
.Lloop_1:
movb (%rsi), %r8b
movb %r8b, (%rdi)
incq %rdi
incq %rsi
- decl %edx
+ decl %ecx
jnz .Lloop_1
.Lend:
- retq
+ ret
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
next reply other threads:[~2010-07-08 6:27 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-08 6:27 Mathieu Desnoyers [this message]
2010-07-08 7:11 ` [PATCH -tip] Revert x86, mem: Optimize memcpy by avoiding memory false dependece Ingo Molnar
2010-07-08 15:00 ` Mathieu Desnoyers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100708062733.GA24740@Krystal \
--to=mathieu.desnoyers@efficios.com \
--cc=hpa@zytor.com \
--cc=ling.ma@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.