public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 2.6.17-rc5 1/2] i386 memcpy: use as few moves as possible for I/O
@ 2006-05-31  0:59 Chuck Ebbert
  2006-05-31  1:36 ` H. Peter Anvin
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Chuck Ebbert @ 2006-05-31  0:59 UTC (permalink / raw)
  To: linux-kernel; +Cc: Chris Lesiak, H. Peter Anvin, Andrew Morton, Linus Torvalds

Chris Lesiak reported that changes to i386's __memcpy() broke his device
because it can't handle byte moves and the new code uses them for
all trailing bytes when the length is not divisible by four.  The old
code tried to use a 16-bit move and/or a byte move as needed.

H. Peter Anvin:
"There are only a few semantics that make sense: fixed 8, 16, 32, or 64
bits, plus "optimal"; the latter to be used for anything that doesn't
require a specific transfer size.  Logically, an unqualified
"memcpy_to/fromio" should be the optimal size (as few transfers as
possible)"

So add back the old code as __minimal_memcpy and have IO transfers
use that.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>

---

 include/asm-i386/io.h     |    4 ++--
 include/asm-i386/string.h |   21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

--- 2.6.17-rc5-32.orig/include/asm-i386/io.h
+++ 2.6.17-rc5-32/include/asm-i386/io.h
@@ -202,11 +202,11 @@ static inline void memset_io(volatile vo
 }
 static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
 {
-	__memcpy(dst, (void __force *) src, count);
+	__minimal_memcpy(dst, (void __force *) src, count);
 }
 static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
 {
-	__memcpy((void __force *) dst, src, count);
+	__minimal_memcpy((void __force *) dst, src, count);
 }
 
 /*
--- 2.6.17-rc5-32.orig/include/asm-i386/string.h
+++ 2.6.17-rc5-32/include/asm-i386/string.h
@@ -220,6 +220,28 @@ return (to);
 }
 
 /*
+ * Do memcpy with as few moves as possible (for transfers to/from IO space.)
+ */
+static inline void * __minimal_memcpy(void * to, const void * from, size_t n)
+{
+int d0, d1, d2;
+__asm__ __volatile__(
+	"rep ; movsl\n\t"
+	"testb $2,%b4\n\t"
+	"jz 1f\n\t"
+	"movsw\n"
+	"1:\n\t"
+	"testb $1,%b4\n\t"
+	"jz 2f\n\t"
+	"movsb\n"
+	"2:"
+	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
+	:"0" (n/4), "q" (n), "1" ((long) to), "2" ((long) from)
+	: "memory");
+return to;
+}
+
+/*
  * This looks ugly, but the compiler can optimize it totally,
  * as the count is constant.
  */
-- 
Chuck

^ permalink raw reply	[flat|nested] 5+ messages in thread
* Re: [patch 2.6.17-rc5 1/2] i386 memcpy: use as few moves as  possible for I/O
@ 2006-05-31 23:01 Chuck Ebbert
  0 siblings, 0 replies; 5+ messages in thread
From: Chuck Ebbert @ 2006-05-31 23:01 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Linus Torvalds, Andrew Morton, Chris Lesiak, linux-kernel

In-Reply-To: <447D1094.20409@zytor.com>

On Tue, 30 May 2006 20:42:12 -0700, H. Peter Anvin wrote:

> I was thinking some more about that, and I suspect the "right" way to do 
> this looks something like the attached code.  Note that it assymetric, 
> and that it's probably too long to inline.
> 
> I haven't tested this yet, and I probably won't have time to do so this 
> evening.

There were some small problems, but I think I fixed them:


/*
 * arch/i386/lib/memcpy_io.S
 *
 * The most general form of memory copy to/from I/O space, used for
 * devices which can handle arbitrary transactions with appropriate
 * handling of byte enables.  The goal is to produce the minimum
 * number of naturally aligned transactions on the bus.
 */
	
#include <linux/config.h>
#include <linux/linkage.h>
	
.macro	build_memcpy_io_fn fn_name,align_reg

	.globl	\fn_name
	.type	\fn_name, @function
	
	ALIGN
\fn_name:

	ebp_space=0
#ifdef CONFIG_FRAME_POINTER
	pushl	%ebp
	movl	%esp,%ebp
	ebp_space=4
#endif

	pushl	%edi
	pushl	%esi
	
#ifdef CONFIG_REGPARM
	movl	%eax, %edi
	movl	%edx, %esi
#else
	movl	12+ebp_space(%esp), %edi
	movl	20+ebp_space(%esp), %ecx
	movl	16+ebp_space(%esp), %esi
#endif

	jecxz	1f
	
	testl	$1, \align_reg
	jz	2f
	movsb
	decl	%ecx
2:
	cmpl	$2, %ecx
	jb	3f
	testl	$2, \align_reg
	jz	4f
	movsw
	decl	%ecx
	decl	%ecx
4:
	movl	%ecx, %edx
	shrl	$2, %ecx
	jz	5f
	rep ; movsl
5:
	movl	%edx, %ecx
	testb	$2, %cl
	jz	3f
	movsw
3:
	testb	$1, %cl
	jz	1f
	movsb
1:
	pop	%esi
	pop	%edi

#ifdef CONFIG_FRAME_POINTER
	leave
#endif

	ret

	.size	\fn_name, .-\fn_name

.endm

	build_memcpy_io_fn fn_name=memcpy_fromio,align_reg=%esi
	build_memcpy_io_fn fn_name=memcpy_toio,align_reg=%edi
-- 
Chuck

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-05-31 23:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-05-31  0:59 [patch 2.6.17-rc5 1/2] i386 memcpy: use as few moves as possible for I/O Chuck Ebbert
2006-05-31  1:36 ` H. Peter Anvin
2006-05-31  3:42 ` H. Peter Anvin
2006-05-31 11:39 ` linux-os (Dick Johnson)
  -- strict thread matches above, loose matches on Subject: below --
2006-05-31 23:01 Chuck Ebbert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox