public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 2.6.16-rc5-mm2] i386 memcpy: optimal memcpy for IO
@ 2006-06-02  1:28 Chuck Ebbert
  2006-06-02  5:06 ` H. Peter Anvin
  0 siblings, 1 reply; 3+ messages in thread
From: Chuck Ebbert @ 2006-06-02  1:28 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andrew Morton, H. Peter Anvin, Linus Torvalds

From: H. Peter Anvin <hpa@zytor.com>

Optimal memcpy for moves to/from IO space.  Does as few moves as
possible while keeping transfers optimally aligned.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>

---

Andrew, remove i386-memcpy-use-as-few-moves-as.patch from -mm
and replace it with this, please.

 arch/i386/lib/memcpy.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/asm-i386/io.h  |   11 ++-------
 2 files changed, 60 insertions(+), 8 deletions(-)

--- 2.6.17-rc5-32.orig/arch/i386/lib/memcpy.c
+++ 2.6.17-rc5-32/arch/i386/lib/memcpy.c
@@ -42,3 +42,60 @@ void *memmove(void *dest, const void *sr
 	return dest;
 }
 EXPORT_SYMBOL(memmove);
+
+/*
+ * The most general form of memory copy to/from I/O space, used for
+ * devices which can handle arbitrary transactions with appropriate
+ * handling of byte enables.  The goal is to produce the minimum
+ * number of naturally aligned transactions on the bus.
+ */
+
+#define build_memcpy_io(dst, src, count, align_reg)	\
+({							\
+unsigned long d0, d1, d2, d3;	/* asm outputs; d3 holds the saved count */ \
+asm volatile(						\
+	"jecxz	1f\n\t"		/* count == 0: nothing to copy */ \
+							\
+	"testl	$1, " align_reg "\n\t"	/* address odd? */	\
+	"jz	2f\n\t"					\
+	"movsb\n\t"		/* yes: copy one byte first */	\
+	"decl	%2\n"					\
+"2:\n\t"						\
+	"cmpl	$2, %2\n\t"	/* under 2 bytes left: go to tail */ \
+	"jb	3f\n\t"					\
+	"testl	$2, " align_reg "\n\t"	/* bit 1 of address set? */ \
+	"jz	4f\n\t"					\
+	"movsw\n\t"		/* yes: copy one 16-bit word */	\
+	"decl	%2\n\t"					\
+	"decl	%2\n"					\
+"4:\n\t"						\
+	"movl	%2, %3\n\t"	/* save the remaining byte count */ \
+	"shrl	$2, %2\n\t"	/* bytes -> number of 32-bit moves */ \
+	"jz	5f\n\t"					\
+	"rep ; movsl\n"		/* bulk copy, 4 bytes per move */ \
+"5:\n\t"						\
+	"movl	%3, %2\n\t"	/* restore the saved byte count */ \
+	"testb	$2, %b2\n\t"	/* 2 or 3 bytes left over? */	\
+	"jz	3f\n\t"					\
+	"movsw\n"					\
+"3:\n\t"						\
+	"testb	$1, %b2\n\t"	/* one odd byte left over? */	\
+	"jz	1f\n\t"					\
+	"movsb\n"					\
+"1:"							\
+	: "=&D" (d0), "=&S" (d1), "=&c" (d2), "=&g" (d3)\
+	: "0" (dst), "1" (src), "2" (count)		\
+	: "memory");					\
+})
+
+void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
+{
+	build_memcpy_io(dst, src, count, "%%esi"); /* align on I/O source */
+}
+EXPORT_SYMBOL(memcpy_fromio);
+
+void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+{
+	build_memcpy_io(dst, src, count, "%%edi"); /* align on I/O dest */
+}
+EXPORT_SYMBOL(memcpy_toio);
--- 2.6.17-rc5-32.orig/include/asm-i386/io.h
+++ 2.6.17-rc5-32/include/asm-i386/io.h
@@ -200,14 +200,9 @@ static inline void memset_io(volatile vo
 {
 	memset((void __force *) addr, val, count);
 }
-static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
-{
-	__memcpy(dst, (void __force *) src, count);
-}
-static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
-{
-	__memcpy((void __force *) dst, src, count);
-}
+
+extern void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
+extern void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
 
 /*
  * ISA space is 'always mapped' on a typical x86 system, no need to
-- 
Chuck

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [patch 2.6.16-rc5-mm2] i386 memcpy: optimal memcpy for IO
  2006-06-02  1:28 [patch 2.6.16-rc5-mm2] i386 memcpy: optimal memcpy for IO Chuck Ebbert
@ 2006-06-02  5:06 ` H. Peter Anvin
  0 siblings, 0 replies; 3+ messages in thread
From: H. Peter Anvin @ 2006-06-02  5:06 UTC (permalink / raw)
  To: Chuck Ebbert; +Cc: linux-kernel, Andrew Morton, Linus Torvalds

Chuck Ebbert wrote:
> From: H. Peter Anvin <hpa@zytor.com>
> 
> Optimal memcpy for moves to/from IO space.  Does as few moves as
> possible while keeping transfers optimally aligned.
> 
> Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
> 

Why wrap this in C code when it's just assembly anyway?  It just makes 
it look ugly...

	-hpa

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [patch 2.6.16-rc5-mm2] i386 memcpy: optimal memcpy for IO
@ 2006-06-02  5:57 Chuck Ebbert
  0 siblings, 0 replies; 3+ messages in thread
From: Chuck Ebbert @ 2006-06-02  5:57 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Linus Torvalds, Andrew Morton, linux-kernel

In-Reply-To: <447FC75F.6090905@zytor.com>

On Thu, 01 Jun 2006 22:06:39 -0700, H. Peter Anvin wrote:

> Why wrap this in C code when it's just assembly anyway?  It just makes 
> it look ugly...

Because I couldn't figure out how to do EXPORT_SYMBOL for assembler
code. :|

And the C compiler handles frame pointers and CONFIG_REGPARM
automatically, so that made the code simpler.

-- 
Chuck


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2006-06-02  6:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2006-06-02  1:28 [patch 2.6.16-rc5-mm2] i386 memcpy: optimal memcpy for IO Chuck Ebbert
2006-06-02  5:06 ` H. Peter Anvin
  -- strict thread matches above, loose matches on Subject: below --
2006-06-02  5:57 Chuck Ebbert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox