* [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-06 20:26 [PATCH 0 of 3] 32-bit MMIO copy routine Bryan O'Sullivan
@ 2006-01-06 20:26 ` Bryan O'Sullivan
0 siblings, 0 replies; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-06 20:26 UTC (permalink / raw)
To: linux-kernel
Most arches use the <asm-generic/raw_memcpy_toio32.h> routine, while
x86_64 uses memcpy32, which is substantially faster, even on a bus
that is substantially slower than the CPU.
Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
diff -r 33790477a163 -r 9e06b832c26c include/asm-alpha/io.h
--- a/include/asm-alpha/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-alpha/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -504,6 +504,8 @@
extern void memcpy_toio(volatile void __iomem *, const void *, long);
extern void _memset_c_io(volatile void __iomem *, unsigned long, long);
+#include <asm-generic/raw_memcpy_toio32.h>
+
static inline void memset_io(volatile void __iomem *addr, u8 c, long len)
{
_memset_c_io(addr, 0x0101010101010101UL * c, len);
diff -r 33790477a163 -r 9e06b832c26c include/asm-arm/io.h
--- a/include/asm-arm/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-arm/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -184,6 +184,8 @@
#define memset_io(c,v,l) _memset_io(__mem_pci(c),(v),(l))
#define memcpy_fromio(a,c,l) _memcpy_fromio((a),__mem_pci(c),(l))
#define memcpy_toio(c,a,l) _memcpy_toio(__mem_pci(c),(a),(l))
+
+#include <asm-generic/raw_memcpy_toio32.h>
#define eth_io_copy_and_sum(s,c,l,b) \
eth_copy_and_sum((s),__mem_pci(c),(l),(b))
diff -r 33790477a163 -r 9e06b832c26c include/asm-cris/io.h
--- a/include/asm-cris/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-cris/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -121,6 +121,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#include <asm-generic/raw_memcpy_toio32.h>
+
/*
* Again, CRIS does not require mem IO specific function.
*/
diff -r 33790477a163 -r 9e06b832c26c include/asm-frv/io.h
--- a/include/asm-frv/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-frv/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -124,6 +124,8 @@
memcpy((void __force *) dst, src, count);
}
+#include <asm-generic/raw_memcpy_toio32.h>
+
static inline uint8_t inb(unsigned long addr)
{
return __builtin_read8((void *)addr);
diff -r 33790477a163 -r 9e06b832c26c include/asm-h8300/io.h
--- a/include/asm-h8300/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-h8300/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -209,6 +209,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#include <asm-generic/raw_memcpy_toio32.h>
+
#define mmiowb()
#define inb(addr) ((h8300_buswidth(addr))?readw((addr) & ~1) & 0xff:readb(addr))
diff -r 33790477a163 -r 9e06b832c26c include/asm-i386/io.h
--- a/include/asm-i386/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-i386/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -203,6 +203,8 @@
{
__memcpy((void __force *) dst, src, count);
}
+
+#include <asm-generic/raw_memcpy_toio32.h>
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
diff -r 33790477a163 -r 9e06b832c26c include/asm-ia64/io.h
--- a/include/asm-ia64/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-ia64/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -443,6 +443,8 @@
extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n);
extern void memset_io(volatile void __iomem *s, int c, long n);
+#include <asm-generic/raw_memcpy_toio32.h>
+
#define dma_cache_inv(_start,_size) do { } while (0)
#define dma_cache_wback(_start,_size) do { } while (0)
#define dma_cache_wback_inv(_start,_size) do { } while (0)
diff -r 33790477a163 -r 9e06b832c26c include/asm-m32r/io.h
--- a/include/asm-m32r/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-m32r/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -216,6 +216,8 @@
memcpy((void __force *) dst, src, count);
}
+#include <asm-generic/raw_memcpy_toio32.h>
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r 33790477a163 -r 9e06b832c26c include/asm-m68knommu/io.h
--- a/include/asm-m68knommu/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-m68knommu/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -113,6 +113,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#include <asm-generic/raw_memcpy_toio32.h>
+
#define inb(addr) readb(addr)
#define inw(addr) readw(addr)
#define inl(addr) readl(addr)
diff -r 33790477a163 -r 9e06b832c26c include/asm-mips/io.h
--- a/include/asm-mips/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-mips/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -534,6 +534,8 @@
memcpy((void __force *) dst, src, count);
}
+#include <asm-generic/raw_memcpy_toio32.h>
+
/*
* Memory Mapped I/O
*/
diff -r 33790477a163 -r 9e06b832c26c include/asm-parisc/io.h
--- a/include/asm-parisc/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-parisc/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -294,6 +294,8 @@
void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
+#include <asm-generic/raw_memcpy_toio32.h>
+
/* Support old drivers which don't ioremap.
* NB this interface is scheduled to disappear in 2.5
*/
diff -r 33790477a163 -r 9e06b832c26c include/asm-powerpc/io.h
--- a/include/asm-powerpc/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-powerpc/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -63,6 +63,8 @@
#define memcpy_fromio(a,b,c) iSeries_memcpy_fromio((a), (b), (c))
#define memcpy_toio(a,b,c) iSeries_memcpy_toio((a), (b), (c))
+#include <asm-generic/raw_memcpy_toio32.h>
+
#define inb(addr) readb(((void __iomem *)(long)(addr)))
#define inw(addr) readw(((void __iomem *)(long)(addr)))
#define inl(addr) readl(((void __iomem *)(long)(addr)))
diff -r 33790477a163 -r 9e06b832c26c include/asm-ppc/io.h
--- a/include/asm-ppc/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-ppc/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -367,6 +367,8 @@
}
#endif
+#include <asm-generic/raw_memcpy_toio32.h>
+
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(void __iomem *)(b),(c),(d))
/*
diff -r 33790477a163 -r 9e06b832c26c include/asm-s390/io.h
--- a/include/asm-s390/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-s390/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -99,6 +99,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
+#include <asm-generic/raw_memcpy_toio32.h>
+
#define inb_p(addr) readb(addr)
#define inb(addr) readb(addr)
diff -r 33790477a163 -r 9e06b832c26c include/asm-sh/io.h
--- a/include/asm-sh/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-sh/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -177,6 +177,8 @@
extern void memcpy_toio(unsigned long, const void *, unsigned long);
extern void memset_io(unsigned long, int, unsigned long);
+#include <asm-generic/raw_memcpy_toio32.h>
+
/* SuperH on-chip I/O functions */
static __inline__ unsigned char ctrl_inb(unsigned long addr)
{
diff -r 33790477a163 -r 9e06b832c26c include/asm-sh64/io.h
--- a/include/asm-sh64/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-sh64/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -125,6 +125,8 @@
void memcpy_toio(void __iomem *to, const void *from, long count);
void memcpy_fromio(void *to, void __iomem *from, long count);
+
+#include <asm-generic/raw_memcpy_toio32.h>
#define mmiowb()
diff -r 33790477a163 -r 9e06b832c26c include/asm-sparc/io.h
--- a/include/asm-sparc/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-sparc/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -239,6 +239,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+#include <asm-generic/raw_memcpy_toio32.h>
+
#ifdef __KERNEL__
/*
diff -r 33790477a163 -r 9e06b832c26c include/asm-sparc64/io.h
--- a/include/asm-sparc64/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-sparc64/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -440,6 +440,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+#include <asm-generic/raw_memcpy_toio32.h>
+
static inline int check_signature(void __iomem *io_addr,
const unsigned char *signature,
int length)
diff -r 33790477a163 -r 9e06b832c26c include/asm-v850/io.h
--- a/include/asm-v850/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-v850/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -130,6 +130,8 @@
#define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
#define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)
+#include <asm-generic/raw_memcpy_toio32.h>
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r 33790477a163 -r 9e06b832c26c include/asm-x86_64/io.h
--- a/include/asm-x86_64/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-x86_64/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -252,6 +252,13 @@
__memcpy_toio((unsigned long)to,from,len);
}
+#include <asm/string.h>
+
+static inline void __raw_memcpy_toio32(void __iomem *dst, const void *src, size_t count)
+{
+ memcpy32((void __force *) dst, src, count);
+}
+
void memset_io(volatile void __iomem *a, int b, size_t c);
/*
diff -r 33790477a163 -r 9e06b832c26c include/asm-xtensa/io.h
--- a/include/asm-xtensa/io.h Fri Jan 6 12:25:02 2006 -0800
+++ b/include/asm-xtensa/io.h Fri Jan 6 12:25:04 2006 -0800
@@ -159,6 +159,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+#include <asm-generic/raw_memcpy_toio32.h>
+
/* At this point the Xtensa doesn't provide byte swap instructions */
#ifdef __XTENSA_EB__
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 0 of 3] 32-bit MMIO copy routines, reworked
@ 2006-01-10 19:53 Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 1 of 3] Introduce __raw_memcpy_toio32 Bryan O'Sullivan
` (2 more replies)
0 siblings, 3 replies; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-10 19:53 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, hch, ak, rdreier
[-- Attachment #1: Type: text/plain, Size: 752 bytes --]
After some more review comments from Roland, Andrew and Chris Hellwig,
here is a reworked set of 32-bit MMIO copy patches.
These use CONFIG_RAW_MEMCPY_IO to determine whether an arch should use
the generic __raw_memcpy_toio32 routine or its own specialised version.
We provide a specialised implementation for x86_64.
These patches should apply cleanly against current -git, and have been
tested on i386 and x86_64.
The patch series is as follows:
raw_memcpy_io.patch
Introduce the generic MMIO 32-bit copy routine.
x86_64-memcpy32.patch
Add memcpy32 routine to x86_64.
arch-specific-raw_memcpy_io.patch
Get each arch to use generic memcpy_io code, except x86_64, which
uses memcpy32.
Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 1 of 3] Introduce __raw_memcpy_toio32
2006-01-10 19:53 [PATCH 0 of 3] 32-bit MMIO copy routines, reworked Bryan O'Sullivan
@ 2006-01-10 19:53 ` Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 2 of 3] memcpy32 for x86_64 Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch Bryan O'Sullivan
2 siblings, 0 replies; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-10 19:53 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, hch, ak, rdreier
This arch-independent routine copies data to a memory-mapped I/O region,
using 32-bit accesses. It does not guarantee access ordering, nor does
it perform a memory barrier afterwards. This style of access is required
by some devices.
Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
diff -r 48616306e7bd -r 2d4af213d9c5 lib/Makefile
--- a/lib/Makefile Tue Jan 10 10:41:42 2006 +0800
+++ b/lib/Makefile Tue Jan 10 11:52:46 2006 -0800
@@ -21,6 +21,7 @@
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+lib-$(CONFIG_GENERIC_RAW_MEMCPY_IO) += raw_memcpy_io.o
obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
diff -r 48616306e7bd -r 2d4af213d9c5 lib/raw_memcpy_io.c
--- /dev/null Thu Jan 1 00:00:00 1970 +0000
+++ b/lib/raw_memcpy_io.c Tue Jan 10 11:52:46 2006 -0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2006 PathScale, Inc. All Rights Reserved.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/types.h>
+#include <asm/io.h>
+
+/**
+ * __raw_memcpy_toio32 - copy data to MMIO space, in 32-bit units
+ * @to: destination, in MMIO space (must be 32-bit aligned)
+ * @from: source (must be 32-bit aligned)
+ * @count: number of 32-bit quantities to copy
+ *
+ * Copy data from kernel space to MMIO space, in units of 32 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ */
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count)
+{
+ u32 __iomem *dst = to;
+ const u32 *src = from;
+ size_t i;
+
+ for (i = 0; i < count; i++)
+ __raw_writel(*src++, dst++);
+}
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 2 of 3] memcpy32 for x86_64
2006-01-10 19:53 [PATCH 0 of 3] 32-bit MMIO copy routines, reworked Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 1 of 3] Introduce __raw_memcpy_toio32 Bryan O'Sullivan
@ 2006-01-10 19:53 ` Bryan O'Sullivan
2006-01-12 8:38 ` Denis Vlasenko
2006-01-10 19:53 ` [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch Bryan O'Sullivan
2 siblings, 1 reply; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-10 19:53 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, hch, ak, rdreier
Introduce an x86_64-specific memcpy32 routine. The routine is similar
to memcpy, but is guaranteed to work in units of 32 bits at a time.
Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
diff -r 2d4af213d9c5 -r b4863171295f arch/x86_64/kernel/x8664_ksyms.c
--- a/arch/x86_64/kernel/x8664_ksyms.c Tue Jan 10 11:52:46 2006 -0800
+++ b/arch/x86_64/kernel/x8664_ksyms.c Tue Jan 10 11:52:48 2006 -0800
@@ -164,6 +164,8 @@
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL_GPL(memcpy32);
+
#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
/* prototypes are wrong, these are assembly with custom calling functions */
extern void rwsem_down_read_failed_thunk(void);
diff -r 2d4af213d9c5 -r b4863171295f arch/x86_64/lib/Makefile
--- a/arch/x86_64/lib/Makefile Tue Jan 10 11:52:46 2006 -0800
+++ b/arch/x86_64/lib/Makefile Tue Jan 10 11:52:48 2006 -0800
@@ -9,4 +9,4 @@
lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
usercopy.o getuser.o putuser.o \
thunk.o clear_page.o copy_page.o bitstr.o bitops.o
-lib-y += memcpy.o memmove.o memset.o copy_user.o
+lib-y += memcpy.o memcpy32.o memmove.o memset.o copy_user.o
diff -r 2d4af213d9c5 -r b4863171295f include/asm-x86_64/string.h
--- a/include/asm-x86_64/string.h Tue Jan 10 11:52:46 2006 -0800
+++ b/include/asm-x86_64/string.h Tue Jan 10 11:52:48 2006 -0800
@@ -45,6 +45,9 @@
#define __HAVE_ARCH_MEMMOVE
void * memmove(void * dest,const void *src,size_t count);
+/* copy data, 32 bits at a time */
+void memcpy32(void *dst, const void *src, size_t count);
+
/* Use C out of line version for memcmp */
#define memcmp __builtin_memcmp
int memcmp(const void * cs,const void * ct,size_t count);
diff -r 2d4af213d9c5 -r b4863171295f arch/x86_64/lib/memcpy32.S
--- /dev/null Thu Jan 1 00:00:00 1970 +0000
+++ b/arch/x86_64/lib/memcpy32.S Tue Jan 10 11:52:48 2006 -0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2006 PathScale, Inc. All Rights Reserved.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * Registers used below:
+ * dst - rdi
+ * src - rsi
+ * count - rdx
+ */
+
+/**
+ * memcpy32 - copy data, in units of 32 bits at a time
+ * @dst: destination (must be 32-bit aligned)
+ * @src: source (must be 32-bit aligned)
+ * @count: number of 32-bit quantities to copy
+ */
+ .globl memcpy32
+memcpy32:
+ movl %edx,%ecx
+ shrl $1,%ecx
+ andl $1,%edx
+ rep movsq
+ movl %edx,%ecx
+ rep movsd
+ ret
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-10 19:53 [PATCH 0 of 3] 32-bit MMIO copy routines, reworked Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 1 of 3] Introduce __raw_memcpy_toio32 Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 2 of 3] memcpy32 for x86_64 Bryan O'Sullivan
@ 2006-01-10 19:53 ` Bryan O'Sullivan
2006-01-10 20:08 ` Andi Kleen
2 siblings, 1 reply; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-10 19:53 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, hch, ak, rdreier
Most arches use the generic routine. x86_64 uses memcpy32 instead;
this is substantially faster, even over a bus that is much slower than
the CPU.
Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
diff -r b4863171295f -r 5673a186625f arch/alpha/Kconfig
--- a/arch/alpha/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/alpha/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -42,6 +42,10 @@
default y
config GENERIC_IRQ_PROBE
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/arm/Kconfig
--- a/arch/arm/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/arm/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -59,6 +59,10 @@
config GENERIC_BUST_SPINLOCK
bool
+
+config GENERIC_RAW_MEMCPY_IO
+ bool
+ default y
config ARCH_MAY_HAVE_PC_FDC
bool
diff -r b4863171295f -r 5673a186625f arch/arm26/Kconfig
--- a/arch/arm26/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/arm26/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -33,6 +33,10 @@
config FORCE_MAX_ZONEORDER
int
default 9
+
+config GENERIC_RAW_MEMCPY_IO
+ bool
+ default y
config RWSEM_GENERIC_SPINLOCK
bool
diff -r b4863171295f -r 5673a186625f arch/cris/Kconfig
--- a/arch/cris/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/cris/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -17,6 +17,10 @@
bool
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/frv/Kconfig
--- a/arch/frv/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/frv/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -24,6 +24,10 @@
config GENERIC_CALIBRATE_DELAY
bool
default n
+
+config GENERIC_RAW_MEMCPY_IO
+ bool
+ default y
config GENERIC_HARDIRQS
bool
diff -r b4863171295f -r 5673a186625f arch/h8300/Kconfig
--- a/arch/h8300/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/h8300/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -30,6 +30,10 @@
default n
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/i386/Kconfig
--- a/arch/i386/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/i386/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -34,6 +34,10 @@
default y
config GENERIC_IOMAP
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/ia64/Kconfig
--- a/arch/ia64/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/ia64/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -47,6 +47,10 @@
default y
config GENERIC_IOMAP
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/m32r/Kconfig
--- a/arch/m32r/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/m32r/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -25,6 +25,10 @@
default y
config GENERIC_IRQ_PROBE
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/m68k/Kconfig
--- a/arch/m68k/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/m68k/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -18,6 +18,10 @@
bool
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/m68knommu/Kconfig
--- a/arch/m68knommu/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/m68knommu/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -26,6 +26,10 @@
default n
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/mips/Kconfig
--- a/arch/mips/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/mips/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -800,6 +800,10 @@
bool
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/parisc/Kconfig
--- a/arch/parisc/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/parisc/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -37,6 +37,10 @@
config GENERIC_IRQ_PROBE
def_bool y
+
+config GENERIC_RAW_MEMCPY_IO
+ bool
+ default y
# unless you want to implement ACPI on PA-RISC ... ;-)
config PM
diff -r b4863171295f -r 5673a186625f arch/powerpc/Kconfig
--- a/arch/powerpc/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/powerpc/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -38,6 +38,10 @@
default y
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/ppc/Kconfig
--- a/arch/ppc/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/ppc/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -16,6 +16,10 @@
bool
config RWSEM_XCHGADD_ALGORITHM
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/s390/Kconfig
--- a/arch/s390/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/s390/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -20,6 +20,10 @@
config GENERIC_BUST_SPINLOCK
bool
+
+config GENERIC_RAW_MEMCPY_IO
+ bool
+ default y
mainmenu "Linux Kernel Configuration"
diff -r b4863171295f -r 5673a186625f arch/sh/Kconfig
--- a/arch/sh/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/sh/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -30,6 +30,10 @@
default y
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/sh64/Kconfig
--- a/arch/sh64/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/sh64/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -34,6 +34,10 @@
config GENERIC_ISA_DMA
bool
+
+config GENERIC_RAW_MEMCPY_IO
+ bool
+ default y
source init/Kconfig
diff -r b4863171295f -r 5673a186625f arch/sparc/Kconfig
--- a/arch/sparc/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/sparc/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -152,6 +152,10 @@
bool
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/sparc64/Kconfig
--- a/arch/sparc64/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/sparc64/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -166,6 +166,10 @@
bool
default y
+config GENERIC_RAW_MEMCPY_IO
+ bool
+ default y
+
choice
prompt "SPARC64 Huge TLB Page Size"
depends on HUGETLB_PAGE
diff -r b4863171295f -r 5673a186625f arch/v850/Kconfig
--- a/arch/v850/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/v850/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -17,6 +17,9 @@
bool
default n
config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f arch/xtensa/Kconfig
--- a/arch/xtensa/Kconfig Tue Jan 10 11:52:48 2006 -0800
+++ b/arch/xtensa/Kconfig Tue Jan 10 11:52:51 2006 -0800
@@ -27,6 +27,10 @@
default y
config GENERIC_HARDIRQS
+ bool
+ default y
+
+config GENERIC_RAW_MEMCPY_IO
bool
default y
diff -r b4863171295f -r 5673a186625f include/asm-alpha/io.h
--- a/include/asm-alpha/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-alpha/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -504,6 +504,8 @@
extern void memcpy_toio(volatile void __iomem *, const void *, long);
extern void _memset_c_io(volatile void __iomem *, unsigned long, long);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
static inline void memset_io(volatile void __iomem *addr, u8 c, long len)
{
_memset_c_io(addr, 0x0101010101010101UL * c, len);
diff -r b4863171295f -r 5673a186625f include/asm-arm/io.h
--- a/include/asm-arm/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-arm/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -189,6 +189,8 @@
#define memset_io(c,v,l) _memset_io(__mem_pci(c),(v),(l))
#define memcpy_fromio(a,c,l) _memcpy_fromio((a),__mem_pci(c),(l))
#define memcpy_toio(c,a,l) _memcpy_toio(__mem_pci(c),(a),(l))
+
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
#define eth_io_copy_and_sum(s,c,l,b) \
eth_copy_and_sum((s),__mem_pci(c),(l),(b))
diff -r b4863171295f -r 5673a186625f include/asm-cris/io.h
--- a/include/asm-cris/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-cris/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -121,6 +121,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Again, CRIS does not require mem IO specific function.
*/
diff -r b4863171295f -r 5673a186625f include/asm-frv/io.h
--- a/include/asm-frv/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-frv/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -127,6 +127,8 @@
memcpy((void __force *) dst, src, count);
}
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
static inline uint8_t inb(unsigned long addr)
{
return __builtin_read8((void *)addr);
diff -r b4863171295f -r 5673a186625f include/asm-h8300/io.h
--- a/include/asm-h8300/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-h8300/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -209,6 +209,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define mmiowb()
#define inb(addr) ((h8300_buswidth(addr))?readw((addr) & ~1) & 0xff:readb(addr))
diff -r b4863171295f -r 5673a186625f include/asm-i386/io.h
--- a/include/asm-i386/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-i386/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -203,6 +203,8 @@
{
__memcpy((void __force *) dst, src, count);
}
+
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
diff -r b4863171295f -r 5673a186625f include/asm-ia64/io.h
--- a/include/asm-ia64/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-ia64/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -444,6 +444,8 @@
extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n);
extern void memset_io(volatile void __iomem *s, int c, long n);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define dma_cache_inv(_start,_size) do { } while (0)
#define dma_cache_wback(_start,_size) do { } while (0)
#define dma_cache_wback_inv(_start,_size) do { } while (0)
diff -r b4863171295f -r 5673a186625f include/asm-m32r/io.h
--- a/include/asm-m32r/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-m32r/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -216,6 +216,8 @@
memcpy((void __force *) dst, src, count);
}
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r b4863171295f -r 5673a186625f include/asm-m68knommu/io.h
--- a/include/asm-m68knommu/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-m68knommu/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -113,6 +113,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define inb(addr) readb(addr)
#define inw(addr) readw(addr)
#define inl(addr) readl(addr)
diff -r b4863171295f -r 5673a186625f include/asm-mips/io.h
--- a/include/asm-mips/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-mips/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -534,6 +534,8 @@
memcpy((void __force *) dst, src, count);
}
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Memory Mapped I/O
*/
diff -r b4863171295f -r 5673a186625f include/asm-parisc/io.h
--- a/include/asm-parisc/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-parisc/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -294,6 +294,8 @@
void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/* Support old drivers which don't ioremap.
* NB this interface is scheduled to disappear in 2.5
*/
diff -r b4863171295f -r 5673a186625f include/asm-powerpc/io.h
--- a/include/asm-powerpc/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-powerpc/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -64,6 +64,8 @@
#define memcpy_fromio(a,b,c) iSeries_memcpy_fromio((a), (b), (c))
#define memcpy_toio(a,b,c) iSeries_memcpy_toio((a), (b), (c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define inb(addr) readb(((void __iomem *)(long)(addr)))
#define inw(addr) readw(((void __iomem *)(long)(addr)))
#define inl(addr) readl(((void __iomem *)(long)(addr)))
diff -r b4863171295f -r 5673a186625f include/asm-ppc/io.h
--- a/include/asm-ppc/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-ppc/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -369,6 +369,8 @@
}
#endif
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(void __iomem *)(b),(c),(d))
/*
diff -r b4863171295f -r 5673a186625f include/asm-s390/io.h
--- a/include/asm-s390/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-s390/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -99,6 +99,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define inb_p(addr) readb(addr)
#define inb(addr) readb(addr)
diff -r b4863171295f -r 5673a186625f include/asm-sh/io.h
--- a/include/asm-sh/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-sh/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -177,6 +177,8 @@
extern void memcpy_toio(unsigned long, const void *, unsigned long);
extern void memset_io(unsigned long, int, unsigned long);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/* SuperH on-chip I/O functions */
static __inline__ unsigned char ctrl_inb(unsigned long addr)
{
diff -r b4863171295f -r 5673a186625f include/asm-sh64/io.h
--- a/include/asm-sh64/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-sh64/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -125,6 +125,8 @@
void memcpy_toio(void __iomem *to, const void *from, long count);
void memcpy_fromio(void *to, void __iomem *from, long count);
+
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
#define mmiowb()
diff -r b4863171295f -r 5673a186625f include/asm-sparc/io.h
--- a/include/asm-sparc/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-sparc/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -239,6 +239,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#ifdef __KERNEL__
/*
diff -r b4863171295f -r 5673a186625f include/asm-sparc64/io.h
--- a/include/asm-sparc64/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-sparc64/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -440,6 +440,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
static inline int check_signature(void __iomem *io_addr,
const unsigned char *signature,
int length)
diff -r b4863171295f -r 5673a186625f include/asm-v850/io.h
--- a/include/asm-v850/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-v850/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -130,6 +130,8 @@
#define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
#define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r b4863171295f -r 5673a186625f include/asm-x86_64/io.h
--- a/include/asm-x86_64/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-x86_64/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -252,6 +252,14 @@
__memcpy_toio((unsigned long)to,from,len);
}
+#include <asm/string.h>
+
+/* See lib/raw_memcpy_io.c for kernel doc. */
+static inline void __raw_memcpy_toio32(void __iomem *dst, const void *src, size_t count)
+{
+ memcpy32((void __force *) dst, src, count);
+}
+
void memset_io(volatile void __iomem *a, int b, size_t c);
/*
diff -r b4863171295f -r 5673a186625f include/asm-xtensa/io.h
--- a/include/asm-xtensa/io.h Tue Jan 10 11:52:48 2006 -0800
+++ b/include/asm-xtensa/io.h Tue Jan 10 11:52:51 2006 -0800
@@ -159,6 +159,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/* At this point the Xtensa doesn't provide byte swap instructions */
#ifdef __XTENSA_EB__
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-10 19:53 ` [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch Bryan O'Sullivan
@ 2006-01-10 20:08 ` Andi Kleen
2006-01-10 22:52 ` Bryan O'Sullivan
0 siblings, 1 reply; 17+ messages in thread
From: Andi Kleen @ 2006-01-10 20:08 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: akpm, linux-kernel, hch, rdreier
On Tuesday 10 January 2006 20:53, Bryan O'Sullivan wrote:
> Most arches use the generic routine. x86_64 uses memcpy32 instead;
> this is substantially faster, even over a bus that is much slower than
> the CPU.
So did you run numbers against the C implementation with -funroll-loops ?
What were the results?
-Andi
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-10 20:08 ` Andi Kleen
@ 2006-01-10 22:52 ` Bryan O'Sullivan
0 siblings, 0 replies; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-10 22:52 UTC (permalink / raw)
To: Andi Kleen; +Cc: akpm, linux-kernel, hch, rdreier
On Tue, 2006-01-10 at 21:08 +0100, Andi Kleen wrote:
> On Tuesday 10 January 2006 20:53, Bryan O'Sullivan wrote:
> > Most arches use the generic routine. x86_64 uses memcpy32 instead;
> > this is substantially faster, even over a bus that is much slower than
> > the CPU.
>
> So did you run numbers against the C implementation with -funroll-loops ?
> What were the results?
The C implementation is about 5% slower when copying over
HyperTransport.
<b
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-11 22:39 [PATCH 0 of 3] MMIO 32-bit copy routine, the final frontier Bryan O'Sullivan
@ 2006-01-11 22:39 ` Bryan O'Sullivan
2006-01-11 23:46 ` Andrew Morton
0 siblings, 1 reply; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-11 22:39 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, hch, ak
Most arches use the generic routine. x86_64 uses memcpy32 instead;
this is substantially faster, even over a bus that is much slower than
the CPU.
Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
diff -r 1052904816d7 -r ee6ce7e55dc7 arch/x86_64/lib/io.c
--- a/arch/x86_64/lib/io.c Wed Jan 11 14:35:45 2006 -0800
+++ b/arch/x86_64/lib/io.c Wed Jan 11 14:35:45 2006 -0800
@@ -21,3 +21,9 @@
memset((void *)a,b,c);
}
EXPORT_SYMBOL(memset_io);
+
+/* override generic definition in lib/raw_memcpy_io.c */
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count)
+{
+ memcpy32((void __force *) to, from, count);
+}
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-alpha/io.h
--- a/include/asm-alpha/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-alpha/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -504,6 +504,8 @@
extern void memcpy_toio(volatile void __iomem *, const void *, long);
extern void _memset_c_io(volatile void __iomem *, unsigned long, long);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
static inline void memset_io(volatile void __iomem *addr, u8 c, long len)
{
_memset_c_io(addr, 0x0101010101010101UL * c, len);
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-arm/io.h
--- a/include/asm-arm/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-arm/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -189,6 +189,8 @@
#define memset_io(c,v,l) _memset_io(__mem_pci(c),(v),(l))
#define memcpy_fromio(a,c,l) _memcpy_fromio((a),__mem_pci(c),(l))
#define memcpy_toio(c,a,l) _memcpy_toio(__mem_pci(c),(a),(l))
+
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
#define eth_io_copy_and_sum(s,c,l,b) \
eth_copy_and_sum((s),__mem_pci(c),(l),(b))
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-cris/io.h
--- a/include/asm-cris/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-cris/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -121,6 +121,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Again, CRIS does not require mem IO specific function.
*/
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-frv/io.h
--- a/include/asm-frv/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-frv/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -127,6 +127,8 @@
memcpy((void __force *) dst, src, count);
}
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
static inline uint8_t inb(unsigned long addr)
{
return __builtin_read8((void *)addr);
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-h8300/io.h
--- a/include/asm-h8300/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-h8300/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -209,6 +209,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define mmiowb()
#define inb(addr) ((h8300_buswidth(addr))?readw((addr) & ~1) & 0xff:readb(addr))
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-i386/io.h
--- a/include/asm-i386/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-i386/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -203,6 +203,8 @@
{
__memcpy((void __force *) dst, src, count);
}
+
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-ia64/io.h
--- a/include/asm-ia64/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-ia64/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -444,6 +444,8 @@
extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n);
extern void memset_io(volatile void __iomem *s, int c, long n);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define dma_cache_inv(_start,_size) do { } while (0)
#define dma_cache_wback(_start,_size) do { } while (0)
#define dma_cache_wback_inv(_start,_size) do { } while (0)
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-m32r/io.h
--- a/include/asm-m32r/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-m32r/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -216,6 +216,8 @@
memcpy((void __force *) dst, src, count);
}
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-m68knommu/io.h
--- a/include/asm-m68knommu/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-m68knommu/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -113,6 +113,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define inb(addr) readb(addr)
#define inw(addr) readw(addr)
#define inl(addr) readl(addr)
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-mips/io.h
--- a/include/asm-mips/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-mips/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -534,6 +534,8 @@
memcpy((void __force *) dst, src, count);
}
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Memory Mapped I/O
*/
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-parisc/io.h
--- a/include/asm-parisc/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-parisc/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -294,6 +294,8 @@
void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/* Support old drivers which don't ioremap.
* NB this interface is scheduled to disappear in 2.5
*/
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-powerpc/io.h
--- a/include/asm-powerpc/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-powerpc/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -64,6 +64,8 @@
#define memcpy_fromio(a,b,c) iSeries_memcpy_fromio((a), (b), (c))
#define memcpy_toio(a,b,c) iSeries_memcpy_toio((a), (b), (c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define inb(addr) readb(((void __iomem *)(long)(addr)))
#define inw(addr) readw(((void __iomem *)(long)(addr)))
#define inl(addr) readl(((void __iomem *)(long)(addr)))
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-ppc/io.h
--- a/include/asm-ppc/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-ppc/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -369,6 +369,8 @@
}
#endif
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(void __iomem *)(b),(c),(d))
/*
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-s390/io.h
--- a/include/asm-s390/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-s390/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -99,6 +99,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#define inb_p(addr) readb(addr)
#define inb(addr) readb(addr)
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-sh/io.h
--- a/include/asm-sh/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-sh/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -177,6 +177,8 @@
extern void memcpy_toio(unsigned long, const void *, unsigned long);
extern void memset_io(unsigned long, int, unsigned long);
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/* SuperH on-chip I/O functions */
static __inline__ unsigned char ctrl_inb(unsigned long addr)
{
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-sh64/io.h
--- a/include/asm-sh64/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-sh64/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -125,6 +125,8 @@
void memcpy_toio(void __iomem *to, const void *from, long count);
void memcpy_fromio(void *to, void __iomem *from, long count);
+
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
#define mmiowb()
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-sparc/io.h
--- a/include/asm-sparc/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-sparc/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -239,6 +239,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
#ifdef __KERNEL__
/*
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-sparc64/io.h
--- a/include/asm-sparc64/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-sparc64/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -440,6 +440,8 @@
#define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz)
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
static inline int check_signature(void __iomem *io_addr,
const unsigned char *signature,
int length)
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-v850/io.h
--- a/include/asm-v850/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-v850/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -130,6 +130,8 @@
#define memcpy_fromio(dst, src, len) memcpy (dst, (void *)src, len)
#define memcpy_toio(dst, src, len) memcpy ((void *)dst, src, len)
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-x86_64/io.h
--- a/include/asm-x86_64/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-x86_64/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -252,6 +252,8 @@
__memcpy_toio((unsigned long)to,from,len);
}
+void __raw_memcpy_toio32(void __iomem *dst, const void *src, size_t count);
+
void memset_io(volatile void __iomem *a, int b, size_t c);
/*
diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-xtensa/io.h
--- a/include/asm-xtensa/io.h Wed Jan 11 14:35:45 2006 -0800
+++ b/include/asm-xtensa/io.h Wed Jan 11 14:35:45 2006 -0800
@@ -159,6 +159,8 @@
#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c))
#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c))
+void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
+
/* At this point the Xtensa doesn't provide byte swap instructions */
#ifdef __XTENSA_EB__
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-11 22:39 ` [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch Bryan O'Sullivan
@ 2006-01-11 23:46 ` Andrew Morton
2006-01-12 0:05 ` Bryan O'Sullivan
0 siblings, 1 reply; 17+ messages in thread
From: Andrew Morton @ 2006-01-11 23:46 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: linux-kernel, hch, ak
"Bryan O'Sullivan" <bos@pathscale.com> wrote:
>
> Most arches use the generic routine. x86_64 uses memcpy32 instead;
> this is substantially faster, even over a bus that is much slower than
> the CPU.
>
> Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
>
> diff -r 1052904816d7 -r ee6ce7e55dc7 arch/x86_64/lib/io.c
> --- a/arch/x86_64/lib/io.c Wed Jan 11 14:35:45 2006 -0800
> +++ b/arch/x86_64/lib/io.c Wed Jan 11 14:35:45 2006 -0800
> @@ -21,3 +21,9 @@
> memset((void *)a,b,c);
> }
> EXPORT_SYMBOL(memset_io);
> +
> +/* override generic definition in lib/raw_memcpy_io.c */
> +void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count)
> +{
> + memcpy32((void __force *) to, from, count);
> +}
> diff -r 1052904816d7 -r ee6ce7e55dc7 include/asm-alpha/io.h
> --- a/include/asm-alpha/io.h Wed Jan 11 14:35:45 2006 -0800
> +++ b/include/asm-alpha/io.h Wed Jan 11 14:35:45 2006 -0800
> @@ -504,6 +504,8 @@
> extern void memcpy_toio(volatile void __iomem *, const void *, long);
> extern void _memset_c_io(volatile void __iomem *, unsigned long, long);
>
> +void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
> +
<etc>
How's about we add a new linux/io.h which does:
#include <asm/io.h>
void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-11 23:46 ` Andrew Morton
@ 2006-01-12 0:05 ` Bryan O'Sullivan
2006-01-12 0:13 ` Andrew Morton
0 siblings, 1 reply; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-12 0:05 UTC (permalink / raw)
To: Andrew Morton; +Cc: rdreier, linux-kernel, hch, ak
On Wed, 2006-01-11 at 15:46 -0800, Andrew Morton wrote:
> How's about we add a new linux/io.h which does:
>
> #include <asm/io.h>
> void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
I thought about this, and about moving other duplicated definitions from
asm-*/io.h in here, but I couldn't find any other obvious candidates, so
I wasn't anxious to introduce a new file.
If you think that's OK, though, it obviously makes the patch a lot
smaller, and gives a common place to put future cross-arch definitions.
I'll run another spin of the patch with your and Roland's suggestions.
<b
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-12 0:05 ` Bryan O'Sullivan
@ 2006-01-12 0:13 ` Andrew Morton
2006-01-12 0:21 ` Bryan O'Sullivan
0 siblings, 1 reply; 17+ messages in thread
From: Andrew Morton @ 2006-01-12 0:13 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: rdreier, linux-kernel, hch, ak
"Bryan O'Sullivan" <bos@pathscale.com> wrote:
>
> On Wed, 2006-01-11 at 15:46 -0800, Andrew Morton wrote:
>
> > How's about we add a new linux/io.h which does:
> >
> > #include <asm/io.h>
> > void __raw_memcpy_toio32(void __iomem *to, const void *from, size_t count);
>
> I thought about this, and about moving other duplicated definitions from
> asm-*/io.h in here, but I couldn't find any other obvious candidates, so
> I wasn't anxious to introduce a new file.
>
Well it's obviously better than duplicating the thing.
There are other common things which can be hoisted to linux/io.h, but if we
do that then zillions of .c files need to be changed to include linux/io.h
rather than asm/io.h. That's a good janitorial thing to do, but I doubt if
you want to do it ;)
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch
2006-01-12 0:13 ` Andrew Morton
@ 2006-01-12 0:21 ` Bryan O'Sullivan
0 siblings, 0 replies; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-12 0:21 UTC (permalink / raw)
To: Andrew Morton; +Cc: rdreier, linux-kernel, hch, ak
On Wed, 2006-01-11 at 16:13 -0800, Andrew Morton wrote:
> There are other common things which can be hoisted to linux/io.h, but if we
> do that then zillions of .c files need to be changed to include linux/io.h
> rather than asm/io.h.
Right.
> That's a good janitorial thing to do, but I doubt if
> you want to do it ;)
Not as part of these patches, anyway. They've left me a dried-up husk.
<b
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 2 of 3] memcpy32 for x86_64
2006-01-10 19:53 ` [PATCH 2 of 3] memcpy32 for x86_64 Bryan O'Sullivan
@ 2006-01-12 8:38 ` Denis Vlasenko
2006-01-12 16:04 ` Bryan O'Sullivan
0 siblings, 1 reply; 17+ messages in thread
From: Denis Vlasenko @ 2006-01-12 8:38 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: akpm, linux-kernel, hch, ak, rdreier
On Tuesday 10 January 2006 21:53, Bryan O'Sullivan wrote:
> Introduce an x86_64-specific memcpy32 routine. The routine is similar
> to memcpy, but is guaranteed to work in units of 32 bits at a time.
>
> Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
>
> diff -r 2d4af213d9c5 -r b4863171295f arch/x86_64/kernel/x8664_ksyms.c
> --- a/arch/x86_64/kernel/x8664_ksyms.c Tue Jan 10 11:52:46 2006 -0800
> +++ b/arch/x86_64/kernel/x8664_ksyms.c Tue Jan 10 11:52:48 2006 -0800
> @@ -164,6 +164,8 @@
> EXPORT_SYMBOL(memcpy);
> EXPORT_SYMBOL(__memcpy);
>
> +EXPORT_SYMBOL_GPL(memcpy32);
> +
> #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
> /* prototypes are wrong, these are assembly with custom calling functions */
> extern void rwsem_down_read_failed_thunk(void);
> diff -r 2d4af213d9c5 -r b4863171295f arch/x86_64/lib/Makefile
> --- a/arch/x86_64/lib/Makefile Tue Jan 10 11:52:46 2006 -0800
> +++ b/arch/x86_64/lib/Makefile Tue Jan 10 11:52:48 2006 -0800
> @@ -9,4 +9,4 @@
> lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
> usercopy.o getuser.o putuser.o \
> thunk.o clear_page.o copy_page.o bitstr.o bitops.o
> -lib-y += memcpy.o memmove.o memset.o copy_user.o
> +lib-y += memcpy.o memcpy32.o memmove.o memset.o copy_user.o
> diff -r 2d4af213d9c5 -r b4863171295f include/asm-x86_64/string.h
> --- a/include/asm-x86_64/string.h Tue Jan 10 11:52:46 2006 -0800
> +++ b/include/asm-x86_64/string.h Tue Jan 10 11:52:48 2006 -0800
> @@ -45,6 +45,9 @@
> #define __HAVE_ARCH_MEMMOVE
> void * memmove(void * dest,const void *src,size_t count);
>
> +/* copy data, 32 bits at a time */
> +void memcpy32(void *dst, const void *src, size_t count);
> +
> /* Use C out of line version for memcmp */
> #define memcmp __builtin_memcmp
> int memcmp(const void * cs,const void * ct,size_t count);
> diff -r 2d4af213d9c5 -r b4863171295f arch/x86_64/lib/memcpy32.S
> --- /dev/null Thu Jan 1 00:00:00 1970 +0000
> +++ b/arch/x86_64/lib/memcpy32.S Tue Jan 10 11:52:48 2006 -0800
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright 2006 PathScale, Inc. All Rights Reserved.
> + *
> + * This file is free software; you can redistribute it and/or modify
> + * it under the terms of version 2 of the GNU General Public License
> + * as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software Foundation,
> + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
> + */
> +
> +/*
> + * Registers used below:
> + * dst - rdi
> + * src - rsi
> + * count - rdx
> + */
> +
> +/**
> + * memcpy32 - copy data, in units of 32 bits at a time
> + * @dst: destination (must be 32-bit aligned)
> + * @src: source (must be 32-bit aligned)
> + * @count: number of 32-bit quantities to copy
> + */
> + .globl memcpy32
> +memcpy32:
> + movl %edx,%ecx
> + shrl $1,%ecx
> + andl $1,%edx
> + rep movsq
> + movl %edx,%ecx
> + rep movsd
> + ret
movsq is not a 32bit move, it's a 64 bit one.
There are three possibilities here:
1) I misunderstand what memcpy32 means (I understand it like "it guarantees
that all accesses will be strictly 32bit")
2) On all current x86_64 hardware each 64bit access from/to
IO mapped addresses is always converted to two 32bit accesses.
3) code is buggy
If it is (1) or (2), consider adding a comment to clear future
reader's confusion.
--
vda
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 2 of 3] memcpy32 for x86_64
2006-01-12 8:38 ` Denis Vlasenko
@ 2006-01-12 16:04 ` Bryan O'Sullivan
2006-01-13 9:56 ` Chris Wedgwood
0 siblings, 1 reply; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-12 16:04 UTC (permalink / raw)
To: Denis Vlasenko; +Cc: akpm, linux-kernel, hch, ak, rdreier
On Thu, 2006-01-12 at 10:38 +0200, Denis Vlasenko wrote:
> 2) On all current x86_64 hardware each 64bit access from/to
> IO mapped addresses is always converted to two 32bit accesses.
This is true for 64-bit writes over Hypertransport (reads don't get
split up this way), but not for PCI-Express memory writes, which remain
atomic 64-bit. I'll be converting the 64-bit accesses to 32-bit, as you
and Andi suggested.
<b
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 2 of 3] memcpy32 for x86_64
2006-01-12 16:04 ` Bryan O'Sullivan
@ 2006-01-13 9:56 ` Chris Wedgwood
2006-01-13 10:24 ` Denis Vlasenko
0 siblings, 1 reply; 17+ messages in thread
From: Chris Wedgwood @ 2006-01-13 9:56 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: Denis Vlasenko, akpm, linux-kernel, hch, ak, rdreier
On Thu, Jan 12, 2006 at 08:04:41AM -0800, Bryan O'Sullivan wrote:
> This is true for 64-bit writes over Hypertransport
is this something that will always be or just something current
hardware does?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 2 of 3] memcpy32 for x86_64
2006-01-13 9:56 ` Chris Wedgwood
@ 2006-01-13 10:24 ` Denis Vlasenko
2006-01-13 16:21 ` Bryan O'Sullivan
0 siblings, 1 reply; 17+ messages in thread
From: Denis Vlasenko @ 2006-01-13 10:24 UTC (permalink / raw)
To: Chris Wedgwood; +Cc: Bryan O'Sullivan, akpm, linux-kernel, hch, ak, rdreier
On Friday 13 January 2006 11:56, Chris Wedgwood wrote:
> On Thu, Jan 12, 2006 at 08:04:41AM -0800, Bryan O'Sullivan wrote:
>
> > This is true for 64-bit writes over Hypertransport
>
> is this something that will always be or just something current
> hardware does?
Yes, why risking that things will go wrong?
Also you'll get shorter code. Instead of
> + .globl memcpy32
> +memcpy32:
> + movl %edx,%ecx
> + shrl $1,%ecx
> + andl $1,%edx
> + rep movsq
> + movl %edx,%ecx
> + rep movsd
> + ret
you need just
.globl memcpy32
memcpy32:
movl %edx,%ecx
rep movsd
ret
With properly written inlined asms code will be
reduced to just "rep movsd".
--
vda
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 2 of 3] memcpy32 for x86_64
2006-01-13 10:24 ` Denis Vlasenko
@ 2006-01-13 16:21 ` Bryan O'Sullivan
0 siblings, 0 replies; 17+ messages in thread
From: Bryan O'Sullivan @ 2006-01-13 16:21 UTC (permalink / raw)
To: Denis Vlasenko; +Cc: Chris Wedgwood, akpm, linux-kernel, hch, ak, rdreier
On Fri, 2006-01-13 at 12:24 +0200, Denis Vlasenko wrote:
> you need just
>
> .globl memcpy32
> memcpy32:
> movl %edx,%ecx
> rep movsd
> ret
This is what the current version of the patches in -mm does.
<b
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2006-01-13 16:21 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-01-10 19:53 [PATCH 0 of 3] 32-bit MMIO copy routines, reworked Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 1 of 3] Introduce __raw_memcpy_toio32 Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 2 of 3] memcpy32 for x86_64 Bryan O'Sullivan
2006-01-12 8:38 ` Denis Vlasenko
2006-01-12 16:04 ` Bryan O'Sullivan
2006-01-13 9:56 ` Chris Wedgwood
2006-01-13 10:24 ` Denis Vlasenko
2006-01-13 16:21 ` Bryan O'Sullivan
2006-01-10 19:53 ` [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch Bryan O'Sullivan
2006-01-10 20:08 ` Andi Kleen
2006-01-10 22:52 ` Bryan O'Sullivan
-- strict thread matches above, loose matches on Subject: below --
2006-01-11 22:39 [PATCH 0 of 3] MMIO 32-bit copy routine, the final frontier Bryan O'Sullivan
2006-01-11 22:39 ` [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch Bryan O'Sullivan
2006-01-11 23:46 ` Andrew Morton
2006-01-12 0:05 ` Bryan O'Sullivan
2006-01-12 0:13 ` Andrew Morton
2006-01-12 0:21 ` Bryan O'Sullivan
2006-01-06 20:26 [PATCH 0 of 3] 32-bit MMIO copy routine Bryan O'Sullivan
2006-01-06 20:26 ` [PATCH 3 of 3] Add __raw_memcpy_toio32 to each arch Bryan O'Sullivan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.