Re: [parisc-linux] [RFC] Optimi[zs]e copy_*_user routines

From: Randolph Chung <randolph@tausq.org>
To: parisc-linux@lists.parisc-linux.org
Subject: Re: [parisc-linux] [RFC] Optimi[zs]e copy_*_user routines
Date: Thu, 16 Sep 2004 12:18:56 -0700	[thread overview]
Message-ID: <20040916191856.GF28659@tausq.org> (raw)
In-Reply-To: <20040910000032.GJ28659@tausq.org>

Version 2 of the optimized copy routines. Changes compared to the previous
version:

- uses the new exception mechanism to get the error semantics right
- cleaned up the debugging output
- added TODO items

If there are no more comments, I'll check this in within a couple of days.

thanks
randolph

Index: arch/parisc/Makefile
===================================================================
RCS file: /var/cvs/linux-2.6/arch/parisc/Makefile,v
retrieving revision 1.14
diff -u -p -r1.14 Makefile

--- arch/parisc/Makefile	15 Sep 2004 14:11:49 -0000	1.14
+++ arch/parisc/Makefile	16 Sep 2004 19:15:54 -0000
@@ -38,7 +38,7 @@ cflags-y	:= -pipe
 cflags-y	+= -mno-space-regs -mfast-indirect-calls
 
 # No fixed-point multiply
-cflags-y	+= -mdisable-fpregs
+#cflags-y	+= -mdisable-fpregs
 
 # Without this, "ld -r" results in .text sections that are too big
 # (> 0x40000) for branches to reach stubs.
Index: arch/parisc/lib/Makefile
===================================================================
RCS file: /var/cvs/linux-2.6/arch/parisc/lib/Makefile,v
retrieving revision 1.3
diff -u -p -r1.3 Makefile
--- arch/parisc/lib/Makefile	15 Sep 2004 16:08:48 -0000	1.3
+++ arch/parisc/lib/Makefile	16 Sep 2004 19:15:58 -0000
@@ -2,6 +2,6 @@
 # Makefile for parisc-specific library files
 #
 
-lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o fixup.o
+lib-y	:= lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o
 
 lib-$(CONFIG_SMP) += debuglocks.o
Index: arch/parisc/lib/memcpy.c
===================================================================
RCS file: arch/parisc/lib/memcpy.c
diff -N arch/parisc/lib/memcpy.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ arch/parisc/lib/memcpy.c	16 Sep 2004 19:15:58 -0000
@@ -0,0 +1,499 @@
+/*
+ *    Optimized memory copy routines.
+ *
+ *    Copyright (C) 2004 Randolph Chung <tausq@debian.org>
+ *
+ *    This program is free software; you can redistribute it and/or modify
+ *    it under the terms of the GNU General Public License as published by
+ *    the Free Software Foundation; either version 2, or (at your option)
+ *    any later version.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *    Portions derived from the GNU C Library
+ *    Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+ *
+ * Several strategies are used to get the best performance under various
+ * conditions. In the optimal case, we copy 64 bytes per iteration of an
+ * unrolled loop using fp regs. This is followed by loops that copy 32 or
+ * 16 bytes at a time using general registers.  Unaligned copies are handled
+ * either by aligning the destination and then using a shift-and-write
+ * method, or in a few cases by falling back to a byte-at-a-time copy.
+ *
+ * I chose to implement this in C because it is easier to maintain and debug,
+ * and in my experiments the code that gcc (3.3/3.4 at the time of writing)
+ * generates from it appears to be fairly optimal. Unfortunately some of the
+ * semantics of the copy routine (exception handling) are difficult to express
+ * in C, so we have to play some tricks to get them to work.
+ *
+ * All the loads and stores are done via explicit asm() code in order to use
+ * the right space registers. 
+ * 
+ * Testing with various alignments and buffer sizes shows that this code is 
+ * often >10x faster than a simple byte-at-a-time copy, even for strangely
+ * aligned operands. It is interesting to note that the glibc version
+ * of memcpy (written in C) is actually quite fast already. This routine is 
+ * able to beat it by 30-40% for aligned copies because of the loop unrolling, 
+ * but in some cases the glibc version is still slightly faster. This lends
+ * more credibility to the claim that gcc can generate very good code as long
+ * as we are careful.
+ *
+ * TODO:
+ * - cache prefetching needs more experimentation to get optimal settings
+ * - try not to use the post-increment address modifiers; they create additional
+ *   interlocks
+ * - replace byte-copy loops with stbys sequences
+ */
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <linux/compiler.h>
+#include <asm/uaccess.h>
+#define s_space "%%sr1"
+#define d_space "%%sr2"
+#else
+#define likely(x)	__builtin_expect(!!(x), 1)
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+#define inline		__inline__ __attribute__((always_inline))
+#define s_space "%%sr0"
+#define d_space "%%sr0"
+#define pa_memcpy new2_copy
+#define L1_CACHE_BYTES 64
+#endif
+
+DECLARE_PER_CPU(struct exception_data, exception_data);
+
+#define preserve_branch(label)	do {					\
+	volatile int dummy = 0;	/* initialized: no indeterminate read */	\
+	/* Never-taken branch: the two volatile reads keep gcc from */	\
+	/* proving the handler at `label' unreachable and dropping it. */	\
+	if (unlikely(dummy != dummy))					\
+		goto label;						\
+} while (0)
+/* Space value the copy_*_user wrappers load for the user side: 0 when get_fs() is KERNEL_DS, else mfsp(3). */
+#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
+/* Combine two words: w0 shifted left by sh_1, OR'ed with w1 shifted right by sh_2. */
+#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
+#define THRESHOLD	16	/* unaligned copies shorter than this many bytes are done bytewise */
+/* DPRINTF: printk with a file:line:function prefix; expands to nothing unless DEBUG_MEMCPY is defined. */
+#ifdef DEBUG_MEMCPY
+#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __FUNCTION__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
+#else
+#define DPRINTF(fmt, args...)
+#endif
+/* EXC_WORD: assembler directive used for each __ex_table address -- .word, or .dword when __LP64__ is defined. */
+#ifndef __LP64__
+#define EXC_WORD ".word"
+#else
+#define EXC_WORD ".dword"
+#endif
+/* Load with the ",ma" (modify-after) completer: _t = [_s,_a], _a advanced by _sz; __ex_table pairs the insn with label _e. */
+#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)	\
+	__asm__ __volatile__ (				\
+	"1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n" 	\
+	"\t.section __ex_table,\"aw\"\n"		\
+	"\t" EXC_WORD "\t1b\n"				\
+	"\t" EXC_WORD "\t" #_e "\n"			\
+	"\t.previous\n"					\
+	: "=" #_tt(_t), "+r"(_a)			\
+	: "1"(_a)					\
+	: "r8")
+/* Store counterpart: [_s,_a] = _t with the same ",ma" address update and the same __ex_table pairing with _e. */
+#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) 	\
+	__asm__ __volatile__ (				\
+	"1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n" 	\
+	"\t.section __ex_table,\"aw\"\n"		\
+	"\t" EXC_WORD "\t1b\n"				\
+	"\t" EXC_WORD "\t" #_e "\n"			\
+	"\t.previous\n"					\
+	: "+r"(_a) 					\
+	: #_tt(_t), "0"(_a)				\
+	: "r8")
+/* Byte (1), word (4) and fp-double (8) wrappers.  Argument order: loads (space, addr, target, label); stores (space, value, addr, label). */
+#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,r,_s,_a,_t,_e)
+#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,r,_s,_a,_t,_e)
+#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,r,_s,_a,_t,_e)
+#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,r,_s,_a,_t,_e)
+#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,f,_s,_a,_t,_e)
+#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,f,_s,_a,_t,_e)
+/* Word load from constant offset _o off _a in space _s into _t; _a is input-only; __ex_table pairs the insn with _e. */
+#define ldw(_s,_o,_a,_t,_e) 				\
+	__asm__ __volatile__ (				\
+	"1:\tldw " #_o "(" _s ",%1), %0\n"		\
+	"\t.section __ex_table,\"aw\"\n"		\
+	"\t" EXC_WORD "\t1b\n"				\
+	"\t" EXC_WORD "\t" #_e "\n"			\
+	"\t.previous\n"					\
+	: "=r"(_t) 					\
+	: "r"(_a)					\
+	: "r8")
+/* Word store of _t to constant offset _o off _a in space _s; no outputs; __ex_table pairs the insn with _e. */
+#define stw(_s,_t,_o,_a,_e) 				\
+	__asm__ __volatile__ (				\
+	"1:\tstw %0, " #_o "(" _s ",%1)\n" 		\
+	"\t.section __ex_table,\"aw\"\n"		\
+	"\t" EXC_WORD "\t1b\n"				\
+	"\t" EXC_WORD "\t" #_e "\n"			\
+	"\t.previous\n"					\
+	: 						\
+	: "r"(_t), "r"(_a)				\
+	: "r8")
+
+#ifdef  CONFIG_PREFETCH	/* without it both helpers expand to nothing */
+static inline void prefetch_src(const void *addr)
+{	/* Prefetch hint: word load via s_space whose result is discarded into %r0. */
+	__asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
+}
+/* Prefetch hint for the destination: doubleword load via d_space, result discarded into %r0. */
+static inline void prefetch_dst(const void *addr)
+{
+	__asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
+}
+#else
+#define prefetch_src(addr)
+#define prefetch_dst(addr)
+#endif
+
+/* Copy len words from unaligned src to word-aligned dst by shifting and
+ * merging adjacent source words, 4 per loop (derived from glibc).  Returns 0,
+ * or after a fault the bytes left of the o_len-byte copy at o_src/o_dst. */
+static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len)
+{
+	/* gcc complains that a2 and a3 may be uninitialized, but actually
+	 * they cannot be.  Initialize a2/a3 to shut gcc up.
+	 */
+	register unsigned int a0, a1, a2 = 0, a3 = 0;
+	int sh_1, sh_2;
+	struct exception_data *d;
+
+	/* prefetch_src((const void *)src); */
+
+	/* Calculate how to shift a word read at the memory operation
+	   aligned srcp to make it aligned for copy.  */
+	sh_1 = 8 * (src % sizeof(unsigned int));
+	sh_2 = 8 * sizeof(unsigned int) - sh_1;
+
+	/* Make src aligned by rounding it down.  */
+	src &= -sizeof(unsigned int);
+
+	switch (len % 4)
+	{
+		case 2:
+			/* a1 = ((unsigned int *) src)[0];
+			   a2 = ((unsigned int *) src)[1]; */
+			ldw(s_space, 0, src, a1, cda_ldw_exc);
+			ldw(s_space, 4, src, a2, cda_ldw_exc);
+			src -= 1 * sizeof(unsigned int);
+			dst -= 3 * sizeof(unsigned int);
+			len += 2;
+			goto do1;
+		case 3:
+			/* a0 = ((unsigned int *) src)[0];
+			   a1 = ((unsigned int *) src)[1]; */
+			ldw(s_space, 0, src, a0, cda_ldw_exc);
+			ldw(s_space, 4, src, a1, cda_ldw_exc);
+			src -= 0 * sizeof(unsigned int);
+			dst -= 2 * sizeof(unsigned int);
+			len += 1;
+			goto do2;
+		case 0:
+			if (len == 0)
+				return 0;
+			/* a3 = ((unsigned int *) src)[0];
+			   a0 = ((unsigned int *) src)[1]; */
+			ldw(s_space, 0, src, a3, cda_ldw_exc);
+			ldw(s_space, 4, src, a0, cda_ldw_exc);
+			src -=-1 * sizeof(unsigned int);
+			dst -= 1 * sizeof(unsigned int);
+			len += 0;
+			goto do3;
+		case 1:
+			/* a2 = ((unsigned int *) src)[0];
+			   a3 = ((unsigned int *) src)[1]; */
+			ldw(s_space, 0, src, a2, cda_ldw_exc);
+			ldw(s_space, 4, src, a3, cda_ldw_exc);
+			src -=-2 * sizeof(unsigned int);
+			dst -= 0 * sizeof(unsigned int);
+			len -= 1;
+			if (len == 0)
+				goto do0;
+			goto do4;			/* No-op.  */
+	}
+
+	do
+	{
+		/* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
+do4:
+		/* a0 = ((unsigned int *) src)[0]; */
+		ldw(s_space, 0, src, a0, cda_ldw_exc);
+		/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
+		stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
+do3:
+		/* a1 = ((unsigned int *) src)[1]; */
+		ldw(s_space, 4, src, a1, cda_ldw_exc);
+		/* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
+		stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
+do2:
+		/* a2 = ((unsigned int *) src)[2]; */
+		ldw(s_space, 8, src, a2, cda_ldw_exc);
+		/* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
+		stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
+do1:
+		/* a3 = ((unsigned int *) src)[3]; */
+		ldw(s_space, 12, src, a3, cda_ldw_exc);
+		/* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
+		stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
+
+		src += 4 * sizeof(unsigned int);
+		dst += 4 * sizeof(unsigned int);
+		len -= 4;
+	}
+	while (len != 0);
+
+do0:
+	/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
+	stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
+
+	preserve_branch(handle_load_error);
+	preserve_branch(handle_store_error);
+
+	return 0;
+
+handle_load_error:	/* entered via the cda_ldw_exc label named in the ldw() __ex_table entries */
+	__asm__ __volatile__ ("cda_ldw_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
+	return o_len - d->fault_addr + o_src;	/* o_len is already a byte count */
+
+handle_store_error:	/* entered via the cda_stw_exc label named in the stw() __ex_table entries */
+	__asm__ __volatile__ ("cda_stw_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
+	return o_len - d->fault_addr + o_dst;	/* o_len is already a byte count */
+}
+
+
+/* Copy len bytes from srcp (read through s_space) to dstp (written through
+ * d_space), using the widest unit the mutual alignment of the two allows.
+ * Returns 0 for success, otherwise the number of bytes not transferred. */
+unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+{
+	register unsigned long src, dst, t1, t2, t3;
+	register unsigned char *pcs, *pcd;
+	register unsigned int *pws, *pwd;
+	register double *pds, *pdd;
+	unsigned long ret = 0;
+	unsigned long o_dst, o_src, o_len;
+	struct exception_data *d;
+
+	src = (unsigned long)srcp;
+	dst = (unsigned long)dstp;
+	pcs = (unsigned char *)srcp;
+	pcd = (unsigned char *)dstp;
+
+	o_dst = dst; o_src = src; o_len = len;	/* originals, for the fault handlers */
+
+	/* prefetch_src((const void *)srcp); */
+
+	if (unlikely(len == 0))
+		return 0;
+
+	/* Low bits of src^dst are zero exactly where src and dst are co-aligned. */
+	t1 = (src ^ dst);
+	if (unlikely(t1 & (sizeof(double)-1)))
+		goto unaligned_copy;
+
+	/* src and dst have same alignment: copy bytes till we are double-aligned. */
+	t2 = src & (sizeof(double) - 1);
+	if (unlikely(t2 != 0)) {
+		t2 = sizeof(double) - t2;
+		while (t2 && len) {
+			/* *pcd++ = *pcs++; */
+			ldbma(s_space, pcs, t3, pmc_load_exc);
+			len--;
+			stbma(d_space, t3, pcd, pmc_store_exc);
+			t2--;
+		}
+	}
+
+	pds = (double *)pcs;
+	pdd = (double *)pcd;
+
+	/* Copy 8 doubles at a time */
+	while (len >= 8*sizeof(double)) {
+		register double r1, r2, r3, r4, r5, r6, r7, r8;
+		/* prefetch_src((char *)pds + L1_CACHE_BYTES); */
+		flddma(s_space, pds, r1, pmc_load_exc);
+		flddma(s_space, pds, r2, pmc_load_exc);
+		flddma(s_space, pds, r3, pmc_load_exc);
+		flddma(s_space, pds, r4, pmc_load_exc);
+		fstdma(d_space, r1, pdd, pmc_store_exc);
+		fstdma(d_space, r2, pdd, pmc_store_exc);
+		fstdma(d_space, r3, pdd, pmc_store_exc);
+		fstdma(d_space, r4, pdd, pmc_store_exc);
+
+#if 0
+		if (L1_CACHE_BYTES <= 32)
+			prefetch_src((char *)pds + L1_CACHE_BYTES);
+#endif
+		flddma(s_space, pds, r5, pmc_load_exc);
+		flddma(s_space, pds, r6, pmc_load_exc);
+		flddma(s_space, pds, r7, pmc_load_exc);
+		flddma(s_space, pds, r8, pmc_load_exc);
+		fstdma(d_space, r5, pdd, pmc_store_exc);
+		fstdma(d_space, r6, pdd, pmc_store_exc);
+		fstdma(d_space, r7, pdd, pmc_store_exc);
+		fstdma(d_space, r8, pdd, pmc_store_exc);
+		len -= 8*sizeof(double);
+	}
+
+	pws = (unsigned int *)pds;
+	pwd = (unsigned int *)pdd;
+
+word_copy:
+	while (len >= 8*sizeof(unsigned int)) {
+		register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
+		/* prefetch_src((char *)pws + L1_CACHE_BYTES); */
+		ldwma(s_space, pws, r1, pmc_load_exc);
+		ldwma(s_space, pws, r2, pmc_load_exc);
+		ldwma(s_space, pws, r3, pmc_load_exc);
+		ldwma(s_space, pws, r4, pmc_load_exc);
+		stwma(d_space, r1, pwd, pmc_store_exc);
+		stwma(d_space, r2, pwd, pmc_store_exc);
+		stwma(d_space, r3, pwd, pmc_store_exc);
+		stwma(d_space, r4, pwd, pmc_store_exc);
+
+		ldwma(s_space, pws, r5, pmc_load_exc);
+		ldwma(s_space, pws, r6, pmc_load_exc);
+		ldwma(s_space, pws, r7, pmc_load_exc);
+		ldwma(s_space, pws, r8, pmc_load_exc);
+		stwma(d_space, r5, pwd, pmc_store_exc);
+		stwma(d_space, r6, pwd, pmc_store_exc);
+		stwma(d_space, r7, pwd, pmc_store_exc);
+		stwma(d_space, r8, pwd, pmc_store_exc);
+		len -= 8*sizeof(unsigned int);
+	}
+
+	while (len >= 4*sizeof(unsigned int)) {
+		register unsigned int r1,r2,r3,r4;
+		ldwma(s_space, pws, r1, pmc_load_exc);
+		ldwma(s_space, pws, r2, pmc_load_exc);
+		ldwma(s_space, pws, r3, pmc_load_exc);
+		ldwma(s_space, pws, r4, pmc_load_exc);
+		stwma(d_space, r1, pwd, pmc_store_exc);
+		stwma(d_space, r2, pwd, pmc_store_exc);
+		stwma(d_space, r3, pwd, pmc_store_exc);
+		stwma(d_space, r4, pwd, pmc_store_exc);
+		len -= 4*sizeof(unsigned int);
+	}
+
+	pcs = (unsigned char *)pws;
+	pcd = (unsigned char *)pwd;
+
+byte_copy:
+	while (len) {
+		/* *pcd++ = *pcs++; */
+		ldbma(s_space, pcs, t3, pmc_load_exc);
+		stbma(d_space, t3, pcd, pmc_store_exc);
+		len--;
+	}
+
+	return 0;
+
+unaligned_copy:
+	if (len < THRESHOLD)
+		goto byte_copy;
+
+	/* possibly we are aligned on a word, but not on a double... */
+	if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
+		t2 = src & (sizeof(unsigned int) - 1);
+		if (unlikely(t2 != 0)) {
+			t2 = sizeof(unsigned int) - t2;
+			while (t2) {
+				/* *pcd++ = *pcs++; */
+				ldbma(s_space, pcs, t3, pmc_load_exc);
+				stbma(d_space, t3, pcd, pmc_store_exc);
+				len--;	/* prologue bytes count against len too */
+				t2--;
+			}
+		}
+
+		pws = (unsigned int *)pcs;
+		pwd = (unsigned int *)pcd;
+		goto word_copy;
+	}
+
+	/* Align the destination.  */
+	if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
+		t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
+		while (t2) {
+			/* *pcd++ = *pcs++; */
+			ldbma(s_space, pcs, t3, pmc_load_exc);
+			stbma(d_space, t3, pcd, pmc_store_exc);
+			len--;
+			t2--;
+		}
+		dst = (unsigned long)pcd;
+		src = (unsigned long)pcs;
+	}
+
+	ret = copy_dstaligned(dst, src, len / sizeof(unsigned int),
+		o_dst, o_src, o_len);
+	if (ret)
+		return ret;
+
+	pcs += (len & -sizeof(unsigned int));
+	pcd += (len & -sizeof(unsigned int));
+	len %= sizeof(unsigned int);
+
+	preserve_branch(handle_load_error);
+	preserve_branch(handle_store_error);
+
+	goto byte_copy;
+
+handle_load_error:	/* entered via the pmc_load_exc label named in the load __ex_table entries */
+	__asm__ __volatile__ ("pmc_load_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
+	return o_len - d->fault_addr + o_src;
+
+handle_store_error:	/* entered via the pmc_store_exc label named in the store __ex_table entries */
+	__asm__ __volatile__ ("pmc_store_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
+	return o_len - d->fault_addr + o_dst;
+}
+
+#ifdef __KERNEL__
+unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len)
+{	/* Copy len bytes from kernel src to user dst; returns pa_memcpy()'s result unchanged. */
+	mtsp(0, 1);			/* source side: presumably sets %sr1 (s_space) to 0 -- mtsp is defined elsewhere */
+	mtsp(get_user_space(), 2);	/* destination side: presumably sets %sr2 (d_space) */
+	return pa_memcpy(dst, src, len);
+}
+
+unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len)
+{	/* Copy len bytes from user src to kernel dst; returns pa_memcpy()'s result unchanged. */
+	mtsp(get_user_space(), 1);	/* source side: presumably sets %sr1 (s_space) -- mtsp is defined elsewhere */
+	mtsp(0, 2);			/* destination side: presumably sets %sr2 (d_space) to 0 */
+	return pa_memcpy(dst, src, len);	/* NOTE(review): on a short copy the rest of dst is not zero-filled here */
+}
+
+unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len)
+{	/* Copy len bytes from user src to user dst; returns pa_memcpy()'s result unchanged. */
+	mtsp(get_user_space(), 1);	/* source side: presumably sets %sr1 (s_space) -- mtsp is defined elsewhere */
+	mtsp(get_user_space(), 2);	/* destination side: presumably sets %sr2 (d_space) */
+	return pa_memcpy(dst, src, len);
+}
+#endif
Index: include/asm-parisc/uaccess.h
===================================================================
RCS file: /var/cvs/linux-2.6/include/asm-parisc/uaccess.h,v
retrieving revision 1.16
diff -u -p -r1.16 uaccess.h
--- include/asm-parisc/uaccess.h	15 Sep 2004 16:08:48 -0000	1.16
+++ include/asm-parisc/uaccess.h	16 Sep 2004 19:16:21 -0000
@@ -267,12 +267,12 @@ extern long lstrnlen_user(const char __u
 #define clear_user lclear_user
 #define __clear_user lclear_user
 
-#define copy_from_user lcopy_from_user
-#define __copy_from_user lcopy_from_user
-#define copy_to_user lcopy_to_user
-#define __copy_to_user lcopy_to_user
-#define copy_in_user lcopy_in_user
-#define __copy_in_user lcopy_in_user
+unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len);
+#define __copy_to_user copy_to_user
+unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len);
+#define __copy_from_user copy_from_user
+unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len);
+#define __copy_in_user copy_in_user
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux