* [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
@ 2006-07-11 20:50 Bryan O'Sullivan
2006-07-11 20:56 ` Arjan van de Ven
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Bryan O'Sullivan @ 2006-07-11 20:50 UTC (permalink / raw)
To: linux-kernel; +Cc: davem, arjan
This copy routine is memcpy-compatible, but on some architectures will use
cache-bypassing loads to avoid bringing the source data into the cache.
One case where this is useful is when a device issues a DMA to a memory
region, and the CPU must copy the DMAed data elsewhere before doing any
work with it. Since the source data is read-once, write-never from the
CPU's perspective, caching those addresses can only evict potentially
useful data.
We provide an x86_64 implementation that uses SSE non-temporal loads,
and a generic version that falls back to plain memcpy.
Implementors for other arches should not use cache-bypassing stores to
the destination, as in most cases, the destination is accessed almost
immediately after a copy finishes.
Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
diff -r c5610179c494 -r da0cd816c4cb include/linux/string.h
--- a/include/linux/string.h Tue Jul 11 13:40:19 2006 -0700
+++ b/include/linux/string.h Tue Jul 11 13:41:40 2006 -0700
@@ -85,6 +85,7 @@ extern void * memset(void *,int,__kernel
#ifndef __HAVE_ARCH_MEMCPY
extern void * memcpy(void *,const void *,__kernel_size_t);
#endif
+extern void * memcpy_cachebypass(void *,const void *,__kernel_size_t);
#ifndef __HAVE_ARCH_MEMMOVE
extern void * memmove(void *,const void *,__kernel_size_t);
#endif
diff -r c5610179c494 -r da0cd816c4cb lib/string.c
--- a/lib/string.c Tue Jul 11 13:40:19 2006 -0700
+++ b/lib/string.c Tue Jul 11 13:41:40 2006 -0700
@@ -509,6 +509,38 @@ EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memcpy);
#endif
+void *memcpy_cachebypass(void *dest, const void *src, size_t count)
+ __attribute__((weak)); /* weak: lets an arch-provided definition replace the generic one below */
+
+/**
+ * memcpy_cachebypass - Copy one area of memory to another, if possible
+ * bypassing the CPU's cache when loading the copied-from data
+ * @dest: Where to copy to
+ * @src: Where to copy from (bypassing the CPU's cache, if possible)
+ * @count: The size of the area.
+ *
+ * This memcpy-compatible routine is intended for use when the CPU
+ * only reads the source data once. It is useful when, for example, a
+ * hardware device writes to a memory region, and the CPU needs to
+ * copy this data somewhere else before working on it. In such a
+ * case, caching the source addresses only serves to evict possibly
+ * useful data that will probably have to be reloaded.
+ *
+ * An arch-specific implementation should not attempt to bypass the
+ * cache when storing to the destination, as copied data is usually
+ * accessed almost immediately after a copy finishes.
+ *
+ * This routine does not *guarantee* that the source addresses won't
+ * be cached; a user of this code must not rely on this behaviour for
+ * correctness. It should only be used in cases where it provides a
+ * measurable performance improvement.
+ */
+void *memcpy_cachebypass(void *dest, const void *src, size_t count)
+{
+ return memcpy(dest, src, count); /* generic fallback: plain memcpy, no cache bypass attempted */
+}
+EXPORT_SYMBOL_GPL(memcpy_cachebypass);
+
#ifndef __HAVE_ARCH_MEMMOVE
/**
* memmove - Copy one area of memory to another
diff -r c5610179c494 -r da0cd816c4cb arch/x86_64/lib/memcpy_cachebypass.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/x86_64/lib/memcpy_cachebypass.S Tue Jul 11 13:41:40 2006 -0700
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2006 QLogic, Inc. All Rights Reserved.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * memcpy_cachebypass - memcpy-compatible copy routine, using streaming loads
+ * @dest: destination address
+ * @src: source address (will not be cached)
+ * @count: number of bytes to copy
+ *
+ * Use streaming loads and normal stores for a special-case copy where
+ * we know we won't be reading the source again, but will be reading the
+ * destination again soon.
+ */
+ .text
+ .p2align 4,,15
+ /* rdi destination, rsi source, rdx count */
+ .globl memcpy_cachebypass
+ .type memcpy_cachebypass, @function
+memcpy_cachebypass:
+ movq %rdi, %rax /* return value: the original destination pointer */
+.L5:
+ cmpq $15, %rdx
+ ja .L34 /* 16 or more bytes left: use one of the block-copy loops */
+.L3:
+ cmpl $8, %edx /* rdx is 0..15 */
+ jbe .L9 /* 0..8 bytes: dispatch through the jump table */
+.L6:
+ testb $8, %dxl /* rdx is 3,5,6,7,9..15 */
+ je .L13 /* tail copy: each set bit of the count selects one chunk size */
+ movq (%rsi), %rcx /* bit 3 set: copy 8 bytes */
+ addq $8, %rsi
+ movq %rcx, (%rdi)
+ addq $8, %rdi
+.L13:
+ testb $4, %dxl
+ je .L15
+ movl (%rsi), %ecx /* bit 2 set: copy 4 bytes */
+ addq $4, %rsi
+ movl %ecx, (%rdi)
+ addq $4, %rdi
+.L15:
+ testb $2, %dxl
+ je .L17
+ movzwl (%rsi), %ecx /* bit 1 set: copy 2 bytes */
+ addq $2, %rsi
+ movw %cx, (%rdi)
+ addq $2, %rdi
+.L17:
+ testb $1, %dxl
+ je .L33 /* bit 0 clear: nothing more to copy */
+.L1:
+ movzbl (%rsi), %ecx /* copy one byte; also the jump-table target for count == 1 */
+ movb %cl, (%rdi)
+.L33:
+ ret
+.L34:
+ cmpq $63, %rdx /* rdx is > 15 */
+ ja .L64 /* 64 or more bytes: take the prefetching 64-byte loop */
+ movl $16, %ecx /* rdx is 16..63 */
+.L25:
+ movq 8(%rsi), %r8 /* 16-byte loop: two 8-byte loads, then two 8-byte stores */
+ movq (%rsi), %r9
+ addq %rcx, %rsi
+ movq %r8, 8(%rdi)
+ movq %r9, (%rdi)
+ addq %rcx, %rdi
+ subq %rcx, %rdx /* 16 fewer bytes remaining */
+ cmpl %edx, %ecx /* is rdx >= 16? */
+ jbe .L25
+ jmp .L3 /* rdx is 0..15 */
+ .p2align 4,,7
+.L64:
+ movl $64, %ecx /* rcx holds the 64-byte stride used by the loop below */
+.L42:
+ prefetchnta 128(%rsi) /* non-temporal prefetch hint on the source, 128 bytes past the current read position; the loads that follow are ordinary movq */
+ movq (%rsi), %r8 /* load the first 32 bytes of this 64-byte chunk */
+ movq 8(%rsi), %r9
+ movq 16(%rsi), %r10
+ movq 24(%rsi), %r11
+ subq %rcx, %rdx /* 64 fewer bytes remaining */
+ movq %r8, (%rdi) /* from here, stores of the first half are interleaved with loads of the second half */
+ movq 32(%rsi), %r8
+ movq %r9, 8(%rdi)
+ movq 40(%rsi), %r9
+ movq %r10, 16(%rdi)
+ movq 48(%rsi), %r10
+ movq %r11, 24(%rdi)
+ movq 56(%rsi), %r11
+ addq %rcx, %rsi
+ movq %r8, 32(%rdi) /* store the second 32 bytes */
+ movq %r9, 40(%rdi)
+ movq %r10, 48(%rdi)
+ movq %r11, 56(%rdi)
+ addq %rcx, %rdi
+ cmpq %rdx, %rcx /* is rdx >= 64? */
+ jbe .L42
+ sfence /* NOTE(review): every store in this routine is an ordinary mov; confirm this store fence is actually needed */
+ orl %edx, %edx /* sets ZF when no bytes remain (rdx is below 64 here) */
+ je .L33 /* count was a multiple of 64: done */
+ jmp .L5 /* 1..63 bytes left: finish with the smaller paths */
+.L9:
+ jmp *.L12(,%rdx,8) /* rdx is 0..8 */
+ .section .rodata
+ .align 8
+ .align 4 /* NOTE(review): looks redundant directly after .align 8 -- confirm */
+.L12:
+ .quad .L33 /* count 0: return immediately */
+ .quad .L1 /* count 1: single byte */
+ .quad .L2 /* count 2: one 2-byte copy */
+ .quad .L6 /* count 3: bit-by-bit tail copy */
+ .quad .L4 /* count 4: one 4-byte copy */
+ .quad .L6 /* count 5: bit-by-bit tail copy */
+ .quad .L6 /* count 6: bit-by-bit tail copy */
+ .quad .L6 /* count 7: bit-by-bit tail copy */
+ .quad .L8 /* count 8: one 8-byte copy */
+ .text
+.L2:
+ movzwl (%rsi), %ecx /* exactly 2 bytes */
+ movw %cx, (%rdi)
+ ret
+.L4:
+ movl (%rsi), %ecx /* exactly 4 bytes */
+ movl %ecx, (%rdi)
+ ret
+.L8:
+ movq (%rsi), %rcx /* exactly 8 bytes */
+ movq %rcx, (%rdi)
+ ret
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 20:50 [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down Bryan O'Sullivan
@ 2006-07-11 20:56 ` Arjan van de Ven
2006-07-11 20:57 ` David Miller
` (2 subsequent siblings)
3 siblings, 0 replies; 12+ messages in thread
From: Arjan van de Ven @ 2006-07-11 20:56 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: linux-kernel, davem
On Tue, 2006-07-11 at 13:50 -0700, Bryan O'Sullivan wrote:
> This copy routine is memcpy-compatible, but on some architectures will use
> cache-bypassing loads to avoid bringing the source data into the cache.
>
> One case where this is useful is when a device issues a DMA to a memory
> region, and the CPU must copy the DMAed data elsewhere before doing any
> work with it. Since the source data is read-once, write-never from the
> CPU's perspective, caching those addresses can only evict potentially
> useful data.
>
> We provide an x86_64 implementation that uses SSE non-temporal loads,
> and a generic version that falls back to plain memcpy.
>
> Implementors for other arches should not use cache-bypassing stores to
> the destination, as in most cases, the destination is accessed almost
> immediately after a copy finishes.
>
> Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
> Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Acked-by: Arjan van de Ven <arjan@Linux.intel.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 20:50 [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down Bryan O'Sullivan
2006-07-11 20:56 ` Arjan van de Ven
@ 2006-07-11 20:57 ` David Miller
2006-07-11 21:30 ` Bryan O'Sullivan
2006-07-11 21:09 ` Randy.Dunlap
2006-07-12 16:15 ` Andi Kleen
3 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2006-07-11 20:57 UTC (permalink / raw)
To: bos; +Cc: linux-kernel, arjan
From: Bryan O'Sullivan <bos@serpentine.com>
Date: Tue, 11 Jul 2006 13:50:55 -0700
> This copy routine is memcpy-compatible, but on some architectures will use
> cache-bypassing loads to avoid bringing the source data into the cache.
>
> One case where this is useful is when a device issues a DMA to a memory
> region, and the CPU must copy the DMAed data elsewhere before doing any
> work with it. Since the source data is read-once, write-never from the
> CPU's perspective, caching those addresses can only evict potentially
> useful data.
>
> We provide an x86_64 implementation that uses SSE non-temporal loads,
> and a generic version that falls back to plain memcpy.
>
> Implementors for other arches should not use cache-bypassing stores to
> the destination, as in most cases, the destination is accessed almost
> immediately after a copy finishes.
>
> Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
> Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Please don't use a weak attribute, and instead use the same
"__HAVE_ARCH_FOO" cpp test scheme used for the other string
operations to allow a platform to override the default
implementation in lib/string.c
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 20:50 [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down Bryan O'Sullivan
2006-07-11 20:56 ` Arjan van de Ven
2006-07-11 20:57 ` David Miller
@ 2006-07-11 21:09 ` Randy.Dunlap
2006-07-11 21:35 ` Bryan O'Sullivan
2006-07-12 16:15 ` Andi Kleen
3 siblings, 1 reply; 12+ messages in thread
From: Randy.Dunlap @ 2006-07-11 21:09 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: linux-kernel, davem, arjan
On Tue, 11 Jul 2006 13:50:55 -0700 Bryan O'Sullivan wrote:
> diff -r c5610179c494 -r da0cd816c4cb include/linux/string.h
> --- a/include/linux/string.h Tue Jul 11 13:40:19 2006 -0700
> +++ b/include/linux/string.h Tue Jul 11 13:41:40 2006 -0700
> @@ -85,6 +85,7 @@ extern void * memset(void *,int,__kernel
> #ifndef __HAVE_ARCH_MEMCPY
> extern void * memcpy(void *,const void *,__kernel_size_t);
> #endif
> +extern void * memcpy_cachebypass(void *,const void *,__kernel_size_t);
space after commas, please.
> #ifndef __HAVE_ARCH_MEMMOVE
> extern void * memmove(void *,const void *,__kernel_size_t);
> #endif
> diff -r c5610179c494 -r da0cd816c4cb lib/string.c
> --- a/lib/string.c Tue Jul 11 13:40:19 2006 -0700
> +++ b/lib/string.c Tue Jul 11 13:41:40 2006 -0700
> @@ -509,6 +509,38 @@ EXPORT_SYMBOL(memcpy);
> EXPORT_SYMBOL(memcpy);
> #endif
>
> +void *memcpy_cachebypass(void *dest, const void *src, size_t count)
> + __attribute__((weak));
> +
> +/**
> + * memcpy_cachebypass - Copy one area of memory to another, if possible
> + * bypassing the CPU's cache when loading the copied-from data
Currently kernel-doc function description is limited to one line.
If you can't shorten it, just omit it completely and make it the first
paragraph after the parameters.
> + * @dest: Where to copy to
> + * @src: Where to copy from (bypassing the CPU's cache, if possible)
> + * @count: The size of the area.
> + *
> + * This memcpy-compatible routine is intended for use when the CPU
> + * only reads the source data once. It is useful when, for example, a
> + * hardware device writes to a memory region, and the CPU needs to
> + * copy this data somewhere else before working on it. In such a
> + * case, caching the source addresses only serves to evict possibly
> + * useful data that will probably have to be reloaded.
> + *
> + * An arch-specific implementation should not attempt to bypass the
> + * cache when storing to the destination, as copied data is usually
> + * accessed almost immediately after a copy finishes.
> + *
> + * This routine does not *guarantee* that the source addresses won't
> + * be cached; a user of this code must not rely on this behaviour for
> + * correctness. It should only be used in cases where it provides a
> + * measurable performance improvement.
> + */
> +void *memcpy_cachebypass(void *dest, const void *src, size_t count)
> +{
> + return memcpy(dest, src, count);
> +}
> +EXPORT_SYMBOL_GPL(memcpy_cachebypass);
> +
> #ifndef __HAVE_ARCH_MEMMOVE
> /**
> * memmove - Copy one area of memory to another
---
~Randy
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 20:57 ` David Miller
@ 2006-07-11 21:30 ` Bryan O'Sullivan
2006-07-11 21:57 ` David Miller
0 siblings, 1 reply; 12+ messages in thread
From: Bryan O'Sullivan @ 2006-07-11 21:30 UTC (permalink / raw)
To: David Miller; +Cc: linux-kernel, arjan
On Tue, 2006-07-11 at 13:57 -0700, David Miller wrote:
> Please don't use a weak attribute, and instead use the same
> "__HAVE_ARCH_FOO" cpp test scheme used for the other string
> operations to allow a platform to override the default
> implementation in lib/string.c
I'm a bit confused.
The last time I tried submitting a patch that followed that style (for
__iowrite_copy*), it got NAKed for propagating preprocessor abuse (Linus
roundly flamed someone for a similar patch a few weeks before I
submitted mine), and Andrew suggested that I use the same scheme that
this patch uses.
So whose instructions do I follow? Yours of today, or Andrew's and
Linus's of a few months ago?
<b
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 21:09 ` Randy.Dunlap
@ 2006-07-11 21:35 ` Bryan O'Sullivan
2006-07-11 22:48 ` Randy.Dunlap
0 siblings, 1 reply; 12+ messages in thread
From: Bryan O'Sullivan @ 2006-07-11 21:35 UTC (permalink / raw)
To: Randy.Dunlap; +Cc: linux-kernel, davem, arjan
On Tue, 2006-07-11 at 14:09 -0700, Randy.Dunlap wrote:
> space after commas, please.
Yep.
> Currently kernel-doc function description is limited to one line.
Ugh, OK. What about "Memory copy, bypassing CPU cache for loads" for
the one-liner? And a suitably modified first paragraph to make it clear
that on some arches, it falls back to memcpy.
Thanks,
<b
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 21:30 ` Bryan O'Sullivan
@ 2006-07-11 21:57 ` David Miller
2006-07-11 22:05 ` Bryan O'Sullivan
2006-07-12 8:04 ` Christoph Hellwig
0 siblings, 2 replies; 12+ messages in thread
From: David Miller @ 2006-07-11 21:57 UTC (permalink / raw)
To: bos; +Cc: linux-kernel, arjan
From: Bryan O'Sullivan <bos@serpentine.com>
Date: Tue, 11 Jul 2006 14:30:01 -0700
> The last time I tried submitting a patch that followed that style (for
> __iowrite_copy*), it got NAKed for propagating preprocessor abuse (Linus
> roundly flamed someone for a similar patch a few weeks before I
> submitted mine), and Andrew suggested that I use the same scheme that
> this patch uses.
>
> So whose instructions do I follow? Yours of today, or Andrew's and
> Linus's of a few months ago?
I didn't realize there was change afoot in this area, sorry.
I was just striving for consistency with current practice.
If Andrew suggested to use weak, that's fine, but it's kind
of erroneous for something like lib/string.c because that
gets built into a library lib.a file, which resolves any
unresolved references.
When the kernel is linked, lib.a implementations only get brought in
if they are not already resolved by definitions present in the other
objects of the kernel image.
Weak makes more sense when dealing with object files, not archives.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 21:57 ` David Miller
@ 2006-07-11 22:05 ` Bryan O'Sullivan
2006-07-11 22:08 ` David Miller
2006-07-12 8:04 ` Christoph Hellwig
1 sibling, 1 reply; 12+ messages in thread
From: Bryan O'Sullivan @ 2006-07-11 22:05 UTC (permalink / raw)
To: David Miller; +Cc: linux-kernel, arjan
On Tue, 2006-07-11 at 14:57 -0700, David Miller wrote:
> I didn't realize there was change afoot in this area, sorry.
> I was just striving for consistency with current practice.
Sure.
> When the kernel is linked, lib.a implementations only get brought in
> if they are not already resolved by definitions present in the other
> objects of the kernel image.
Well, exactly this scheme seems to work for __iowrite_copy*. There's a
weak generic version and a strong version in arch/x86_64/lib that
overrides it, and it gets picked up at kernel link time.
It could be working by accident, I suppose, but it's at least consistent
behaviour with what I'm used to from weak symbols.
<b
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 22:05 ` Bryan O'Sullivan
@ 2006-07-11 22:08 ` David Miller
0 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2006-07-11 22:08 UTC (permalink / raw)
To: bos; +Cc: linux-kernel, arjan
From: Bryan O'Sullivan <bos@serpentine.com>
Date: Tue, 11 Jul 2006 15:05:08 -0700
> Well, exactly this scheme seems to work for __iowrite_copy*. There's a
> weak generic version and a strong version in arch/x86_64/lib that
> overrides it, and it gets picked up at kernel link time.
It is linked in as an object, not into the library archive,
that's why that one works like that.
That is why io.o is added to the "obj-y" variable instead of the
"lib-y" variable. It is also necessary to link these things
in as objects when module exports are present, because if there
is no in-kernel reference to the function, you won't get the
function nor its module export :)
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 21:35 ` Bryan O'Sullivan
@ 2006-07-11 22:48 ` Randy.Dunlap
0 siblings, 0 replies; 12+ messages in thread
From: Randy.Dunlap @ 2006-07-11 22:48 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: linux-kernel, davem, arjan
On Tue, 11 Jul 2006 14:35:19 -0700 Bryan O'Sullivan wrote:
> On Tue, 2006-07-11 at 14:09 -0700, Randy.Dunlap wrote:
>
> > space after commas, please.
>
> Yep.
>
> > Currently kernel-doc function description is limited to one line.
>
> Ugh, OK. What about "Memory copy, bypassing CPU cache for loads" for
> the one-liner? And a suitably modified first paragraph to make it clear
> that on some arches, it falls back to memcpy.
Yep, that sounds good.
Thanks.
---
~Randy
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 21:57 ` David Miller
2006-07-11 22:05 ` Bryan O'Sullivan
@ 2006-07-12 8:04 ` Christoph Hellwig
1 sibling, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2006-07-12 8:04 UTC (permalink / raw)
To: David Miller; +Cc: bos, linux-kernel, arjan
On Tue, Jul 11, 2006 at 02:57:51PM -0700, David Miller wrote:
> From: Bryan O'Sullivan <bos@serpentine.com>
> Date: Tue, 11 Jul 2006 14:30:01 -0700
>
> > The last time I tried submitting a patch that followed that style (for
> > __iowrite_copy*), it got NAKed for propagating preprocessor abuse (Linus
> > roundly flamed someone for a similar patch a few weeks before I
> > submitted mine), and Andrew suggested that I use the same scheme that
> > this patch uses.
> >
> > So whose instructions do I follow? Yours of today, or Andrew's and
> > Linus's of a few months ago?
>
> I didn't realize there was change afoot in this area, sorry.
> I was just striving for consistency with current practice.
>
> If Andrew suggested to use weak, that's fine, but it's kind
> of erroneous for something like lib/string.c because that
> gets built into a library lib.a file, which resolves any
> unresolved references.
>
> When the kernel is linked, lib.a implementations only get brought in
> if they are not already resolved by definitions present in the other
> objects of the kernel image.
>
> Weak makes more sense when dealing with object files, not archives.
Weak is generally the wrong thing for what we do in kernel land. We don't
even want to build the generic version if we have a better one. It also
makes it really hard to find out what exactly is in use. Linus only argued
against __HAVE_ARCH_FOO, but his suggested replacement is:
#ifndef foo
#define foo generic_version
#endif
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down
2006-07-11 20:50 [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down Bryan O'Sullivan
` (2 preceding siblings ...)
2006-07-11 21:09 ` Randy.Dunlap
@ 2006-07-12 16:15 ` Andi Kleen
3 siblings, 0 replies; 12+ messages in thread
From: Andi Kleen @ 2006-07-12 16:15 UTC (permalink / raw)
To: Bryan O'Sullivan; +Cc: davem, arjan, linux-kernel
Bryan O'Sullivan <bos@serpentine.com> writes:
> + * memcpy_cachebypass - memcpy-compatible copy routine, using streaming loads
> + * @dest: destination address
> + * @src: source address (will not be cached)
> + * @count: number of bytes to copy
> + *
> + * Use streaming loads and normal stores for a special-case copy where
> + * we know we won't be reading the source again, but will be reading the
> + * destination again soon.
> + */
For what CPU did you optimize that function? Comment missing for that.
Also the comment should state that you're caching the target.
Also I trust you ran it through a comprehensive memcpy-all-cases tester?
-Andi
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2006-07-12 16:15 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-07-11 20:50 [PATCH] Add memcpy_cachebypass, a copy routine that tries to keep cache pressure down Bryan O'Sullivan
2006-07-11 20:56 ` Arjan van de Ven
2006-07-11 20:57 ` David Miller
2006-07-11 21:30 ` Bryan O'Sullivan
2006-07-11 21:57 ` David Miller
2006-07-11 22:05 ` Bryan O'Sullivan
2006-07-11 22:08 ` David Miller
2006-07-12 8:04 ` Christoph Hellwig
2006-07-11 21:09 ` Randy.Dunlap
2006-07-11 21:35 ` Bryan O'Sullivan
2006-07-11 22:48 ` Randy.Dunlap
2006-07-12 16:15 ` Andi Kleen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox