* [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size @ 2009-09-12 20:48 Kirill A. Shutemov 2009-09-12 20:48 ` [PATCH v3 2/2] ARM: copy_page.S: take into account the size of the cache line Kirill A. Shutemov 2009-09-21 8:37 ` [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size Russell King - ARM Linux 0 siblings, 2 replies; 5+ messages in thread From: Kirill A. Shutemov @ 2009-09-12 20:48 UTC (permalink / raw) To: linux-arm-kernel Currently kernel believes that all ARM CPUs have L1_CACHE_SHIFT == 5. It's not true at least for CPUs based on Cortex-A8. List of CPUs with cache line size != 32 should be expanded later. V2: - remove unnecessary parens Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name> --- arch/arm/include/asm/cache.h | 2 +- arch/arm/mm/Kconfig | 5 +++++ 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index feaa75f..66c160b 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -4,7 +4,7 @@ #ifndef __ASMARM_CACHE_H #define __ASMARM_CACHE_H -#define L1_CACHE_SHIFT 5 +#define L1_CACHE_SHIFT CONFIG_ARM_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 83c025e..3c37d4c 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -771,3 +771,8 @@ config CACHE_XSC3L2 select OUTER_CACHE help This option enables the L2 cache on XScale3. + +config ARM_L1_CACHE_SHIFT + int + default 6 if ARCH_OMAP3 + default 5 -- 1.6.4.2 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 2/2] ARM: copy_page.S: take into account the size of the cache line 2009-09-12 20:48 [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size Kirill A. Shutemov @ 2009-09-12 20:48 ` Kirill A. Shutemov 2009-09-21 8:37 ` [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size Russell King - ARM Linux 1 sibling, 0 replies; 5+ messages in thread From: Kirill A. Shutemov @ 2009-09-12 20:48 UTC (permalink / raw) To: linux-arm-kernel Optimized version of copy_page() was written with assumption that cache line size is 32 bytes. On Cortex-A8 cache line size is 64 bytes. This patch tries to generalize copy_page() to work with any cache line size if cache line size is multiple of 16 and page size is multiple of two cache line size. After this optimization we've got ~25% speedup on OMAP3(tested in userspace). There is test for kernelspace which trigger copy-on-write after fork(): #include <stdlib.h> #include <string.h> #include <unistd.h> #define BUF_SIZE (10000*4096) #define NFORK 200 int main(int argc, char **argv) { char *buf = malloc(BUF_SIZE); int i; memset(buf, 0, BUF_SIZE); for(i = 0; i < NFORK; i++) { if (fork()) { wait(NULL); } else { int j; for(j = 0; j < BUF_SIZE; j+= 4096) buf[j] = (j & 0xFF) + 1; break; } } free(buf); return 0; } Before optimization this test takes ~66 seconds, after optimization takes ~56 seconds. V3: - fix typo V2: - include <asm/cache.h> - remove unnecessary parens and fix style Signed-off-by: Siarhei Siamashka <siarhei.siamashka@nokia.com> Signed-off-by: Kirill A. 
Shutemov <kirill@shutemov.name> --- arch/arm/lib/copy_page.S | 16 ++++++++-------- 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S index 6ae04db..6ee2f67 100644 --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -12,8 +12,9 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/cache.h> -#define COPY_COUNT (PAGE_SZ/64 PLD( -1 )) +#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) .text .align 5 @@ -26,17 +27,16 @@ ENTRY(copy_page) stmfd sp!, {r4, lr} @ 2 PLD( pld [r1, #0] ) - PLD( pld [r1, #32] ) + PLD( pld [r1, #L1_CACHE_BYTES] ) mov r2, #COPY_COUNT @ 1 ldmia r1!, {r3, r4, ip, lr} @ 4+1 -1: PLD( pld [r1, #64] ) - PLD( pld [r1, #96] ) -2: stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) + PLD( pld [r1, #3 * L1_CACHE_BYTES]) +2: + .rept (2 * L1_CACHE_BYTES / 16 - 1) stmia r0!, {r3, r4, ip, lr} @ 4 ldmia r1!, {r3, r4, ip, lr} @ 4 + .endr subs r2, r2, #1 @ 1 stmia r0!, {r3, r4, ip, lr} @ 4 ldmgtia r1!, {r3, r4, ip, lr} @ 4 -- 1.6.4.2 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size 2009-09-12 20:48 [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size Kirill A. Shutemov 2009-09-12 20:48 ` [PATCH v3 2/2] ARM: copy_page.S: take into account the size of the cache line Kirill A. Shutemov @ 2009-09-21 8:37 ` Russell King - ARM Linux 2009-09-21 11:19 ` Kirill A. Shutemov 1 sibling, 1 reply; 5+ messages in thread From: Russell King - ARM Linux @ 2009-09-21 8:37 UTC (permalink / raw) To: linux-arm-kernel On Sat, Sep 12, 2009 at 11:48:30PM +0300, Kirill A. Shutemov wrote: > Currently kernel believes that all ARM CPUs have L1_CACHE_SHIFT == 5. > It's not true at least for CPUs based on Cortex-A8. Please send this to the patch system. There's no need to add the "V2" comments to it when you do. ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size 2009-09-21 8:37 ` [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size Russell King - ARM Linux @ 2009-09-21 11:19 ` Kirill A. Shutemov 2009-09-21 15:38 ` Russell King - ARM Linux 0 siblings, 1 reply; 5+ messages in thread From: Kirill A. Shutemov @ 2009-09-21 11:19 UTC (permalink / raw) To: linux-arm-kernel On Mon, Sep 21, 2009 at 11:37 AM, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote: > On Sat, Sep 12, 2009 at 11:48:30PM +0300, Kirill A. Shutemov wrote: >> Currently kernel believes that all ARM CPUs have L1_CACHE_SHIFT == 5. >> It's not true at least for CPUs based on Cortex-A8. > > Please send this to the patch system. There's no need to add the "V2" > comments to it when you do. > #5716, #5717 BTW, I see my patches without change log in your git tree. Commits 910a17e and dca230f. What is wrong with it? ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size 2009-09-21 11:19 ` Kirill A. Shutemov @ 2009-09-21 15:38 ` Russell King - ARM Linux 0 siblings, 0 replies; 5+ messages in thread From: Russell King - ARM Linux @ 2009-09-21 15:38 UTC (permalink / raw) To: linux-arm-kernel On Mon, Sep 21, 2009 at 02:19:57PM +0300, Kirill A. Shutemov wrote: > On Mon, Sep 21, 2009 at 11:37 AM, Russell King - ARM Linux > <linux@arm.linux.org.uk> wrote: > > On Sat, Sep 12, 2009 at 11:48:30PM +0300, Kirill A. Shutemov wrote: > >> Currently kernel believes that all ARM CPUs have L1_CACHE_SHIFT == 5. > >> It's not true at least for CPUs based on Cortex-A8. > > > > Please send this to the patch system. There's no need to add the "V2" > > comments to it when you do. > > > > #5716, #5717 > > BTW, I see my patches without change log in your git tree. Commits 910a17e > and dca230f. What is wrong with it? No need to resend them - sorry, I'd forgotten I'd merged them. ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2009-09-21 15:38 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2009-09-12 20:48 [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size Kirill A. Shutemov 2009-09-12 20:48 ` [PATCH v3 2/2] ARM: copy_page.S: take into account the size of the cache line Kirill A. Shutemov 2009-09-21 8:37 ` [PATCH v3 1/2] ARM: Introduce ARM_L1_CACHE_SHIFT to define cache line size Russell King - ARM Linux 2009-09-21 11:19 ` Kirill A. Shutemov 2009-09-21 15:38 ` Russell King - ARM Linux
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).