From mboxrd@z Thu Jan 1 00:00:00 1970 From: Dirk Behme Date: Wed, 12 Aug 2009 18:35:56 +0200 Subject: [U-Boot] [PATCH] OMAP3 Move cache routines to cache.S In-Reply-To: <1250091750-1525-2-git-send-email-Tom.Rix@windriver.com> References: <1250091750-1525-1-git-send-email-Tom.Rix@windriver.com> <1250091750-1525-2-git-send-email-Tom.Rix@windriver.com> Message-ID: <4A82EF6C.90807@googlemail.com> List-Id: MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: u-boot@lists.denx.de Tom Rix wrote: > v7_flush_dcache_all, because it depends on omap ROM code, is not > generic. Rename the function to 'invalidate_dcache' and move it > to the omap cpu directory. > > Collect the other omap cache routines l2_cache_enable and > l2_cache_disable with invalidate_dcache into cache.S. This > means removing the old cache.c file that contained l2_cache_enable > and l2_cache_disable. > > The conversion from cache.c to cache.S was done mostly through > disassembling the uboot binary. The only significant change was > to change the comparison for the return of get_cpu_rev from > > cmp r0, #0 > beq earlier_than_label > > which had lost information, to > > cmp r0, #CPU_3XX_ES20 > blt earlier_than_label > > The paths through the enable routine were verified by > adding an infinite loop and seeing the hang. Then > removing the infinite loop and seeing it continue. > > The disable routine is similar enough that it was not > tested with this method. > > Run tested by cold booting from nand on beagle and zoom1. > Compile tested on MAKEALL arm. Boot tested from SD card on BeagleBoard. 
> Signed-off-by: Tom Rix Acked-by: Dirk Behme > --- > cpu/arm_cortexa8/cpu.c | 2 +- > cpu/arm_cortexa8/omap3/Makefile | 2 +- > cpu/arm_cortexa8/omap3/board.c | 2 +- > cpu/arm_cortexa8/omap3/cache.S | 191 ++++++++++++++++++++++++++++++++ > cpu/arm_cortexa8/omap3/cache.c | 95 ---------------- > cpu/arm_cortexa8/start.S | 85 -------------- > include/asm-arm/arch-omap3/omap3.h | 2 + > include/asm-arm/arch-omap3/sys_proto.h | 2 +- > 8 files changed, 197 insertions(+), 184 deletions(-) > create mode 100644 cpu/arm_cortexa8/omap3/cache.S > delete mode 100644 cpu/arm_cortexa8/omap3/cache.c > > diff --git a/cpu/arm_cortexa8/cpu.c b/cpu/arm_cortexa8/cpu.c > index 5a5981e..a01e0d6 100644 > --- a/cpu/arm_cortexa8/cpu.c > +++ b/cpu/arm_cortexa8/cpu.c > @@ -64,7 +64,7 @@ int cleanup_before_linux(void) > /* turn off L2 cache */ > l2_cache_disable(); > /* invalidate L2 cache also */ > - v7_flush_dcache_all(get_device_type()); > + invalidate_dcache(get_device_type()); > #endif > i = 0; > /* mem barrier to sync up things */ > diff --git a/cpu/arm_cortexa8/omap3/Makefile b/cpu/arm_cortexa8/omap3/Makefile > index eef165c..136b163 100644 > --- a/cpu/arm_cortexa8/omap3/Makefile > +++ b/cpu/arm_cortexa8/omap3/Makefile > @@ -26,10 +26,10 @@ include $(TOPDIR)/config.mk > LIB = $(obj)lib$(SOC).a > > SOBJS := lowlevel_init.o > +SOBJS += cache.o > SOBJS += reset.o > > COBJS += board.o > -COBJS += cache.o > COBJS += clock.o > COBJS += gpio.o > COBJS += mem.o > diff --git a/cpu/arm_cortexa8/omap3/board.c b/cpu/arm_cortexa8/omap3/board.c > index 2337287..43262e7 100644 > --- a/cpu/arm_cortexa8/omap3/board.c > +++ b/cpu/arm_cortexa8/omap3/board.c > @@ -201,7 +201,7 @@ void s_init(void) > * Right now flushing at low MPU speed. 
> * Need to move after clock init > */ > - v7_flush_dcache_all(get_device_type()); > + invalidate_dcache(get_device_type()); > #ifndef CONFIG_ICACHE_OFF > icache_enable(); > #endif > diff --git a/cpu/arm_cortexa8/omap3/cache.S b/cpu/arm_cortexa8/omap3/cache.S > new file mode 100644 > index 0000000..0f63815 > --- /dev/null > +++ b/cpu/arm_cortexa8/omap3/cache.S > @@ -0,0 +1,191 @@ > +/* > + * Copyright (c) 2009 Wind River Systems, Inc. > + * Tom Rix > + * > + * This file is based on and replaces the existing cache.c file > + * The copyrights for the cache.c file are: > + * > + * (C) Copyright 2008 Texas Insturments > + * > + * (C) Copyright 2002 > + * Sysgo Real-Time Solutions, GmbH > + * Marius Groeger > + * > + * (C) Copyright 2002 > + * Gary Jennejohn, DENX Software Engineering, > + * > + * See file CREDITS for list of people who contributed to this > + * project. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License as > + * published by the Free Software Foundation; either version 2 of > + * the License, or (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, > + * MA 02111-1307 USA > + */ > + > +#include > + > +/* > + * omap3 cache code > + */ > + > +.align 5 > +.global invalidate_dcache > +.global l2_cache_enable > +.global l2_cache_disable > + > +/* > + * invalidate_dcache() > + * > + * Invalidate the whole D-cache. 
> + * > + * Corrupted registers: r0-r5, r7, r9-r11 > + * > + * - mm - mm_struct describing address space > + */ > +invalidate_dcache: > + stmfd r13!, {r0 - r5, r7, r9 - r12, r14} > + > + mov r7, r0 @ take a backup of device type > + cmp r0, #0x3 @ check if the device type is > + @ GP > + moveq r12, #0x1 @ set up to invalide L2 > +smi: .word 0x01600070 @ Call SMI monitor (smieq) > + cmp r7, #0x3 @ compare again in case its > + @ lost > + beq finished_inval @ if GP device, inval done > + @ above > + > + mrc p15, 1, r0, c0, c0, 1 @ read clidr > + ands r3, r0, #0x7000000 @ extract loc from clidr > + mov r3, r3, lsr #23 @ left align loc bit field > + beq finished_inval @ if loc is 0, then no need to > + @ clean > + mov r10, #0 @ start clean at cache level 0 > +inval_loop1: > + add r2, r10, r10, lsr #1 @ work out 3x current cache > + @ level > + mov r1, r0, lsr r2 @ extract cache type bits from > + @ clidr > + and r1, r1, #7 @ mask of the bits for current > + @ cache only > + cmp r1, #2 @ see what cache we have at > + @ this level > + blt skip_inval @ skip if no cache, or just > + @ i-cache > + mcr p15, 2, r10, c0, c0, 0 @ select current cache level > + @ in cssr > + mov r2, #0 @ operand for mcr SBZ > + mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to > + @ sych the new cssr&csidr, > + @ with armv7 this is 'isb', > + @ but we compile with armv5 > + mrc p15, 1, r1, c0, c0, 0 @ read the new csidr > + and r2, r1, #7 @ extract the length of the > + @ cache lines > + add r2, r2, #4 @ add 4 (line length offset) > + ldr r4, =0x3ff > + ands r4, r4, r1, lsr #3 @ find maximum number on the > + @ way size > + clz r5, r4 @ find bit position of way > + @ size increment > + ldr r7, =0x7fff > + ands r7, r7, r1, lsr #13 @ extract max number of the > + @ index size > +inval_loop2: > + mov r9, r4 @ create working copy of max > + @ way size > +inval_loop3: > + orr r11, r10, r9, lsl r5 @ factor way and cache number > + @ into r11 > + orr r11, r11, r7, lsl r2 @ factor index number into 
r11 > + mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way > + subs r9, r9, #1 @ decrement the way > + bge inval_loop3 > + subs r7, r7, #1 @ decrement the index > + bge inval_loop2 > +skip_inval: > + add r10, r10, #2 @ increment cache number > + cmp r3, r10 > + bgt inval_loop1 > +finished_inval: > + mov r10, #0 @ switch back to cache level 0 > + mcr p15, 2, r10, c0, c0, 0 @ select current cache level > + @ in cssr > + mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, > + @ with armv7 this is 'isb', > + @ but we compile with armv5 > + > + ldmfd r13!, {r0 - r5, r7, r9 - r12, pc} > + > + > +l2_cache_enable: > + push {r0, r1, r2, lr} > + @ ES2 onwards we can disable/enable L2 ourselves > + bl get_cpu_rev > + cmp r0, #CPU_3XX_ES20 > + blt l2_cache_enable_EARLIER_THAN_ES2 @ pre-ES2: set the bit via ROM code API > + mrc 15, 0, r3, cr1, cr0, 1 > + orr r3, r3, #2 > + mcr 15, 0, r3, cr1, cr0, 1 > + b l2_cache_enable_END > +l2_cache_enable_EARLIER_THAN_ES2: > + @ Save r0, r12 and restore them after usage > + mov r3, ip > + str r3, [sp, #4] > + mov r3, r0 > + @ > + @ GP Device ROM code API usage here > + @ r12 = AUXCR Write function and r0 value > + @ > + mov ip, #3 > + mrc 15, 0, r0, cr1, cr0, 1 > + orr r0, r0, #2 > + @ SMI instruction to call ROM Code API > + .word 0xe1600070 > + mov r0, r3 > + mov ip, r3 > + str r3, [sp, #4] > +l2_cache_enable_END: > + pop {r1, r2, r3, pc} > + > + > +l2_cache_disable: > + push {r0, r1, r2, lr} > + @ ES2 onwards we can disable/enable L2 ourselves > + bl get_cpu_rev > + cmp r0, #CPU_3XX_ES20 > + blt l2_cache_disable_EARLIER_THAN_ES2 > + mrc 15, 0, r3, cr1, cr0, 1 > + bic r3, r3, #2 > + mcr 15, 0, r3, cr1, cr0, 1 > + b l2_cache_disable_END > +l2_cache_disable_EARLIER_THAN_ES2: > + @ Save r0, r12 and restore them after usage > + mov r3, ip > + str r3, [sp, #4] > + mov r3, r0 > + @ > + @ GP Device ROM code API usage here > + @ r12 = AUXCR Write function and r0 value > + @ > + mov ip, #3 > + mrc 15, 0, r0, cr1, cr0, 1 > + bic r0, r0, #2 > + @ SMI instruction to call ROM Code API > + 
.word 0xe1600070 > + mov r0, r3 > + mov ip, r3 > + str r3, [sp, #4] > +l2_cache_disable_END: > + pop {r1, r2, r3, pc} > diff --git a/cpu/arm_cortexa8/omap3/cache.c b/cpu/arm_cortexa8/omap3/cache.c > deleted file mode 100644 > index 0d5b444..0000000 > --- a/cpu/arm_cortexa8/omap3/cache.c > +++ /dev/null > @@ -1,95 +0,0 @@ > -/* > - * (C) Copyright 2008 Texas Insturments > - * > - * (C) Copyright 2002 > - * Sysgo Real-Time Solutions, GmbH > - * Marius Groeger > - * > - * (C) Copyright 2002 > - * Gary Jennejohn, DENX Software Engineering, > - * > - * See file CREDITS for list of people who contributed to this > - * project. > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of the GNU General Public License as > - * published by the Free Software Foundation; either version 2 of > - * the License, or (at your option) any later version. > - * > - * This program is distributed in the hope that it will be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > - * GNU General Public License for more details. 
> - * > - * You should have received a copy of the GNU General Public License > - * along with this program; if not, write to the Free Software > - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, > - * MA 02111-1307 USA > - */ > - > -/* > - * omap3 L2 cache code > - */ > - > -#include > -#include > -#include > - > -void l2_cache_enable(void) > -{ > - unsigned long i; > - volatile unsigned int j; > - > - /* ES2 onwards we can disable/enable L2 ourselves */ > - if (get_cpu_rev() >= CPU_3XX_ES20) { > - __asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i)); > - __asm__ __volatile__("orr %0, %0, #0x2":"=r"(i)); > - __asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i)); > - } else { > - /* Save r0, r12 and restore them after usage */ > - __asm__ __volatile__("mov %0, r12":"=r"(j)); > - __asm__ __volatile__("mov %0, r0":"=r"(i)); > - > - /* > - * GP Device ROM code API usage here > - * r12 = AUXCR Write function and r0 value > - */ > - __asm__ __volatile__("mov r12, #0x3"); > - __asm__ __volatile__("mrc p15, 0, r0, c1, c0, 1"); > - __asm__ __volatile__("orr r0, r0, #0x2"); > - /* SMI instruction to call ROM Code API */ > - __asm__ __volatile__(".word 0xE1600070"); > - __asm__ __volatile__("mov r0, %0":"=r"(i)); > - __asm__ __volatile__("mov r12, %0":"=r"(j)); > - } > - > -} > - > -void l2_cache_disable(void) > -{ > - unsigned long i; > - volatile unsigned int j; > - > - /* ES2 onwards we can disable/enable L2 ourselves */ > - if (get_cpu_rev() >= CPU_3XX_ES20) { > - __asm__ __volatile__("mrc p15, 0, %0, c1, c0, 1":"=r"(i)); > - __asm__ __volatile__("bic %0, %0, #0x2":"=r"(i)); > - __asm__ __volatile__("mcr p15, 0, %0, c1, c0, 1":"=r"(i)); > - } else { > - /* Save r0, r12 and restore them after usage */ > - __asm__ __volatile__("mov %0, r12":"=r"(j)); > - __asm__ __volatile__("mov %0, r0":"=r"(i)); > - > - /* > - * GP Device ROM code API usage here > - * r12 = AUXCR Write function and r0 value > - */ > - __asm__ __volatile__("mov r12, #0x3"); > - __asm__ 
__volatile__("mrc p15, 0, r0, c1, c0, 1"); > - __asm__ __volatile__("bic r0, r0, #0x2"); > - /* SMI instruction to call ROM Code API */ > - __asm__ __volatile__(".word 0xE1600070"); > - __asm__ __volatile__("mov r0, %0":"=r"(i)); > - __asm__ __volatile__("mov r12, %0":"=r"(j)); > - } > -} > diff --git a/cpu/arm_cortexa8/start.S b/cpu/arm_cortexa8/start.S > index 6bd6552..14a9bd3 100644 > --- a/cpu/arm_cortexa8/start.S > +++ b/cpu/arm_cortexa8/start.S > @@ -415,88 +415,3 @@ fiq: > > #endif > > -/* > - * v7_flush_dcache_all() > - * > - * Flush the whole D-cache. > - * > - * Corrupted registers: r0-r5, r7, r9-r11 > - * > - * - mm - mm_struct describing address space > - */ > - .align 5 > -.global v7_flush_dcache_all > -v7_flush_dcache_all: > - stmfd r13!, {r0 - r5, r7, r9 - r12, r14} > - > - mov r7, r0 @ take a backup of device type > - cmp r0, #0x3 @ check if the device type is > - @ GP > - moveq r12, #0x1 @ set up to invalide L2 > -smi: .word 0x01600070 @ Call SMI monitor (smieq) > - cmp r7, #0x3 @ compare again in case its > - @ lost > - beq finished_inval @ if GP device, inval done > - @ above > - > - mrc p15, 1, r0, c0, c0, 1 @ read clidr > - ands r3, r0, #0x7000000 @ extract loc from clidr > - mov r3, r3, lsr #23 @ left align loc bit field > - beq finished_inval @ if loc is 0, then no need to > - @ clean > - mov r10, #0 @ start clean at cache level 0 > -inval_loop1: > - add r2, r10, r10, lsr #1 @ work out 3x current cache > - @ level > - mov r1, r0, lsr r2 @ extract cache type bits from > - @ clidr > - and r1, r1, #7 @ mask of the bits for current > - @ cache only > - cmp r1, #2 @ see what cache we have at > - @ this level > - blt skip_inval @ skip if no cache, or just > - @ i-cache > - mcr p15, 2, r10, c0, c0, 0 @ select current cache level > - @ in cssr > - mov r2, #0 @ operand for mcr SBZ > - mcr p15, 0, r2, c7, c5, 4 @ flush prefetch buffer to > - @ sych the new cssr&csidr, > - @ with armv7 this is 'isb', > - @ but we compile with armv5 > - mrc p15, 1, r1, 
c0, c0, 0 @ read the new csidr > - and r2, r1, #7 @ extract the length of the > - @ cache lines > - add r2, r2, #4 @ add 4 (line length offset) > - ldr r4, =0x3ff > - ands r4, r4, r1, lsr #3 @ find maximum number on the > - @ way size > - clz r5, r4 @ find bit position of way > - @ size increment > - ldr r7, =0x7fff > - ands r7, r7, r1, lsr #13 @ extract max number of the > - @ index size > -inval_loop2: > - mov r9, r4 @ create working copy of max > - @ way size > -inval_loop3: > - orr r11, r10, r9, lsl r5 @ factor way and cache number > - @ into r11 > - orr r11, r11, r7, lsl r2 @ factor index number into r11 > - mcr p15, 0, r11, c7, c6, 2 @ invalidate by set/way > - subs r9, r9, #1 @ decrement the way > - bge inval_loop3 > - subs r7, r7, #1 @ decrement the index > - bge inval_loop2 > -skip_inval: > - add r10, r10, #2 @ increment cache number > - cmp r3, r10 > - bgt inval_loop1 > -finished_inval: > - mov r10, #0 @ swith back to cache level 0 > - mcr p15, 2, r10, c0, c0, 0 @ select current cache level > - @ in cssr > - mcr p15, 0, r10, c7, c5, 4 @ flush prefetch buffer, > - @ with armv7 this is 'isb', > - @ but we compile with armv5 > - > - ldmfd r13!, {r0 - r5, r7, r9 - r12, pc} > diff --git a/include/asm-arm/arch-omap3/omap3.h b/include/asm-arm/arch-omap3/omap3.h > index 6459d99..12815f6 100644 > --- a/include/asm-arm/arch-omap3/omap3.h > +++ b/include/asm-arm/arch-omap3/omap3.h > @@ -168,6 +168,8 @@ struct gpio { > * ES1 = rev 0 > * > * ES2 onwards, the value maps to contents of IDCODE register [31:28]. > + * > + * Note : CPU_3XX_ES20 is used in cache.S. Please review before changing. 
> */ > #define CPU_3XX_ES10 0 > #define CPU_3XX_ES20 1 > diff --git a/include/asm-arm/arch-omap3/sys_proto.h b/include/asm-arm/arch-omap3/sys_proto.h > index 7361d08..2246f80 100644 > --- a/include/asm-arm/arch-omap3/sys_proto.h > +++ b/include/asm-arm/arch-omap3/sys_proto.h > @@ -55,7 +55,7 @@ void secureworld_exit(void); > void setup_auxcr(void); > void try_unlock_memory(void); > u32 get_boot_type(void); > -void v7_flush_dcache_all(u32); > +void invalidate_dcache(u32); > void sr32(void *, u32, u32, u32); > u32 wait_on_value(u32, u32, void *, u32); > void sdelay(unsigned long);