* [PATCH] [PPC 44x] L2-cache synchronization for ppc44x
@ 2007-11-07 23:12 Yuri Tikhonov
2007-11-07 23:19 ` Benjamin Herrenschmidt
2007-11-26 6:47 ` Benjamin Herrenschmidt
0 siblings, 2 replies; 4+ messages in thread
From: Yuri Tikhonov @ 2007-11-07 23:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Olof Johansson
This is the updated patch to support synchronization of the L2 cache with
external memory on ppc44x-based platforms.
Differences from the previous patch set:
- remove L2_CACHE config option;
- introduce the ppc machdep to invalidate L2 cache lines;
- some code clean-up.
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Pavel Kolesnikov <concord@emcraft.com>
--
diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-nonc=
oherent.c
index 1947380..b06f05c 100644
--- a/arch/powerpc/lib/dma-noncoherent.c
+++ b/arch/powerpc/lib/dma-noncoherent.c
@@ -31,6 +31,7 @@
#include <linux/dma-mapping.h>
=20
#include <asm/tlbflush.h>
+#include <asm/machdep.h>
=20
/*
* This address range defaults to a value that is safe for all
@@ -186,6 +187,8 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, g=
fp_t gfp)
=09=09unsigned long kaddr =3D (unsigned long)page_address(page);
=09=09memset(page_address(page), 0, size);
=09=09flush_dcache_range(kaddr, kaddr + size);
+=09=09if (ppc_md.l2cache_inv_range)
+=09=09=09ppc_md.l2cache_inv_range(__pa(kaddr), __pa(kaddr + size));
=09}
=20
=09/*
@@ -351,12 +354,16 @@ void __dma_sync(void *vaddr, size_t size, int directi=
on)
=09=09BUG();
=09case DMA_FROM_DEVICE:=09/* invalidate only */
=09=09invalidate_dcache_range(start, end);
+=09=09if (ppc_md.l2cache_inv_range)
+=09=09=09ppc_md.l2cache_inv_range(__pa(start), __pa(end));
=09=09break;
=09case DMA_TO_DEVICE:=09=09/* writeback only */
=09=09clean_dcache_range(start, end);
=09=09break;
=09case DMA_BIDIRECTIONAL:=09/* writeback and invalidate */
=09=09flush_dcache_range(start, end);
+=09=09if (ppc_md.l2cache_inv_range)
+=09=09=09ppc_md.l2cache_inv_range(__pa(start), __pa(end));
=09=09break;
=09}
}
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index 46cf8fa..31c9149 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -25,6 +25,10 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
=20
+#ifdef CONFIG_44x
+#include <asm/ibm44x.h>
+#endif
+
#ifdef CONFIG_8xx
#define ISYNC_8xx isync
#else
@@ -386,6 +390,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
=09sync=09=09=09=09/* additional sync needed on g4 */
=09isync
=09blr
+
+#if defined(CONFIG_44x)
+/*
+ * Invalidate the Level-2 cache lines corresponded to the address
+ * range.
+ *
+ * invalidate_l2cache_range(unsigned long start, unsigned long stop)
+ */
+_GLOBAL(invalidate_l2cache_range)
+=09li=09r5,PPC44X_L2_CACHE_BYTES-1=09/* align on L2-cache line */
+=09andc=09r3,r3,r5
+=09subf=09r4,r3,r4
+=09add=09r4,r4,r5
+=09srwi.=09r4,r4,PPC44X_L2_CACHE_SHIFT
+=09mtctr=09r4
+
+=09lis=09r4, L2C_CMD_INV>>16
+1:=09mtdcr=09DCRN_L2C0_ADDR,r3=09/* write address to invalidate */
+=09mtdcr=09DCRN_L2C0_CMD,r4=09/* issue the Invalidate cmd */
+
+2:=09mfdcr=09r5,DCRN_L2C0_SR=09=09/* wait for complete */
+=09andis.=09r5,r5,L2C_CMD_CLR>>16
+=09beq=092b
+
+=09addi=09r3,r3,PPC44X_L2_CACHE_BYTES=09/* next address to invalidate */
+=09bdnz=091b
+=09blr
+#endif
+
/*
* Write any modified data cache blocks out to memory.
* Does not invalidate the corresponding cache lines (especially for
diff --git a/arch/ppc/syslib/ibm440gx_common.c b/arch/ppc/syslib/ibm440gx_c=
ommon.c
index 6b1a801..64c663f 100644
--- a/arch/ppc/syslib/ibm440gx_common.c
+++ b/arch/ppc/syslib/ibm440gx_common.c
@@ -12,6 +12,8 @@
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
#include <asm/ibm44x.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -201,6 +203,7 @@ void __init ibm440gx_l2c_enable(void){
=20
=09asm volatile ("sync; isync" ::: "memory");
=09local_irq_restore(flags);
+=09ppc_md.l2cache_inv_range =3D invalidate_l2cache_range;
}
=20
/* Disable L2 cache */
diff --git a/include/asm-powerpc/cacheflush.h b/include/asm-powerpc/cachefl=
ush.h
index ba667a3..bdebfaa 100644
--- a/include/asm-powerpc/cacheflush.h
+++ b/include/asm-powerpc/cacheflush.h
@@ -49,6 +49,7 @@ extern void flush_dcache_range(unsigned long start, unsig=
ned long stop);
#ifdef CONFIG_PPC32
extern void clean_dcache_range(unsigned long start, unsigned long stop);
extern void invalidate_dcache_range(unsigned long start, unsigned long sto=
p);
+extern void invalidate_l2cache_range(unsigned long start, unsigned long st=
op);
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
extern void flush_inval_dcache_range(unsigned long start, unsigned long st=
op);
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index 71c6e7e..754f416 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -201,6 +201,8 @@ struct machdep_calls {
=09void=09=09(*early_serial_map)(void);
=09void=09=09(*kgdb_map_scc)(void);
=20
+=09void=09=09(*l2cache_inv_range)(unsigned long s, unsigned long e);
+
=09/*
=09 * optional PCI "hooks"
=09 */
diff --git a/include/asm-ppc/ibm44x.h b/include/asm-ppc/ibm44x.h
index 8078a58..8ac0a13 100644
--- a/include/asm-ppc/ibm44x.h
+++ b/include/asm-ppc/ibm44x.h
@@ -138,7 +138,6 @@
* The "residual" board information structure the boot loader passes
* into the kernel.
*/
-#ifndef __ASSEMBLY__
=20
/*
* DCRN definitions
@@ -596,6 +595,9 @@
#define SRAM_DPC_ENABLE=090x80000000
=20
/* L2 Cache Controller 440GX/440SP/440SPe */
+#define PPC44X_L2_CACHE_SHIFT=095
+#define PPC44X_L2_CACHE_BYTES=09(1 << PPC44X_L2_CACHE_SHIFT)
+
#define DCRN_L2C0_CFG=09=090x030
#define L2C_CFG_L2M=09=090x80000000
#define L2C_CFG_ICU=09=090x40000000
@@ -814,6 +816,5 @@
=20
#include <asm/ibm4xx.h>
=20
-#endif /* __ASSEMBLY__ */
#endif /* __ASM_IBM44x_H__ */
#endif /* __KERNEL__ */
diff --git a/include/asm-ppc/machdep.h b/include/asm-ppc/machdep.h
index 293a444..4e7a270 100644
--- a/include/asm-ppc/machdep.h
+++ b/include/asm-ppc/machdep.h
@@ -80,6 +80,8 @@ struct machdep_calls {
=09void=09=09(*nvram_write_val)(int addr, unsigned char val);
=09void=09=09(*nvram_sync)(void);
=20
+=09void=09=09(*l2cache_inv_range)(unsigned long s, unsigned long e);
+
=09/*
=09 * optional PCI "hooks"
=09 */=20
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] [PPC 44x] L2-cache synchronization for ppc44x
2007-11-07 23:12 [PATCH] [PPC 44x] L2-cache synchronization for ppc44x Yuri Tikhonov
@ 2007-11-07 23:19 ` Benjamin Herrenschmidt
2007-11-07 23:39 ` Re[2]: " Yuri Tikhonov
2007-11-26 6:47 ` Benjamin Herrenschmidt
1 sibling, 1 reply; 4+ messages in thread
From: Benjamin Herrenschmidt @ 2007-11-07 23:19 UTC (permalink / raw)
To: Yuri Tikhonov; +Cc: Olof Johansson, linuxppc-dev
On Thu, 2007-11-08 at 02:12 +0300, Yuri Tikhonov wrote:
> This is the updated patch for support synchronization of L2-Cache with the external memory on the ppc44x-based platforms.
>
> Differencies against the previous patch-set:
> - remove L2_CACHE config option;
> - introduce the ppc machdep to invalidate L2 cache lines;
> - some code clean-up.
Can you tell me more about how this cache operates? I don't quite
understand why you would invalidate it on bidirectional DMAs rather than
flush it to memory (unless you got your terminology wrong), and why you
wouldn't flush it on transfers to the device... unless it is a
write-through cache?
Ben.
> Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
> Signed-off-by: Pavel Kolesnikov <concord@emcraft.com>
>
> --
> diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c
> index 1947380..b06f05c 100644
> --- a/arch/powerpc/lib/dma-noncoherent.c
> +++ b/arch/powerpc/lib/dma-noncoherent.c
> @@ -31,6 +31,7 @@
> #include <linux/dma-mapping.h>
>
> #include <asm/tlbflush.h>
> +#include <asm/machdep.h>
>
> /*
> * This address range defaults to a value that is safe for all
> @@ -186,6 +187,8 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
> unsigned long kaddr = (unsigned long)page_address(page);
> memset(page_address(page), 0, size);
> flush_dcache_range(kaddr, kaddr + size);
> + if (ppc_md.l2cache_inv_range)
> + ppc_md.l2cache_inv_range(__pa(kaddr), __pa(kaddr + size));
> }
>
> /*
> @@ -351,12 +354,16 @@ void __dma_sync(void *vaddr, size_t size, int direction)
> BUG();
> case DMA_FROM_DEVICE: /* invalidate only */
> invalidate_dcache_range(start, end);
> + if (ppc_md.l2cache_inv_range)
> + ppc_md.l2cache_inv_range(__pa(start), __pa(end));
> break;
> case DMA_TO_DEVICE: /* writeback only */
> clean_dcache_range(start, end);
> break;
> case DMA_BIDIRECTIONAL: /* writeback and invalidate */
> flush_dcache_range(start, end);
> + if (ppc_md.l2cache_inv_range)
> + ppc_md.l2cache_inv_range(__pa(start), __pa(end));
> break;
> }
> }
> diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
> index 46cf8fa..31c9149 100644
> --- a/arch/ppc/kernel/misc.S
> +++ b/arch/ppc/kernel/misc.S
> @@ -25,6 +25,10 @@
> #include <asm/thread_info.h>
> #include <asm/asm-offsets.h>
>
> +#ifdef CONFIG_44x
> +#include <asm/ibm44x.h>
> +#endif
> +
> #ifdef CONFIG_8xx
> #define ISYNC_8xx isync
> #else
> @@ -386,6 +390,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
> sync /* additional sync needed on g4 */
> isync
> blr
> +
> +#if defined(CONFIG_44x)
> +/*
> + * Invalidate the Level-2 cache lines corresponded to the address
> + * range.
> + *
> + * invalidate_l2cache_range(unsigned long start, unsigned long stop)
> + */
> +_GLOBAL(invalidate_l2cache_range)
> + li r5,PPC44X_L2_CACHE_BYTES-1 /* align on L2-cache line */
> + andc r3,r3,r5
> + subf r4,r3,r4
> + add r4,r4,r5
> + srwi. r4,r4,PPC44X_L2_CACHE_SHIFT
> + mtctr r4
> +
> + lis r4, L2C_CMD_INV>>16
> +1: mtdcr DCRN_L2C0_ADDR,r3 /* write address to invalidate */
> + mtdcr DCRN_L2C0_CMD,r4 /* issue the Invalidate cmd */
> +
> +2: mfdcr r5,DCRN_L2C0_SR /* wait for complete */
> + andis. r5,r5,L2C_CMD_CLR>>16
> + beq 2b
> +
> + addi r3,r3,PPC44X_L2_CACHE_BYTES /* next address to invalidate */
> + bdnz 1b
> + blr
> +#endif
> +
> /*
> * Write any modified data cache blocks out to memory.
> * Does not invalidate the corresponding cache lines (especially for
> diff --git a/arch/ppc/syslib/ibm440gx_common.c b/arch/ppc/syslib/ibm440gx_common.c
> index 6b1a801..64c663f 100644
> --- a/arch/ppc/syslib/ibm440gx_common.c
> +++ b/arch/ppc/syslib/ibm440gx_common.c
> @@ -12,6 +12,8 @@
> */
> #include <linux/kernel.h>
> #include <linux/interrupt.h>
> +#include <asm/machdep.h>
> +#include <asm/cacheflush.h>
> #include <asm/ibm44x.h>
> #include <asm/mmu.h>
> #include <asm/processor.h>
> @@ -201,6 +203,7 @@ void __init ibm440gx_l2c_enable(void){
>
> asm volatile ("sync; isync" ::: "memory");
> local_irq_restore(flags);
> + ppc_md.l2cache_inv_range = invalidate_l2cache_range;
> }
>
> /* Disable L2 cache */
> diff --git a/include/asm-powerpc/cacheflush.h b/include/asm-powerpc/cacheflush.h
> index ba667a3..bdebfaa 100644
> --- a/include/asm-powerpc/cacheflush.h
> +++ b/include/asm-powerpc/cacheflush.h
> @@ -49,6 +49,7 @@ extern void flush_dcache_range(unsigned long start, unsigned long stop);
> #ifdef CONFIG_PPC32
> extern void clean_dcache_range(unsigned long start, unsigned long stop);
> extern void invalidate_dcache_range(unsigned long start, unsigned long stop);
> +extern void invalidate_l2cache_range(unsigned long start, unsigned long stop);
> #endif /* CONFIG_PPC32 */
> #ifdef CONFIG_PPC64
> extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
> diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
> index 71c6e7e..754f416 100644
> --- a/include/asm-powerpc/machdep.h
> +++ b/include/asm-powerpc/machdep.h
> @@ -201,6 +201,8 @@ struct machdep_calls {
> void (*early_serial_map)(void);
> void (*kgdb_map_scc)(void);
>
> + void (*l2cache_inv_range)(unsigned long s, unsigned long e);
> +
> /*
> * optional PCI "hooks"
> */
> diff --git a/include/asm-ppc/ibm44x.h b/include/asm-ppc/ibm44x.h
> index 8078a58..8ac0a13 100644
> --- a/include/asm-ppc/ibm44x.h
> +++ b/include/asm-ppc/ibm44x.h
> @@ -138,7 +138,6 @@
> * The "residual" board information structure the boot loader passes
> * into the kernel.
> */
> -#ifndef __ASSEMBLY__
>
> /*
> * DCRN definitions
> @@ -596,6 +595,9 @@
> #define SRAM_DPC_ENABLE 0x80000000
>
> /* L2 Cache Controller 440GX/440SP/440SPe */
> +#define PPC44X_L2_CACHE_SHIFT 5
> +#define PPC44X_L2_CACHE_BYTES (1 << PPC44X_L2_CACHE_SHIFT)
> +
> #define DCRN_L2C0_CFG 0x030
> #define L2C_CFG_L2M 0x80000000
> #define L2C_CFG_ICU 0x40000000
> @@ -814,6 +816,5 @@
>
> #include <asm/ibm4xx.h>
>
> -#endif /* __ASSEMBLY__ */
> #endif /* __ASM_IBM44x_H__ */
> #endif /* __KERNEL__ */
> diff --git a/include/asm-ppc/machdep.h b/include/asm-ppc/machdep.h
> index 293a444..4e7a270 100644
> --- a/include/asm-ppc/machdep.h
> +++ b/include/asm-ppc/machdep.h
> @@ -80,6 +80,8 @@ struct machdep_calls {
> void (*nvram_write_val)(int addr, unsigned char val);
> void (*nvram_sync)(void);
>
> + void (*l2cache_inv_range)(unsigned long s, unsigned long e);
> +
> /*
> * optional PCI "hooks"
> */
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re[2]: [PATCH] [PPC 44x] L2-cache synchronization for ppc44x
2007-11-07 23:19 ` Benjamin Herrenschmidt
@ 2007-11-07 23:39 ` Yuri Tikhonov
0 siblings, 0 replies; 4+ messages in thread
From: Yuri Tikhonov @ 2007-11-07 23:39 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
Hi Ben,
On 08.11.2007, 2:19:33 you wrote:
> On Thu, 2007-11-08 at 02:12 +0300, Yuri Tikhonov wrote:
>> This is the updated patch for support synchronization of L2-Cache with
>> the external memory on the ppc44x-based platforms.
>>
>> Differencies against the previous patch-set:
>> - remove L2_CACHE config option;
>> - introduce the ppc machdep to invalidate L2 cache lines;
>> - some code clean-up.
> Can you tell me more about how this cache operates ? I don't quite
> understand why you would invalidate it on bidirectional DMAs rather than
> flush it to memory (unless you get your terminology wrong) and why you
> wouldn't flush it on transfers to the device.. Unless it is a
> write-through cache ?
Yes, the ppc44x Level-2 cache has a write-through design, so there is no need to do any kind of L2 flush.
As far as the DMA_BIDIRECTIONAL case is concerned, flush_dcache_range() flushes the data along the path L1->L2->RAM but invalidates L1 only, so the corresponding L2 lines remain valid. Since in the BIDIRECTIONAL case DMA may update the data in RAM, we have to invalidate the L2 cache manually, so that the CPU reads the new data written by DMA straight from RAM rather than the old data left in L2 by flush_dcache_range().
Regards,
Yuri
--
Yuri Tikhonov, Senior Software Engineer
Emcraft Systems, www.emcraft.com
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [PPC 44x] L2-cache synchronization for ppc44x
2007-11-07 23:12 [PATCH] [PPC 44x] L2-cache synchronization for ppc44x Yuri Tikhonov
2007-11-07 23:19 ` Benjamin Herrenschmidt
@ 2007-11-26 6:47 ` Benjamin Herrenschmidt
1 sibling, 0 replies; 4+ messages in thread
From: Benjamin Herrenschmidt @ 2007-11-26 6:47 UTC (permalink / raw)
To: Yuri Tikhonov; +Cc: Olof Johansson, linuxppc-dev
On Thu, 2007-11-08 at 02:12 +0300, Yuri Tikhonov wrote:
> This is the updated patch for support synchronization of L2-Cache with the external memory on the ppc44x-based platforms.
>
> Differencies against the previous patch-set:
> - remove L2_CACHE config option;
> - introduce the ppc machdep to invalidate L2 cache lines;
> - some code clean-up.
>
> Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
> Signed-off-by: Pavel Kolesnikov <concord@emcraft.com>
It's almost right :-0
You want something a bit more detailed than just a
ppc_md.l2cache_inv_range.
I'd suggest you do a separate extcache_ops structure that contains
callbacks for all 3 directions (flush, flush & invalidate, invalidate),
then fill that up accordingly.
It might be a good idea in the long run to do some tricks to avoid a
branch via function pointer since the cache ops are pretty "hot", but if
that ever happens, I'll do that via asm patching tricks I suppose.
Now regarding the line size, I would recommend using the device-tree to
describe the cache rather than a config option. The intent is to
be able to build a single binary kernel that can boot multiple variants
of 44x.
I may eventually rewrite your patch around those ideas next week if you
don't feel like doing it -and- I get bored :-)
Cheers,
Ben.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-11-26 6:47 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-07 23:12 [PATCH] [PPC 44x] L2-cache synchronization for ppc44x Yuri Tikhonov
2007-11-07 23:19 ` Benjamin Herrenschmidt
2007-11-07 23:39 ` Re[2]: " Yuri Tikhonov
2007-11-26 6:47 ` Benjamin Herrenschmidt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).