* [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
[not found] <1496718888-18324-1-git-send-email-chenhc@lemote.com>
@ 2017-06-06 3:14 ` Huacai Chen
2017-06-13 8:40 ` Ralf Baechle
0 siblings, 1 reply; 3+ messages in thread
From: Huacai Chen @ 2017-06-06 3:14 UTC (permalink / raw)
To: Ralf Baechle
Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
Zhangjin Wu, Huacai Chen, stable
For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
only flush Node-0's scache. So we add r4k_blast_scache_node() by using
(CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.
Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++-
2 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 7f12d7e..aa615e3 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
__BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
__BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
+#ifdef CONFIG_CPU_LOONGSON3
+#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \
+static inline void blast_##pfx##cache##lsize##_node(long node) \
+{ \
+ unsigned long start = CAC_BASE | (node << 44); \
+ unsigned long end = start + current_cpu_data.desc.waysize; \
+ unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \
+ unsigned long ws_end = current_cpu_data.desc.ways << \
+ current_cpu_data.desc.waybit; \
+ unsigned long ws, addr; \
+ \
+ __##pfx##flush_prologue \
+ \
+ for (ws = 0; ws < ws_end; ws += ws_inc) \
+ for (addr = start; addr < end; addr += lsize * 32) \
+ cache##lsize##_unroll32(addr|ws, indexop); \
+ \
+ __##pfx##flush_epilogue \
+}
+
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
+#endif
+
#endif /* _ASM_R4KCACHE_H */
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 3fe99cb..0a49af0 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
r4k_blast_scache = blast_scache128;
}
+static void (* r4k_blast_scache_node)(long node);
+
+static void r4k_blast_scache_node_setup(void)
+{
+ unsigned long sc_lsize = cpu_scache_line_size();
+
+ r4k_blast_scache_node = (void *)cache_noop;
+#ifdef CONFIG_CPU_LOONGSON3
+ if (sc_lsize == 16)
+ r4k_blast_scache_node = blast_scache16_node;
+ else if (sc_lsize == 32)
+ r4k_blast_scache_node = blast_scache32_node;
+ else if (sc_lsize == 64)
+ r4k_blast_scache_node = blast_scache64_node;
+ else if (sc_lsize == 128)
+ r4k_blast_scache_node = blast_scache128_node;
+#endif
+}
+
static inline void local_r4k___flush_cache_all(void * args)
{
switch (current_cpu_type()) {
case CPU_LOONGSON2:
- case CPU_LOONGSON3:
case CPU_R4000SC:
case CPU_R4000MC:
case CPU_R4400SC:
@@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
r4k_blast_scache();
break;
+ case CPU_LOONGSON3:
+ r4k_blast_scache_node(get_ebase_cpunum() >> 2);
+ break;
+
case CPU_BMIPS5000:
r4k_blast_scache();
__sync();
@@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
preempt_disable();
if (cpu_has_inclusive_pcaches) {
if (size >= scache_size)
+#ifndef CONFIG_CPU_LOONGSON3
r4k_blast_scache();
+#else
+ r4k_blast_scache_node((addr >> 44) & 0xF);
+#endif
else
blast_scache_range(addr, addr + size);
preempt_enable();
@@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
preempt_disable();
if (cpu_has_inclusive_pcaches) {
if (size >= scache_size)
+#ifndef CONFIG_CPU_LOONGSON3
r4k_blast_scache();
+#else
+ r4k_blast_scache_node((addr >> 44) & 0xF);
+#endif
else {
/*
* There is no clearly documented alignment requirement
@@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
r4k_blast_scache_page_setup();
r4k_blast_scache_page_indexed_setup();
r4k_blast_scache_setup();
+ r4k_blast_scache_node_setup();
#ifdef CONFIG_EVA
r4k_blast_dcache_user_page_setup();
r4k_blast_icache_user_page_setup();
--
2.7.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
2017-06-06 3:14 ` [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
@ 2017-06-13 8:40 ` Ralf Baechle
2017-06-13 9:38 ` Huacai Chen
0 siblings, 1 reply; 3+ messages in thread
From: Ralf Baechle @ 2017-06-13 8:40 UTC (permalink / raw)
To: Huacai Chen
Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
Zhangjin Wu, stable
On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote:
> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
> only flush Node-0's scache. So we add r4k_blast_scache_node() by using
> (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
> arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
> arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++-
> 2 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
> index 7f12d7e..aa615e3 100644
> --- a/arch/mips/include/asm/r4kcache.h
> +++ b/arch/mips/include/asm/r4kcache.h
> @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
> __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
> __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
>
> +#ifdef CONFIG_CPU_LOONGSON3
> +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \
> +static inline void blast_##pfx##cache##lsize##_node(long node) \
> +{ \
> + unsigned long start = CAC_BASE | (node << 44); \
> + unsigned long end = start + current_cpu_data.desc.waysize; \
> + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \
> + unsigned long ws_end = current_cpu_data.desc.ways << \
> + current_cpu_data.desc.waybit; \
> + unsigned long ws, addr; \
> + \
> + __##pfx##flush_prologue \
> + \
> + for (ws = 0; ws < ws_end; ws += ws_inc) \
> + for (addr = start; addr < end; addr += lsize * 32) \
> + cache##lsize##_unroll32(addr|ws, indexop); \
> + \
> + __##pfx##flush_epilogue \
> +}
> +
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
> +#endif
This all expands to just inline functions, which generate no code if they're
unused, so you can drop the #ifdef.
However a comment explaining why this function is only required for
Loongson 3 would be great!
> +
> #endif /* _ASM_R4KCACHE_H */
> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
> index 3fe99cb..0a49af0 100644
> --- a/arch/mips/mm/c-r4k.c
> +++ b/arch/mips/mm/c-r4k.c
> @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
> r4k_blast_scache = blast_scache128;
> }
>
> +static void (* r4k_blast_scache_node)(long node);
> +
> +static void r4k_blast_scache_node_setup(void)
> +{
> + unsigned long sc_lsize = cpu_scache_line_size();
> +
> + r4k_blast_scache_node = (void *)cache_noop;
> +#ifdef CONFIG_CPU_LOONGSON3
> + if (sc_lsize == 16)
> + r4k_blast_scache_node = blast_scache16_node;
> + else if (sc_lsize == 32)
> + r4k_blast_scache_node = blast_scache32_node;
> + else if (sc_lsize == 64)
> + r4k_blast_scache_node = blast_scache64_node;
> + else if (sc_lsize == 128)
> + r4k_blast_scache_node = blast_scache128_node;
> +#endif
No #ifdefs please. Instead you can check the CPU type with something like
if (current_cpu_type() == CPU_LOONGSON3) {
...
}
__get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC
knows it can optimize things for the CPU type(s) in use.
> +
> static inline void local_r4k___flush_cache_all(void * args)
> {
> switch (current_cpu_type()) {
> case CPU_LOONGSON2:
> - case CPU_LOONGSON3:
> case CPU_R4000SC:
> case CPU_R4000MC:
> case CPU_R4400SC:
> @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
> r4k_blast_scache();
> break;
>
> + case CPU_LOONGSON3:
> + r4k_blast_scache_node(get_ebase_cpunum() >> 2);
> + break;
> +
> case CPU_BMIPS5000:
> r4k_blast_scache();
> __sync();
> @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
> preempt_disable();
> if (cpu_has_inclusive_pcaches) {
> if (size >= scache_size)
> +#ifndef CONFIG_CPU_LOONGSON3
> r4k_blast_scache();
> +#else
> + r4k_blast_scache_node((addr >> 44) & 0xF);
> +#endif
Ditto.
> else
> blast_scache_range(addr, addr + size);
> preempt_enable();
> @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
> preempt_disable();
> if (cpu_has_inclusive_pcaches) {
> if (size >= scache_size)
> +#ifndef CONFIG_CPU_LOONGSON3
> r4k_blast_scache();
> +#else
> + r4k_blast_scache_node((addr >> 44) & 0xF);
> +#endif
Ditto.
> else {
> /*
> * There is no clearly documented alignment requirement
> @@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
> r4k_blast_scache_page_setup();
> r4k_blast_scache_page_indexed_setup();
> r4k_blast_scache_setup();
> + r4k_blast_scache_node_setup();
> #ifdef CONFIG_EVA
> r4k_blast_dcache_user_page_setup();
> r4k_blast_icache_user_page_setup();
Ralf
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
2017-06-13 8:40 ` Ralf Baechle
@ 2017-06-13 9:38 ` Huacai Chen
0 siblings, 0 replies; 3+ messages in thread
From: Huacai Chen @ 2017-06-13 9:38 UTC (permalink / raw)
To: Ralf Baechle
Cc: John Crispin, Steven J . Hill, Linux MIPS Mailing List,
Fuxin Zhang, Zhangjin Wu, stable
On Tue, Jun 13, 2017 at 4:40 PM, Ralf Baechle <ralf@linux-mips.org> wrote:
> On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote:
>
>> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
>> only flush Node-0's scache. So we add r4k_blast_scache_node() by using
>> (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>> ---
>> arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
>> arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++-
>> 2 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
>> index 7f12d7e..aa615e3 100644
>> --- a/arch/mips/include/asm/r4kcache.h
>> +++ b/arch/mips/include/asm/r4kcache.h
>> @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
>> __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
>> __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
>>
>> +#ifdef CONFIG_CPU_LOONGSON3
>> +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \
>> +static inline void blast_##pfx##cache##lsize##_node(long node) \
>> +{ \
>> + unsigned long start = CAC_BASE | (node << 44); \
>> + unsigned long end = start + current_cpu_data.desc.waysize; \
>> + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \
>> + unsigned long ws_end = current_cpu_data.desc.ways << \
>> + current_cpu_data.desc.waybit; \
>> + unsigned long ws, addr; \
>> + \
>> + __##pfx##flush_prologue \
>> + \
>> + for (ws = 0; ws < ws_end; ws += ws_inc) \
>> + for (addr = start; addr < end; addr += lsize * 32) \
>> + cache##lsize##_unroll32(addr|ws, indexop); \
>> + \
>> + __##pfx##flush_epilogue \
>> +}
>> +
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
>> +#endif
>
> This all expands to just inline functions, which generate no code if they're
> unused, so you can drop the #ifdef.
>
> However a comment explaining why this function is only required for
> Loongson 3 would be great!
The address-space layout is very specific to the CPU type. I don't know
whether other CPUs need r4k_blast_scache_node(), and I don't know how to
implement r4k_blast_scache_node() for other CPUs either (if they really do
need it). That is why I used #ifdefs.
>
>> +
>> #endif /* _ASM_R4KCACHE_H */
>> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
>> index 3fe99cb..0a49af0 100644
>> --- a/arch/mips/mm/c-r4k.c
>> +++ b/arch/mips/mm/c-r4k.c
>> @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
>> r4k_blast_scache = blast_scache128;
>> }
>>
>> +static void (* r4k_blast_scache_node)(long node);
>> +
>> +static void r4k_blast_scache_node_setup(void)
>> +{
>> + unsigned long sc_lsize = cpu_scache_line_size();
>> +
>> + r4k_blast_scache_node = (void *)cache_noop;
>> +#ifdef CONFIG_CPU_LOONGSON3
>> + if (sc_lsize == 16)
>> + r4k_blast_scache_node = blast_scache16_node;
>> + else if (sc_lsize == 32)
>> + r4k_blast_scache_node = blast_scache32_node;
>> + else if (sc_lsize == 64)
>> + r4k_blast_scache_node = blast_scache64_node;
>> + else if (sc_lsize == 128)
>> + r4k_blast_scache_node = blast_scache128_node;
>> +#endif
>
> No #ifdefs please. Instead you can check the CPU type with something like
>
> if (current_cpu_type() == CPU_LOONGSON3) {
> ...
> }
>
> __get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC
> knows it can optimize things for the CPU type(s) in use.
>
>> +
>> static inline void local_r4k___flush_cache_all(void * args)
>> {
>> switch (current_cpu_type()) {
>> case CPU_LOONGSON2:
>> - case CPU_LOONGSON3:
>> case CPU_R4000SC:
>> case CPU_R4000MC:
>> case CPU_R4400SC:
>> @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
>> r4k_blast_scache();
>> break;
>>
>> + case CPU_LOONGSON3:
>> + r4k_blast_scache_node(get_ebase_cpunum() >> 2);
>> + break;
>> +
>> case CPU_BMIPS5000:
>> r4k_blast_scache();
>> __sync();
>> @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
>> preempt_disable();
>> if (cpu_has_inclusive_pcaches) {
>> if (size >= scache_size)
>> +#ifndef CONFIG_CPU_LOONGSON3
>> r4k_blast_scache();
>> +#else
>> + r4k_blast_scache_node((addr >> 44) & 0xF);
>> +#endif
>
> Ditto.
>
>> else
>> blast_scache_range(addr, addr + size);
>> preempt_enable();
>> @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
>> preempt_disable();
>> if (cpu_has_inclusive_pcaches) {
>> if (size >= scache_size)
>> +#ifndef CONFIG_CPU_LOONGSON3
>> r4k_blast_scache();
>> +#else
>> + r4k_blast_scache_node((addr >> 44) & 0xF);
>> +#endif
>
> Ditto.
>
>> else {
>> /*
>> * There is no clearly documented alignment requirement
>> @@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
>> r4k_blast_scache_page_setup();
>> r4k_blast_scache_page_indexed_setup();
>> r4k_blast_scache_setup();
>> + r4k_blast_scache_node_setup();
>> #ifdef CONFIG_EVA
>> r4k_blast_dcache_user_page_setup();
>> r4k_blast_icache_user_page_setup();
>
> Ralf
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-06-13 9:38 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <1496718888-18324-1-git-send-email-chenhc@lemote.com>
2017-06-06 3:14 ` [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
2017-06-13 8:40 ` Ralf Baechle
2017-06-13 9:38 ` Huacai Chen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox