* [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 [not found] <1496718888-18324-1-git-send-email-chenhc@lemote.com> @ 2017-06-06 3:14 ` Huacai Chen 2017-06-13 8:40 ` Ralf Baechle 0 siblings, 1 reply; 3+ messages in thread From: Huacai Chen @ 2017-06-06 3:14 UTC (permalink / raw) To: Ralf Baechle Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang, Zhangjin Wu, Huacai Chen, stable For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can only flush Node-0's scache. So we add r4k_blast_scache_node() by using (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen <chenhc@lemote.com> --- arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++ arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 7f12d7e..aa615e3 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , ) __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , ) __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , ) +#ifdef CONFIG_CPU_LOONGSON3 +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \ +static inline void blast_##pfx##cache##lsize##_node(long node) \ +{ \ + unsigned long start = CAC_BASE | (node << 44); \ + unsigned long end = start + current_cpu_data.desc.waysize; \ + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \ + unsigned long ws_end = current_cpu_data.desc.ways << \ + current_cpu_data.desc.waybit; \ + unsigned long ws, addr; \ + \ + __##pfx##flush_prologue \ + \ + for (ws = 0; ws < ws_end; ws += ws_inc) \ + for (addr = start; addr < end; addr += lsize * 32) \ + cache##lsize##_unroll32(addr|ws, indexop); \ + \ + __##pfx##flush_epilogue \ +} + +__BUILD_BLAST_CACHE_NODE(s, scache, 
Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32) +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) +#endif + #endif /* _ASM_R4KCACHE_H */ diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 3fe99cb..0a49af0 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void) r4k_blast_scache = blast_scache128; } +static void (* r4k_blast_scache_node)(long node); + +static void r4k_blast_scache_node_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + r4k_blast_scache_node = (void *)cache_noop; +#ifdef CONFIG_CPU_LOONGSON3 + if (sc_lsize == 16) + r4k_blast_scache_node = blast_scache16_node; + else if (sc_lsize == 32) + r4k_blast_scache_node = blast_scache32_node; + else if (sc_lsize == 64) + r4k_blast_scache_node = blast_scache64_node; + else if (sc_lsize == 128) + r4k_blast_scache_node = blast_scache128_node; +#endif +} + static inline void local_r4k___flush_cache_all(void * args) { switch (current_cpu_type()) { case CPU_LOONGSON2: - case CPU_LOONGSON3: case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args) r4k_blast_scache(); break; + case CPU_LOONGSON3: + r4k_blast_scache_node(get_ebase_cpunum() >> 2); + break; + case CPU_BMIPS5000: r4k_blast_scache(); __sync(); @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) +#ifndef CONFIG_CPU_LOONGSON3 r4k_blast_scache(); +#else + r4k_blast_scache_node((addr >> 44) & 0xF); +#endif else blast_scache_range(addr, addr + size); preempt_enable(); @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) 
preempt_disable(); if (cpu_has_inclusive_pcaches) { if (size >= scache_size) +#ifndef CONFIG_CPU_LOONGSON3 r4k_blast_scache(); +#else + r4k_blast_scache_node((addr >> 44) & 0xF); +#endif else { /* * There is no clearly documented alignment requirement @@ -1903,6 +1933,7 @@ void r4k_cache_init(void) r4k_blast_scache_page_setup(); r4k_blast_scache_page_indexed_setup(); r4k_blast_scache_setup(); + r4k_blast_scache_node_setup(); #ifdef CONFIG_EVA r4k_blast_dcache_user_page_setup(); r4k_blast_icache_user_page_setup(); -- 2.7.0 ^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 2017-06-06 3:14 ` [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen @ 2017-06-13 8:40 ` Ralf Baechle 2017-06-13 9:38 ` Huacai Chen 0 siblings, 1 reply; 3+ messages in thread From: Ralf Baechle @ 2017-06-13 8:40 UTC (permalink / raw) To: Huacai Chen Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang, Zhangjin Wu, stable On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote: > For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can > only flush Node-0's scache. So we add r4k_blast_scache_node() by using > (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address. > > Cc: stable@vger.kernel.org > Signed-off-by: Huacai Chen <chenhc@lemote.com> > --- > arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++ > arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++- > 2 files changed, 58 insertions(+), 1 deletion(-) > > diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h > index 7f12d7e..aa615e3 100644 > --- a/arch/mips/include/asm/r4kcache.h > +++ b/arch/mips/include/asm/r4kcache.h > @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , ) > __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , ) > __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , ) > > +#ifdef CONFIG_CPU_LOONGSON3 > +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \ > +static inline void blast_##pfx##cache##lsize##_node(long node) \ > +{ \ > + unsigned long start = CAC_BASE | (node << 44); \ > + unsigned long end = start + current_cpu_data.desc.waysize; \ > + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \ > + unsigned long ws_end = current_cpu_data.desc.ways << \ > + current_cpu_data.desc.waybit; \ > + unsigned long ws, addr; \ > + \ > + __##pfx##flush_prologue \ > + \ > + for (ws = 0; ws < ws_end; ws += ws_inc) \ > + for (addr = start; addr 
< end; addr += lsize * 32) \ > + cache##lsize##_unroll32(addr|ws, indexop); \ > + \ > + __##pfx##flush_epilogue \ > +} > + > +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) > +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32) > +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) > +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) > +#endif This all expands to just inline functions which generate no code if they're unused, so you can drop the #ifdef. However a comment explaining why this function is only required for Loongson 3 would be great! > + > #endif /* _ASM_R4KCACHE_H */ > diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c > index 3fe99cb..0a49af0 100644 > --- a/arch/mips/mm/c-r4k.c > +++ b/arch/mips/mm/c-r4k.c > @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void) > r4k_blast_scache = blast_scache128; > } > > +static void (* r4k_blast_scache_node)(long node); > + > +static void r4k_blast_scache_node_setup(void) > +{ > + unsigned long sc_lsize = cpu_scache_line_size(); > + > + r4k_blast_scache_node = (void *)cache_noop; > +#ifdef CONFIG_CPU_LOONGSON3 > + if (sc_lsize == 16) > + r4k_blast_scache_node = blast_scache16_node; > + else if (sc_lsize == 32) > + r4k_blast_scache_node = blast_scache32_node; > + else if (sc_lsize == 64) > + r4k_blast_scache_node = blast_scache64_node; > + else if (sc_lsize == 128) > + r4k_blast_scache_node = blast_scache128_node; > +#endif No #ifdefs please. Instead you can check the CPU type with something like if (current_cpu_type() == CPU_LOONGSON3) { ... } __get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC knows it can optimize things for the CPU type(s) in use. 
> + > static inline void local_r4k___flush_cache_all(void * args) > { > switch (current_cpu_type()) { > case CPU_LOONGSON2: > - case CPU_LOONGSON3: > case CPU_R4000SC: > case CPU_R4000MC: > case CPU_R4400SC: > @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args) > r4k_blast_scache(); > break; > > + case CPU_LOONGSON3: > + r4k_blast_scache_node(get_ebase_cpunum() >> 2); > + break; > + > case CPU_BMIPS5000: > r4k_blast_scache(); > __sync(); > @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) > preempt_disable(); > if (cpu_has_inclusive_pcaches) { > if (size >= scache_size) > +#ifndef CONFIG_CPU_LOONGSON3 > r4k_blast_scache(); > +#else > + r4k_blast_scache_node((addr >> 44) & 0xF); > +#endif Ditto. > else > blast_scache_range(addr, addr + size); > preempt_enable(); > @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) > preempt_disable(); > if (cpu_has_inclusive_pcaches) { > if (size >= scache_size) > +#ifndef CONFIG_CPU_LOONGSON3 > r4k_blast_scache(); > +#else > + r4k_blast_scache_node((addr >> 44) & 0xF); > +#endif Ditto. > else { > /* > * There is no clearly documented alignment requirement > @@ -1903,6 +1933,7 @@ void r4k_cache_init(void) > r4k_blast_scache_page_setup(); > r4k_blast_scache_page_indexed_setup(); > r4k_blast_scache_setup(); > + r4k_blast_scache_node_setup(); > #ifdef CONFIG_EVA > r4k_blast_dcache_user_page_setup(); > r4k_blast_icache_user_page_setup(); Ralf ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 2017-06-13 8:40 ` Ralf Baechle @ 2017-06-13 9:38 ` Huacai Chen 0 siblings, 0 replies; 3+ messages in thread From: Huacai Chen @ 2017-06-13 9:38 UTC (permalink / raw) To: Ralf Baechle Cc: John Crispin, Steven J . Hill, Linux MIPS Mailing List, Fuxin Zhang, Zhangjin Wu, stable On Tue, Jun 13, 2017 at 4:40 PM, Ralf Baechle <ralf@linux-mips.org> wrote: > On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote: > >> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can >> only flush Node-0's scache. So we add r4k_blast_scache_node() by using >> (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address. >> >> Cc: stable@vger.kernel.org >> Signed-off-by: Huacai Chen <chenhc@lemote.com> >> --- >> arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++ >> arch/mips/mm/c-r4k.c | 33 ++++++++++++++++++++++++++++++++- >> 2 files changed, 58 insertions(+), 1 deletion(-) >> >> diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h >> index 7f12d7e..aa615e3 100644 >> --- a/arch/mips/include/asm/r4kcache.h >> +++ b/arch/mips/include/asm/r4kcache.h >> @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , ) >> __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , ) >> __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , ) >> >> +#ifdef CONFIG_CPU_LOONGSON3 >> +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize) \ >> +static inline void blast_##pfx##cache##lsize##_node(long node) \ >> +{ \ >> + unsigned long start = CAC_BASE | (node << 44); \ >> + unsigned long end = start + current_cpu_data.desc.waysize; \ >> + unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit; \ >> + unsigned long ws_end = current_cpu_data.desc.ways << \ >> + current_cpu_data.desc.waybit; \ >> + unsigned long ws, addr; \ >> + \ >> + __##pfx##flush_prologue \ >> + \ >> + for (ws = 0; ws < ws_end; ws += ws_inc) \ 
>> + for (addr = start; addr < end; addr += lsize * 32) \ >> + cache##lsize##_unroll32(addr|ws, indexop); \ >> + \ >> + __##pfx##flush_epilogue \ >> +} >> + >> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16) >> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32) >> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64) >> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128) >> +#endif > > This all expand to just inline functions which generate no code if they're > unused, so you can drop the #ifdef. > > However a comment explaining why this function is only required for > Loongson 3 would be great! Address space is very specific to cpu-type. I don't know whether other cpus need r4k_blast_scache_node(), and I don't know how to implement r4k_blast_scache_node() for other cpus either (if they really need this). So, I use #ifdefs. > >> + >> #endif /* _ASM_R4KCACHE_H */ >> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c >> index 3fe99cb..0a49af0 100644 >> --- a/arch/mips/mm/c-r4k.c >> +++ b/arch/mips/mm/c-r4k.c >> @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void) >> r4k_blast_scache = blast_scache128; >> } >> >> +static void (* r4k_blast_scache_node)(long node); >> + >> +static void r4k_blast_scache_node_setup(void) >> +{ >> + unsigned long sc_lsize = cpu_scache_line_size(); >> + >> + r4k_blast_scache_node = (void *)cache_noop; >> +#ifdef CONFIG_CPU_LOONGSON3 >> + if (sc_lsize == 16) >> + r4k_blast_scache_node = blast_scache16_node; >> + else if (sc_lsize == 32) >> + r4k_blast_scache_node = blast_scache32_node; >> + else if (sc_lsize == 64) >> + r4k_blast_scache_node = blast_scache64_node; >> + else if (sc_lsize == 128) >> + r4k_blast_scache_node = blast_scache128_node; >> +#endif > > No #idefs please. Instead you can check the CPU type with something like > > if (current_cpu_type() = CPU_LOONGSON3) { > ... 
> } > > __get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC > knows it can optimize things for the CPU type(s) in use. > >> + >> static inline void local_r4k___flush_cache_all(void * args) >> { >> switch (current_cpu_type()) { >> case CPU_LOONGSON2: >> - case CPU_LOONGSON3: >> case CPU_R4000SC: >> case CPU_R4000MC: >> case CPU_R4400SC: >> @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args) >> r4k_blast_scache(); >> break; >> >> + case CPU_LOONGSON3: >> + r4k_blast_scache_node(get_ebase_cpunum() >> 2); >> + break; >> + >> case CPU_BMIPS5000: >> r4k_blast_scache(); >> __sync(); >> @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) >> preempt_disable(); >> if (cpu_has_inclusive_pcaches) { >> if (size >= scache_size) >> +#ifndef CONFIG_CPU_LOONGSON3 >> r4k_blast_scache(); >> +#else >> + r4k_blast_scache_node((addr >> 44) & 0xF); >> +#endif > > Ditto. > >> else >> blast_scache_range(addr, addr + size); >> preempt_enable(); >> @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) >> preempt_disable(); >> if (cpu_has_inclusive_pcaches) { >> if (size >= scache_size) >> +#ifndef CONFIG_CPU_LOONGSON3 >> r4k_blast_scache(); >> +#else >> + r4k_blast_scache_node((addr >> 44) & 0xF); >> +#endif > > Ditto. > >> else { >> /* >> * There is no clearly documented alignment requirement >> @@ -1903,6 +1933,7 @@ void r4k_cache_init(void) >> r4k_blast_scache_page_setup(); >> r4k_blast_scache_page_indexed_setup(); >> r4k_blast_scache_setup(); >> + r4k_blast_scache_node_setup(); >> #ifdef CONFIG_EVA >> r4k_blast_dcache_user_page_setup(); >> r4k_blast_icache_user_page_setup(); > > Ralf > ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-06-13 9:38 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <1496718888-18324-1-git-send-email-chenhc@lemote.com>
2017-06-06 3:14 ` [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
2017-06-13 8:40 ` Ralf Baechle
2017-06-13 9:38 ` Huacai Chen
This is a public inbox; see mirroring instructions for how to clone and mirror all data and code used for this inbox.