public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
       [not found] <1496718888-18324-1-git-send-email-chenhc@lemote.com>
@ 2017-06-06  3:14 ` Huacai Chen
  2017-06-13  8:40   ` Ralf Baechle
  0 siblings, 1 reply; 3+ messages in thread
From: Huacai Chen @ 2017-06-06  3:14 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen, stable

For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
only flush Node-0's scache. So we add r4k_blast_scache_node() by using
(CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
 arch/mips/mm/c-r4k.c             | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 7f12d7e..aa615e3 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
 __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
 __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
 
+#ifdef CONFIG_CPU_LOONGSON3
+#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize)	\
+static inline void blast_##pfx##cache##lsize##_node(long node)		\
+{									\
+	unsigned long start = CAC_BASE | (node << 44);			\
+	unsigned long end = start + current_cpu_data.desc.waysize;	\
+	unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit;	\
+	unsigned long ws_end = current_cpu_data.desc.ways <<		\
+			       current_cpu_data.desc.waybit;		\
+	unsigned long ws, addr;						\
+									\
+	__##pfx##flush_prologue						\
+									\
+	for (ws = 0; ws < ws_end; ws += ws_inc)				\
+		for (addr = start; addr < end; addr += lsize * 32)	\
+			cache##lsize##_unroll32(addr|ws, indexop);	\
+									\
+	__##pfx##flush_epilogue						\
+}
+
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
+#endif
+
 #endif /* _ASM_R4KCACHE_H */
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 3fe99cb..0a49af0 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
 		r4k_blast_scache = blast_scache128;
 }
 
+static void (* r4k_blast_scache_node)(long node);
+
+static void r4k_blast_scache_node_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	r4k_blast_scache_node = (void *)cache_noop;
+#ifdef CONFIG_CPU_LOONGSON3
+	if (sc_lsize == 16)
+		r4k_blast_scache_node = blast_scache16_node;
+	else if (sc_lsize == 32)
+		r4k_blast_scache_node = blast_scache32_node;
+	else if (sc_lsize == 64)
+		r4k_blast_scache_node = blast_scache64_node;
+	else if (sc_lsize == 128)
+		r4k_blast_scache_node = blast_scache128_node;
+#endif
+}
+
 static inline void local_r4k___flush_cache_all(void * args)
 {
 	switch (current_cpu_type()) {
 	case CPU_LOONGSON2:
-	case CPU_LOONGSON3:
 	case CPU_R4000SC:
 	case CPU_R4000MC:
 	case CPU_R4400SC:
@@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
 		r4k_blast_scache();
 		break;
 
+	case CPU_LOONGSON3:
+		r4k_blast_scache_node(get_ebase_cpunum() >> 2);
+		break;
+
 	case CPU_BMIPS5000:
 		r4k_blast_scache();
 		__sync();
@@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size)
+#ifndef CONFIG_CPU_LOONGSON3
 			r4k_blast_scache();
+#else
+			r4k_blast_scache_node((addr >> 44) & 0xF);
+#endif
 		else
 			blast_scache_range(addr, addr + size);
 		preempt_enable();
@@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
 		if (size >= scache_size)
+#ifndef CONFIG_CPU_LOONGSON3
 			r4k_blast_scache();
+#else
+			r4k_blast_scache_node((addr >> 44) & 0xF);
+#endif
 		else {
 			/*
 			 * There is no clearly documented alignment requirement
@@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
 	r4k_blast_scache_page_setup();
 	r4k_blast_scache_page_indexed_setup();
 	r4k_blast_scache_setup();
+	r4k_blast_scache_node_setup();
 #ifdef CONFIG_EVA
 	r4k_blast_dcache_user_page_setup();
 	r4k_blast_icache_user_page_setup();
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
  2017-06-06  3:14 ` [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
@ 2017-06-13  8:40   ` Ralf Baechle
  2017-06-13  9:38     ` Huacai Chen
  0 siblings, 1 reply; 3+ messages in thread
From: Ralf Baechle @ 2017-06-13  8:40 UTC (permalink / raw)
  To: Huacai Chen
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, stable

On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote:

> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
> only flush Node-0's scache. So we add r4k_blast_scache_node() by using
> (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>  arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
>  arch/mips/mm/c-r4k.c             | 33 ++++++++++++++++++++++++++++++++-
>  2 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
> index 7f12d7e..aa615e3 100644
> --- a/arch/mips/include/asm/r4kcache.h
> +++ b/arch/mips/include/asm/r4kcache.h
> @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
>  __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
>  __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
>  
> +#ifdef CONFIG_CPU_LOONGSON3
> +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize)	\
> +static inline void blast_##pfx##cache##lsize##_node(long node)		\
> +{									\
> +	unsigned long start = CAC_BASE | (node << 44);			\
> +	unsigned long end = start + current_cpu_data.desc.waysize;	\
> +	unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit;	\
> +	unsigned long ws_end = current_cpu_data.desc.ways <<		\
> +			       current_cpu_data.desc.waybit;		\
> +	unsigned long ws, addr;						\
> +									\
> +	__##pfx##flush_prologue						\
> +									\
> +	for (ws = 0; ws < ws_end; ws += ws_inc)				\
> +		for (addr = start; addr < end; addr += lsize * 32)	\
> +			cache##lsize##_unroll32(addr|ws, indexop);	\
> +									\
> +	__##pfx##flush_epilogue						\
> +}
> +
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
> +#endif

This all expands to just inline functions which generate no code if they're
unused, so you can drop the #ifdef.

However a comment explaining why this function is only required for
Loongson 3 would be great!

> +
>  #endif /* _ASM_R4KCACHE_H */
> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
> index 3fe99cb..0a49af0 100644
> --- a/arch/mips/mm/c-r4k.c
> +++ b/arch/mips/mm/c-r4k.c
> @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
>  		r4k_blast_scache = blast_scache128;
>  }
>  
> +static void (* r4k_blast_scache_node)(long node);
> +
> +static void r4k_blast_scache_node_setup(void)
> +{
> +	unsigned long sc_lsize = cpu_scache_line_size();
> +
> +	r4k_blast_scache_node = (void *)cache_noop;
> +#ifdef CONFIG_CPU_LOONGSON3
> +	if (sc_lsize == 16)
> +		r4k_blast_scache_node = blast_scache16_node;
> +	else if (sc_lsize == 32)
> +		r4k_blast_scache_node = blast_scache32_node;
> +	else if (sc_lsize == 64)
> +		r4k_blast_scache_node = blast_scache64_node;
> +	else if (sc_lsize == 128)
> +		r4k_blast_scache_node = blast_scache128_node;
> +#endif

No #ifdefs please.  Instead you can check the CPU type with something like

	if (current_cpu_type() == CPU_LOONGSON3) {
		...
	}

__get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC
knows it can optimize things for the CPU type(s) in use.

> +
>  static inline void local_r4k___flush_cache_all(void * args)
>  {
>  	switch (current_cpu_type()) {
>  	case CPU_LOONGSON2:
> -	case CPU_LOONGSON3:
>  	case CPU_R4000SC:
>  	case CPU_R4000MC:
>  	case CPU_R4400SC:
> @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
>  		r4k_blast_scache();
>  		break;
>  
> +	case CPU_LOONGSON3:
> +		r4k_blast_scache_node(get_ebase_cpunum() >> 2);
> +		break;
> +
>  	case CPU_BMIPS5000:
>  		r4k_blast_scache();
>  		__sync();
> @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
>  	preempt_disable();
>  	if (cpu_has_inclusive_pcaches) {
>  		if (size >= scache_size)
> +#ifndef CONFIG_CPU_LOONGSON3
>  			r4k_blast_scache();
> +#else
> +			r4k_blast_scache_node((addr >> 44) & 0xF);
> +#endif

Ditto.

>  		else
>  			blast_scache_range(addr, addr + size);
>  		preempt_enable();
> @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
>  	preempt_disable();
>  	if (cpu_has_inclusive_pcaches) {
>  		if (size >= scache_size)
> +#ifndef CONFIG_CPU_LOONGSON3
>  			r4k_blast_scache();
> +#else
> +			r4k_blast_scache_node((addr >> 44) & 0xF);
> +#endif

Ditto.

>  		else {
>  			/*
>  			 * There is no clearly documented alignment requirement
> @@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
>  	r4k_blast_scache_page_setup();
>  	r4k_blast_scache_page_indexed_setup();
>  	r4k_blast_scache_setup();
> +	r4k_blast_scache_node_setup();
>  #ifdef CONFIG_EVA
>  	r4k_blast_dcache_user_page_setup();
>  	r4k_blast_icache_user_page_setup();

  Ralf

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
  2017-06-13  8:40   ` Ralf Baechle
@ 2017-06-13  9:38     ` Huacai Chen
  0 siblings, 0 replies; 3+ messages in thread
From: Huacai Chen @ 2017-06-13  9:38 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, Linux MIPS Mailing List,
	Fuxin Zhang, Zhangjin Wu, stable

On Tue, Jun 13, 2017 at 4:40 PM, Ralf Baechle <ralf@linux-mips.org> wrote:
> On Tue, Jun 06, 2017 at 11:14:41AM +0800, Huacai Chen wrote:
>
>> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
>> only flush Node-0's scache. So we add r4k_blast_scache_node() by using
>> (CAC_BASE | (node_id << 44)) instead of CKSEG0 as the start address.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>> ---
>>  arch/mips/include/asm/r4kcache.h | 26 ++++++++++++++++++++++++++
>>  arch/mips/mm/c-r4k.c             | 33 ++++++++++++++++++++++++++++++++-
>>  2 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
>> index 7f12d7e..aa615e3 100644
>> --- a/arch/mips/include/asm/r4kcache.h
>> +++ b/arch/mips/include/asm/r4kcache.h
>> @@ -747,4 +747,30 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
>>  __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
>>  __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
>>
>> +#ifdef CONFIG_CPU_LOONGSON3
>> +#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize)   \
>> +static inline void blast_##pfx##cache##lsize##_node(long node)               \
>> +{                                                                    \
>> +     unsigned long start = CAC_BASE | (node << 44);                  \
>> +     unsigned long end = start + current_cpu_data.desc.waysize;      \
>> +     unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit;     \
>> +     unsigned long ws_end = current_cpu_data.desc.ways <<            \
>> +                            current_cpu_data.desc.waybit;            \
>> +     unsigned long ws, addr;                                         \
>> +                                                                     \
>> +     __##pfx##flush_prologue                                         \
>> +                                                                     \
>> +     for (ws = 0; ws < ws_end; ws += ws_inc)                         \
>> +             for (addr = start; addr < end; addr += lsize * 32)      \
>> +                     cache##lsize##_unroll32(addr|ws, indexop);      \
>> +                                                                     \
>> +     __##pfx##flush_epilogue                                         \
>> +}
>> +
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
>> +__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
>> +#endif
>
> This all expand to just inline functions which generate no code if they're
> unused, so you can drop the #ifdef.
>
> However a comment explaining why this function is only required for
> Loongson 3 would be great!
Address space is very specific to cpu-type. I don't know whether other
cpus need r4k_blast_scache_node(), and I don't know how to implement
r4k_blast_scache_node() for other cpus either (if they really need
this). So, I use #ifdefs.

>
>> +
>>  #endif /* _ASM_R4KCACHE_H */
>> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
>> index 3fe99cb..0a49af0 100644
>> --- a/arch/mips/mm/c-r4k.c
>> +++ b/arch/mips/mm/c-r4k.c
>> @@ -459,11 +459,29 @@ static void r4k_blast_scache_setup(void)
>>               r4k_blast_scache = blast_scache128;
>>  }
>>
>> +static void (* r4k_blast_scache_node)(long node);
>> +
>> +static void r4k_blast_scache_node_setup(void)
>> +{
>> +     unsigned long sc_lsize = cpu_scache_line_size();
>> +
>> +     r4k_blast_scache_node = (void *)cache_noop;
>> +#ifdef CONFIG_CPU_LOONGSON3
>> +     if (sc_lsize == 16)
>> +             r4k_blast_scache_node = blast_scache16_node;
>> +     else if (sc_lsize == 32)
>> +             r4k_blast_scache_node = blast_scache32_node;
>> +     else if (sc_lsize == 64)
>> +             r4k_blast_scache_node = blast_scache64_node;
>> +     else if (sc_lsize == 128)
>> +             r4k_blast_scache_node = blast_scache128_node;
>> +#endif
>
> No #idefs please.  Instead you can check the CPU type with something like
>
>         if (current_cpu_type() = CPU_LOONGSON3) {
>                 ...
>         }
>
> __get_cpu_type() in include/asm/cpu-type.h will then ensure that GCC
> knows it can optimize things for the CPU type(s) in use.
>
>> +
>>  static inline void local_r4k___flush_cache_all(void * args)
>>  {
>>       switch (current_cpu_type()) {
>>       case CPU_LOONGSON2:
>> -     case CPU_LOONGSON3:
>>       case CPU_R4000SC:
>>       case CPU_R4000MC:
>>       case CPU_R4400SC:
>> @@ -480,6 +498,10 @@ static inline void local_r4k___flush_cache_all(void * args)
>>               r4k_blast_scache();
>>               break;
>>
>> +     case CPU_LOONGSON3:
>> +             r4k_blast_scache_node(get_ebase_cpunum() >> 2);
>> +             break;
>> +
>>       case CPU_BMIPS5000:
>>               r4k_blast_scache();
>>               __sync();
>> @@ -840,7 +862,11 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
>>       preempt_disable();
>>       if (cpu_has_inclusive_pcaches) {
>>               if (size >= scache_size)
>> +#ifndef CONFIG_CPU_LOONGSON3
>>                       r4k_blast_scache();
>> +#else
>> +                     r4k_blast_scache_node((addr >> 44) & 0xF);
>> +#endif
>
> Ditto.
>
>>               else
>>                       blast_scache_range(addr, addr + size);
>>               preempt_enable();
>> @@ -873,7 +899,11 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
>>       preempt_disable();
>>       if (cpu_has_inclusive_pcaches) {
>>               if (size >= scache_size)
>> +#ifndef CONFIG_CPU_LOONGSON3
>>                       r4k_blast_scache();
>> +#else
>> +                     r4k_blast_scache_node((addr >> 44) & 0xF);
>> +#endif
>
> Ditto.
>
>>               else {
>>                       /*
>>                        * There is no clearly documented alignment requirement
>> @@ -1903,6 +1933,7 @@ void r4k_cache_init(void)
>>       r4k_blast_scache_page_setup();
>>       r4k_blast_scache_page_indexed_setup();
>>       r4k_blast_scache_setup();
>> +     r4k_blast_scache_node_setup();
>>  #ifdef CONFIG_EVA
>>       r4k_blast_dcache_user_page_setup();
>>       r4k_blast_icache_user_page_setup();
>
>   Ralf
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-06-13  9:38 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1496718888-18324-1-git-send-email-chenhc@lemote.com>
2017-06-06  3:14 ` [PATCH V4 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
2017-06-13  8:40   ` Ralf Baechle
2017-06-13  9:38     ` Huacai Chen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox