public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH V5 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
       [not found] <1497492952-23877-1-git-send-email-chenhc@lemote.com>
@ 2017-06-15  2:15 ` Huacai Chen
  2017-06-15  9:59   ` Ralf Baechle
  0 siblings, 1 reply; 2+ messages in thread
From: Huacai Chen @ 2017-06-15  2:15 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, Huacai Chen, stable

For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
only flush Node-0's scache. So we add r4k_blast_scache_node() by using
(CAC_BASE | (node_id << NODE_ADDRSPACE_SHIFT)) instead of CKSEG0 as the
start address.

Maybe all MIPS CPUs need r4k_blast_scache_node() to support cc-NUMA,
but I don't know how to implement it for non-Loongson CPUs.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/include/asm/r4kcache.h | 30 ++++++++++++++++++++++++++++
 arch/mips/mm/c-r4k.c             | 42 +++++++++++++++++++++++++++++++++-------
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 7f12d7e..e5ece81 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -747,4 +747,34 @@ __BUILD_BLAST_CACHE_RANGE(s, scache, Hit_Writeback_Inv_SD, , )
 __BUILD_BLAST_CACHE_RANGE(inv_d, dcache, Hit_Invalidate_D, , )
 __BUILD_BLAST_CACHE_RANGE(inv_s, scache, Hit_Invalidate_SD, , )
 
+#ifndef NODE_ADDRSPACE_SHIFT
+#define nid_to_addrbase(nid) 0
+#else
+#define nid_to_addrbase(nid) (nid << NODE_ADDRSPACE_SHIFT)
+#endif
+
+#define __BUILD_BLAST_CACHE_NODE(pfx, desc, indexop, hitop, lsize)	\
+static inline void blast_##pfx##cache##lsize##_node(long node)		\
+{									\
+	unsigned long start = CAC_BASE | nid_to_addrbase(node);		\
+	unsigned long end = start + current_cpu_data.desc.waysize;	\
+	unsigned long ws_inc = 1UL << current_cpu_data.desc.waybit;	\
+	unsigned long ws_end = current_cpu_data.desc.ways <<		\
+			       current_cpu_data.desc.waybit;		\
+	unsigned long ws, addr;						\
+									\
+	__##pfx##flush_prologue						\
+									\
+	for (ws = 0; ws < ws_end; ws += ws_inc)				\
+		for (addr = start; addr < end; addr += lsize * 32)	\
+			cache##lsize##_unroll32(addr|ws, indexop);	\
+									\
+	__##pfx##flush_epilogue						\
+}
+
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 16)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 32)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 64)
+__BUILD_BLAST_CACHE_NODE(s, scache, Index_Writeback_Inv_SD, Hit_Writeback_Inv_SD, 128)
+
 #endif /* _ASM_R4KCACHE_H */
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 3fe99cb..02337a9 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -459,11 +459,28 @@ static void r4k_blast_scache_setup(void)
 		r4k_blast_scache = blast_scache128;
 }
 
+static void (* r4k_blast_scache_node)(long node);
+
+static void r4k_blast_scache_node_setup(void)
+{
+	unsigned long sc_lsize = cpu_scache_line_size();
+
+	if (current_cpu_type() != CPU_LOONGSON3)
+		r4k_blast_scache_node = (void *)cache_noop;
+	else if (sc_lsize == 16)
+		r4k_blast_scache_node = blast_scache16_node;
+	else if (sc_lsize == 32)
+		r4k_blast_scache_node = blast_scache32_node;
+	else if (sc_lsize == 64)
+		r4k_blast_scache_node = blast_scache64_node;
+	else if (sc_lsize == 128)
+		r4k_blast_scache_node = blast_scache128_node;
+}
+
 static inline void local_r4k___flush_cache_all(void * args)
 {
 	switch (current_cpu_type()) {
 	case CPU_LOONGSON2:
-	case CPU_LOONGSON3:
 	case CPU_R4000SC:
 	case CPU_R4000MC:
 	case CPU_R4400SC:
@@ -480,6 +497,10 @@ static inline void local_r4k___flush_cache_all(void * args)
 		r4k_blast_scache();
 		break;
 
+	case CPU_LOONGSON3:
+		r4k_blast_scache_node(get_ebase_cpunum() >> 2);
+		break;
+
 	case CPU_BMIPS5000:
 		r4k_blast_scache();
 		__sync();
@@ -839,9 +860,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
-		if (size >= scache_size)
-			r4k_blast_scache();
-		else
+		if (size >= scache_size) {
+			if (current_cpu_type() != CPU_LOONGSON3)
+				r4k_blast_scache();
+			else
+				r4k_blast_scache_node(pa_to_nid(addr));
+		} else
 			blast_scache_range(addr, addr + size);
 		preempt_enable();
 		__sync();
@@ -872,9 +896,12 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 
 	preempt_disable();
 	if (cpu_has_inclusive_pcaches) {
-		if (size >= scache_size)
-			r4k_blast_scache();
-		else {
+		if (size >= scache_size) {
+			if (current_cpu_type() != CPU_LOONGSON3)
+				r4k_blast_scache();
+			else
+				r4k_blast_scache_node(pa_to_nid(addr));
+		} else {
 			/*
 			 * There is no clearly documented alignment requirement
 			 * for the cache instruction on MIPS processors and
@@ -1903,6 +1930,7 @@ void r4k_cache_init(void)
 	r4k_blast_scache_page_setup();
 	r4k_blast_scache_page_indexed_setup();
 	r4k_blast_scache_setup();
+	r4k_blast_scache_node_setup();
 #ifdef CONFIG_EVA
 	r4k_blast_dcache_user_page_setup();
 	r4k_blast_icache_user_page_setup();
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH V5 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3
  2017-06-15  2:15 ` [PATCH V5 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
@ 2017-06-15  9:59   ` Ralf Baechle
  0 siblings, 0 replies; 2+ messages in thread
From: Ralf Baechle @ 2017-06-15  9:59 UTC (permalink / raw)
  To: Huacai Chen
  Cc: John Crispin, Steven J . Hill, linux-mips, Fuxin Zhang,
	Zhangjin Wu, stable

On Thu, Jun 15, 2017 at 10:15:45AM +0800, Huacai Chen wrote:

> For multi-node Loongson-3 (NUMA configuration), r4k_blast_scache() can
> only flush Node-0's scache. So we add r4k_blast_scache_node() by using
> (CAC_BASE | (node_id << NODE_ADDRSPACE_SHIFT)) instead of CKSEG0 as the
> start address.
> 
> Maybe all MIPS CPUs need r4k_blast_scache_node() to support cc-NUMA,
> but I don't know how to implement it for non-Loongson CPUs.

There other MIPS ccNUMA systems handle caches in hardware fully transparent
to software so no changes are required.  These systems are SGI's SN
(Origin, Onyx) and BCM1480 (ccNUMA support out of tree, but no sw support
required)).

  Ralf

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-06-15  9:59 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1497492952-23877-1-git-send-email-chenhc@lemote.com>
2017-06-15  2:15 ` [PATCH V5 2/9] MIPS: c-r4k: Add r4k_blast_scache_node for Loongson-3 Huacai Chen
2017-06-15  9:59   ` Ralf Baechle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox