linux-openrisc.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] openrisc: Add cacheinfo support
@ 2025-03-15 20:39 Sahil Siddiq
  2025-03-16  6:58 ` Stafford Horne
  2025-03-22 15:40 ` Markus Elfring
  0 siblings, 2 replies; 11+ messages in thread
From: Sahil Siddiq @ 2025-03-15 20:39 UTC (permalink / raw)
  To: jonas, stefan.kristiansson, shorne
  Cc: linux-openrisc, linux-kernel, Sahil Siddiq

Add cacheinfo support for OpenRISC.

Currently, a few CPU cache attributes pertaining to OpenRISC processors
are exposed along with other unrelated CPU attributes in the procfs file
system (/proc/cpuinfo). However, a few cache attributes remain unexposed.
An attempt is also made to pull these CPU cache attributes without
detecting if the relevant cache exists.

This patch provides a mechanism that the generic cacheinfo infrastructure
can employ to expose these attributes via the sysfs file system. These
attributes are exposed in /sys/devices/system/cpu/cpuX/cache/indexN.
Cache attributes are pulled only when the cache component is detected.

The implementation to pull cache attributes from the processor's
registers has been moved from arch/openrisc/kernel/setup.c with a few
modifications.

The patch also moves cache-related fields out of struct cpuinfo_or1k and
into its own struct to keep the implementation straightforward. This
reduces duplication of cache-related fields while keeping cpuinfo_or1k
extensible in case more cache descriptors are added in the future.

This implementation is based on similar work done for MIPS and LoongArch.

Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
---
Changes from v1 -> v2:
- Changed patch prefix from RFC to PATCH.
- cacheinfo.c: Print number of sets. Remove integer padding.
- dma.c
  (page_set_nocache): Access cache attributes only if component exists.
  (arch_sync_dma_for_device): Likewise.
- cache.c: Likewise.
- init.c: Likewise.

 arch/openrisc/include/asm/cpuinfo.h |  16 +++--
 arch/openrisc/kernel/Makefile       |   2 +-
 arch/openrisc/kernel/cacheinfo.c    | 106 ++++++++++++++++++++++++++++
 arch/openrisc/kernel/dma.c          |  22 +++---
 arch/openrisc/kernel/setup.c        |  45 ------------
 arch/openrisc/mm/cache.c            |  11 ++-
 arch/openrisc/mm/init.c             |   8 ++-
 7 files changed, 144 insertions(+), 66 deletions(-)
 create mode 100644 arch/openrisc/kernel/cacheinfo.c

diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h
index 5e4744153d0e..82f5d4c06314 100644
--- a/arch/openrisc/include/asm/cpuinfo.h
+++ b/arch/openrisc/include/asm/cpuinfo.h
@@ -15,16 +15,18 @@
 #ifndef __ASM_OPENRISC_CPUINFO_H
 #define __ASM_OPENRISC_CPUINFO_H
 
+struct cache_desc {
+	u32 size;
+	u32 sets;
+	u32 block_size;
+	u32 ways;
+};
+
 struct cpuinfo_or1k {
 	u32 clock_frequency;
 
-	u32 icache_size;
-	u32 icache_block_size;
-	u32 icache_ways;
-
-	u32 dcache_size;
-	u32 dcache_block_size;
-	u32 dcache_ways;
+	struct cache_desc icache;
+	struct cache_desc dcache;
 
 	u16 coreid;
 };
diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
index 79129161f3e0..e4c7d9bdd598 100644
--- a/arch/openrisc/kernel/Makefile
+++ b/arch/openrisc/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y	:= vmlinux.lds
 
 obj-y	:= head.o setup.o or32_ksyms.o process.o dma.o \
 	   traps.o time.o irq.o entry.o ptrace.o signal.o \
-	   sys_call_table.o unwinder.o
+	   sys_call_table.o unwinder.o cacheinfo.o
 
 obj-$(CONFIG_SMP)		+= smp.o sync-timer.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c
new file mode 100644
index 000000000000..6bb81e246f7e
--- /dev/null
+++ b/arch/openrisc/kernel/cacheinfo.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OpenRISC cacheinfo support
+ *
+ * Based on work done for MIPS and LoongArch. All original copyrights
+ * apply as per the original source declaration.
+ *
+ * OpenRISC implementation:
+ * Copyright (C) 2025 Sahil Siddiq <sahilcdq@proton.me>
+ */
+
+#include <linux/cacheinfo.h>
+#include <asm/cpuinfo.h>
+#include <asm/spr.h>
+#include <asm/spr_defs.h>
+
+static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
+				unsigned int level, struct cache_desc *cache, int cpu)
+{
+	this_leaf->type = type;
+	this_leaf->level = level;
+	this_leaf->coherency_line_size = cache->block_size;
+	this_leaf->number_of_sets = cache->sets;
+	this_leaf->ways_of_associativity = cache->ways;
+	this_leaf->size = cache->size;
+	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+}
+
+int init_cache_level(unsigned int cpu)
+{
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	int leaves = 0, levels = 0;
+	unsigned long upr = mfspr(SPR_UPR);
+	unsigned long iccfgr, dccfgr;
+
+	if (!(upr & SPR_UPR_UP)) {
+		printk(KERN_INFO
+		       "-- no UPR register... unable to detect configuration\n");
+		return -ENOENT;
+	}
+
+	if (upr & SPR_UPR_DCP) {
+		dccfgr = mfspr(SPR_DCCFGR);
+		cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
+		cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
+		cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
+		cpuinfo->dcache.size =
+		    cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size;
+		leaves += 1;
+		printk(KERN_INFO
+		       "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
+		       cpuinfo->dcache.size, cpuinfo->dcache.block_size,
+			   cpuinfo->dcache.sets,
+		       cpuinfo->dcache.ways);
+	} else
+		printk(KERN_INFO "-- dcache disabled\n");
+
+	if (upr & SPR_UPR_ICP) {
+		iccfgr = mfspr(SPR_ICCFGR);
+		cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
+		cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
+		cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
+		cpuinfo->icache.size =
+		    cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size;
+		leaves += 1;
+		printk(KERN_INFO
+		       "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
+		       cpuinfo->icache.size, cpuinfo->icache.block_size,
+			   cpuinfo->icache.sets,
+		       cpuinfo->icache.ways);
+	} else
+		printk(KERN_INFO "-- icache disabled\n");
+
+	if (!leaves)
+		return -ENOENT;
+
+	levels = 1;
+
+	this_cpu_ci->num_leaves = leaves;
+	this_cpu_ci->num_levels = levels;
+
+	return 0;
+}
+
+int populate_cache_leaves(unsigned int cpu)
+{
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+	int level = 1;
+
+	if (cpuinfo->dcache.ways) {
+		ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu);
+		this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ?
+					CACHE_WRITE_BACK : CACHE_WRITE_THROUGH;
+		this_leaf++;
+	}
+
+	if (cpuinfo->icache.ways)
+		ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu);
+
+	this_cpu_ci->cpu_map_populated = true;
+
+	return 0;
+}
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index b3edbb33b621..ffb161e41e9d 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -36,8 +36,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 
 	/* Flush page out of dcache */
-	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
-		mtspr(SPR_DCBFR, cl);
+	if (cpuinfo->dcache.ways) {
+		for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache.block_size)
+			mtspr(SPR_DCBFR, cl);
+	}
 
 	return 0;
 }
@@ -104,15 +106,19 @@ void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
 	switch (dir) {
 	case DMA_TO_DEVICE:
 		/* Flush the dcache for the requested range */
-		for (cl = addr; cl < addr + size;
-		     cl += cpuinfo->dcache_block_size)
-			mtspr(SPR_DCBFR, cl);
+		if (cpuinfo->dcache.ways) {
+			for (cl = addr; cl < addr + size;
+			     cl += cpuinfo->dcache.block_size)
+				mtspr(SPR_DCBFR, cl);
+		}
 		break;
 	case DMA_FROM_DEVICE:
 		/* Invalidate the dcache for the requested range */
-		for (cl = addr; cl < addr + size;
-		     cl += cpuinfo->dcache_block_size)
-			mtspr(SPR_DCBIR, cl);
+		if (cpuinfo->dcache.ways) {
+			for (cl = addr; cl < addr + size;
+			     cl += cpuinfo->dcache.block_size)
+				mtspr(SPR_DCBIR, cl);
+		}
 		break;
 	default:
 		/*
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index be56eaafc8b9..38172c0989cf 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -107,27 +107,6 @@ static void print_cpuinfo(void)
 	printk(KERN_INFO "CPU: OpenRISC-%x (revision %d) @%d MHz\n",
 	       version, revision, cpuinfo->clock_frequency / 1000000);
 
-	if (!(upr & SPR_UPR_UP)) {
-		printk(KERN_INFO
-		       "-- no UPR register... unable to detect configuration\n");
-		return;
-	}
-
-	if (upr & SPR_UPR_DCP)
-		printk(KERN_INFO
-		       "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-		       cpuinfo->dcache_size, cpuinfo->dcache_block_size,
-		       cpuinfo->dcache_ways);
-	else
-		printk(KERN_INFO "-- dcache disabled\n");
-	if (upr & SPR_UPR_ICP)
-		printk(KERN_INFO
-		       "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-		       cpuinfo->icache_size, cpuinfo->icache_block_size,
-		       cpuinfo->icache_ways);
-	else
-		printk(KERN_INFO "-- icache disabled\n");
-
 	if (upr & SPR_UPR_DMP)
 		printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n",
 		       1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
@@ -155,8 +134,6 @@ static void print_cpuinfo(void)
 void __init setup_cpuinfo(void)
 {
 	struct device_node *cpu;
-	unsigned long iccfgr, dccfgr;
-	unsigned long cache_set_size;
 	int cpu_id = smp_processor_id();
 	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id];
 
@@ -164,20 +141,6 @@ void __init setup_cpuinfo(void)
 	if (!cpu)
 		panic("Couldn't find CPU%d in device tree...\n", cpu_id);
 
-	iccfgr = mfspr(SPR_ICCFGR);
-	cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
-	cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
-	cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
-	cpuinfo->icache_size =
-	    cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size;
-
-	dccfgr = mfspr(SPR_DCCFGR);
-	cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
-	cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
-	cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
-	cpuinfo->dcache_size =
-	    cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size;
-
 	if (of_property_read_u32(cpu, "clock-frequency",
 				 &cpuinfo->clock_frequency)) {
 		printk(KERN_WARNING
@@ -320,14 +283,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV);
 	}
 	seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ);
-	seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size);
-	seq_printf(m, "dcache block size\t: %d bytes\n",
-		   cpuinfo->dcache_block_size);
-	seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways);
-	seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size);
-	seq_printf(m, "icache block size\t: %d bytes\n",
-		   cpuinfo->icache_block_size);
-	seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways);
 	seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n",
 		   1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
 		   1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW));
diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c
index eb43b73f3855..acdf2dc256bf 100644
--- a/arch/openrisc/mm/cache.c
+++ b/arch/openrisc/mm/cache.c
@@ -16,10 +16,15 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-static __always_inline void cache_loop(struct page *page, const unsigned int reg)
+static __always_inline void cache_loop(struct page *page, const unsigned int reg,
+				       const unsigned int cache_type)
 {
 	unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT;
 	unsigned long line = paddr & ~(L1_CACHE_BYTES - 1);
+	unsigned long upr = mfspr(SPR_UPR);
+
+	if (!(upr & SPR_UPR_UP & cache_type))
+		return;
 
 	while (line < paddr + PAGE_SIZE) {
 		mtspr(reg, line);
@@ -29,13 +34,13 @@ static __always_inline void cache_loop(struct page *page, const unsigned int reg
 
 void local_dcache_page_flush(struct page *page)
 {
-	cache_loop(page, SPR_DCBFR);
+	cache_loop(page, SPR_DCBFR, SPR_UPR_DCP);
 }
 EXPORT_SYMBOL(local_dcache_page_flush);
 
 void local_icache_page_inv(struct page *page)
 {
-	cache_loop(page, SPR_ICBIR);
+	cache_loop(page, SPR_ICBIR, SPR_UPR_ICP);
 }
 EXPORT_SYMBOL(local_icache_page_inv);
 
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index d0cb1a0126f9..bbe16546c5b9 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -124,6 +124,7 @@ static void __init map_ram(void)
 void __init paging_init(void)
 {
 	int i;
+	unsigned long upr;
 
 	printk(KERN_INFO "Setting up paging and PTEs.\n");
 
@@ -176,8 +177,11 @@ void __init paging_init(void)
 	barrier();
 
 	/* Invalidate instruction caches after code modification */
-	mtspr(SPR_ICBIR, 0x900);
-	mtspr(SPR_ICBIR, 0xa00);
+	upr = mfspr(SPR_UPR);
+	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
+		mtspr(SPR_ICBIR, 0x900);
+		mtspr(SPR_ICBIR, 0xa00);
+	}
 
 	/* New TLB miss handlers and kernel page tables are in now place.
 	 * Make sure that page flags get updated for all pages in TLB by
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-15 20:39 [PATCH v2] openrisc: Add cacheinfo support Sahil Siddiq
@ 2025-03-16  6:58 ` Stafford Horne
  2025-03-17  8:25   ` Geert Uytterhoeven
  2025-03-22 15:40 ` Markus Elfring
  1 sibling, 1 reply; 11+ messages in thread
From: Stafford Horne @ 2025-03-16  6:58 UTC (permalink / raw)
  To: Sahil Siddiq
  Cc: jonas, stefan.kristiansson, linux-openrisc, linux-kernel,
	Sahil Siddiq

On Sun, Mar 16, 2025 at 02:09:37AM +0530, Sahil Siddiq wrote:
> Add cacheinfo support for OpenRISC.
> 
> Currently, a few CPU cache attributes pertaining to OpenRISC processors
> are exposed along with other unrelated CPU attributes in the procfs file
> system (/proc/cpuinfo). However, a few cache attributes remain unexposed.
> An attempt is also made to pull these CPU cache attributes without
> detecting if the relevant cache exists.
> 
> This patch provides a mechanism that the generic cacheinfo infrastructure
> can employ to expose these attributes via the sysfs file system. These
> attributes are exposed in /sys/devices/system/cpu/cpuX/cache/indexN.
> Cache attributes are pulled only when the cache component is detected.
> 
> The implementation to pull cache attributes from the processor's
> registers has been moved from arch/openrisc/kernel/setup.c with a few
> modifications.
> 
> The patch also moves cache-related fields out of struct cpuinfo_or1k and
> into its own struct to keep the implementation straightforward. This
> reduces duplication of cache-related fields while keeping cpuinfo_or1k
> extensible in case more cache descriptors are added in the future.
> 
> This implementation is based on similar work done for MIPS and LoongArch.
> 
> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> ---
> Changes from v1 -> v2:
> - Changed patch prefix from RFC to PATCH.
> - cacheinfo.c: Print number of sets. Remove integer padding.
> - dma.c
>   (page_set_nocache): Access cache attributes only if component exists.
>   (arch_sync_dma_for_device): Likewise.
> - cache.c: Likewise.
> - init.c: Likewise.

I think it may be better to split these cache op change out to a separate patch
which we can sequence in before cacheinfo and make your changes a series.

>  arch/openrisc/include/asm/cpuinfo.h |  16 +++--
>  arch/openrisc/kernel/Makefile       |   2 +-
>  arch/openrisc/kernel/cacheinfo.c    | 106 ++++++++++++++++++++++++++++
>  arch/openrisc/kernel/dma.c          |  22 +++---
>  arch/openrisc/kernel/setup.c        |  45 ------------
>  arch/openrisc/mm/cache.c            |  11 ++-
>  arch/openrisc/mm/init.c             |   8 ++-
>  7 files changed, 144 insertions(+), 66 deletions(-)
>  create mode 100644 arch/openrisc/kernel/cacheinfo.c
> 
> diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h
> index 5e4744153d0e..82f5d4c06314 100644
> --- a/arch/openrisc/include/asm/cpuinfo.h
> +++ b/arch/openrisc/include/asm/cpuinfo.h
> @@ -15,16 +15,18 @@
>  #ifndef __ASM_OPENRISC_CPUINFO_H
>  #define __ASM_OPENRISC_CPUINFO_H
>  
> +struct cache_desc {
> +	u32 size;
> +	u32 sets;
> +	u32 block_size;
> +	u32 ways;

Considering the changes below to add cache available checks, maybe we
want to add a field here, such as `bool present`.  Or a flags field like
is used in loongarch?

Though just checking the UPR is probably faster in all cases, it may be good to
have a helper function for that.

> +};
> +
>  struct cpuinfo_or1k {
>  	u32 clock_frequency;
>  
> -	u32 icache_size;
> -	u32 icache_block_size;
> -	u32 icache_ways;
> -
> -	u32 dcache_size;
> -	u32 dcache_block_size;
> -	u32 dcache_ways;
> +	struct cache_desc icache;
> +	struct cache_desc dcache;
>  
>  	u16 coreid;
>  };
> diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
> index 79129161f3e0..e4c7d9bdd598 100644
> --- a/arch/openrisc/kernel/Makefile
> +++ b/arch/openrisc/kernel/Makefile
> @@ -7,7 +7,7 @@ extra-y	:= vmlinux.lds
>  
>  obj-y	:= head.o setup.o or32_ksyms.o process.o dma.o \
>  	   traps.o time.o irq.o entry.o ptrace.o signal.o \
> -	   sys_call_table.o unwinder.o
> +	   sys_call_table.o unwinder.o cacheinfo.o
>  
>  obj-$(CONFIG_SMP)		+= smp.o sync-timer.o
>  obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
> diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c
> new file mode 100644
> index 000000000000..6bb81e246f7e
> --- /dev/null
> +++ b/arch/openrisc/kernel/cacheinfo.c
> @@ -0,0 +1,106 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * OpenRISC cacheinfo support
> + *
> + * Based on work done for MIPS and LoongArch. All original copyrights
> + * apply as per the original source declaration.
> + *
> + * OpenRISC implementation:
> + * Copyright (C) 2025 Sahil Siddiq <sahilcdq@proton.me>
> + */
> +
> +#include <linux/cacheinfo.h>
> +#include <asm/cpuinfo.h>
> +#include <asm/spr.h>
> +#include <asm/spr_defs.h>
> +
> +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
> +				unsigned int level, struct cache_desc *cache, int cpu)
> +{
> +	this_leaf->type = type;
> +	this_leaf->level = level;
> +	this_leaf->coherency_line_size = cache->block_size;
> +	this_leaf->number_of_sets = cache->sets;
> +	this_leaf->ways_of_associativity = cache->ways;
> +	this_leaf->size = cache->size;
> +	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
> +}
> +
> +int init_cache_level(unsigned int cpu)
> +{
> +	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +	int leaves = 0, levels = 0;
> +	unsigned long upr = mfspr(SPR_UPR);
> +	unsigned long iccfgr, dccfgr;
> +
> +	if (!(upr & SPR_UPR_UP)) {
> +		printk(KERN_INFO
> +		       "-- no UPR register... unable to detect configuration\n");
> +		return -ENOENT;
> +	}
> +
> +	if (upr & SPR_UPR_DCP) {
> +		dccfgr = mfspr(SPR_DCCFGR);
> +		cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
> +		cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
> +		cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
> +		cpuinfo->dcache.size =
> +		    cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size;
> +		leaves += 1;
> +		printk(KERN_INFO
> +		       "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> +		       cpuinfo->dcache.size, cpuinfo->dcache.block_size,
> +			   cpuinfo->dcache.sets,
> +		       cpuinfo->dcache.ways);

The indentation of sets looks a bit off here.

> +	} else
> +		printk(KERN_INFO "-- dcache disabled\n");
> +
> +	if (upr & SPR_UPR_ICP) {
> +		iccfgr = mfspr(SPR_ICCFGR);
> +		cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
> +		cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
> +		cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
> +		cpuinfo->icache.size =
> +		    cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size;
> +		leaves += 1;
> +		printk(KERN_INFO
> +		       "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> +		       cpuinfo->icache.size, cpuinfo->icache.block_size,
> +			   cpuinfo->icache.sets,
> +		       cpuinfo->icache.ways);

The indentation of sets looks a bit off here. Maybe its the others that are out
of line, but can you fix?  Also I think sets and ways could be on the same line.

> +	} else
> +		printk(KERN_INFO "-- icache disabled\n");
> +
> +	if (!leaves)
> +		return -ENOENT;
> +
> +	levels = 1;
> +
> +	this_cpu_ci->num_leaves = leaves;
> +	this_cpu_ci->num_levels = levels;
> +
> +	return 0;
> +}
> +
> +int populate_cache_leaves(unsigned int cpu)
> +{
> +	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
> +	int level = 1;
> +
> +	if (cpuinfo->dcache.ways) {
> +		ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu);
> +		this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ?
> +					CACHE_WRITE_BACK : CACHE_WRITE_THROUGH;
> +		this_leaf++;
> +	}
> +
> +	if (cpuinfo->icache.ways)
> +		ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu);
> +
> +	this_cpu_ci->cpu_map_populated = true;
> +
> +	return 0;
> +}
> diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
> index b3edbb33b621..ffb161e41e9d 100644
> --- a/arch/openrisc/kernel/dma.c
> +++ b/arch/openrisc/kernel/dma.c
> @@ -36,8 +36,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
>  	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>  
>  	/* Flush page out of dcache */
> -	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
> -		mtspr(SPR_DCBFR, cl);
> +	if (cpuinfo->dcache.ways) {
> +		for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache.block_size)
> +			mtspr(SPR_DCBFR, cl);
> +	}

I think it would be better to move this to cacheflush.h as a function like
flush_dcache_range() or local_dcache_range_flush().

>  	return 0;
>  }
> @@ -104,15 +106,19 @@ void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
>  	switch (dir) {
>  	case DMA_TO_DEVICE:
>  		/* Flush the dcache for the requested range */
> -		for (cl = addr; cl < addr + size;
> -		     cl += cpuinfo->dcache_block_size)
> -			mtspr(SPR_DCBFR, cl);
> +		if (cpuinfo->dcache.ways) {
> +			for (cl = addr; cl < addr + size;
> +			     cl += cpuinfo->dcache.block_size)
> +				mtspr(SPR_DCBFR, cl);
> +		}

Also here,I think it would be better to move this to cacheflush.h as a function like
flush_dcache_range().

Or, local_dcache_range_flush(), which seems to be the convention we use in
cacheflush.h/cache.c.


>  		break;
>  	case DMA_FROM_DEVICE:
>  		/* Invalidate the dcache for the requested range */
> -		for (cl = addr; cl < addr + size;
> -		     cl += cpuinfo->dcache_block_size)
> -			mtspr(SPR_DCBIR, cl);
> +		if (cpuinfo->dcache.ways) {
> +			for (cl = addr; cl < addr + size;
> +			     cl += cpuinfo->dcache.block_size)
> +				mtspr(SPR_DCBIR, cl);
> +		}

This one could be invalidate_dcache_range().   Note, this will also be useful
for the kexec patches that I am working on.

Or, local_dcache_range_inv(), which seems to be the convention we use in
cacheflush.h/cache.c.

>  		break;
>  	default:
>  		/*
> diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
> index be56eaafc8b9..38172c0989cf 100644
> --- a/arch/openrisc/kernel/setup.c
> +++ b/arch/openrisc/kernel/setup.c
> @@ -107,27 +107,6 @@ static void print_cpuinfo(void)
>  	printk(KERN_INFO "CPU: OpenRISC-%x (revision %d) @%d MHz\n",
>  	       version, revision, cpuinfo->clock_frequency / 1000000);
>  
> -	if (!(upr & SPR_UPR_UP)) {
> -		printk(KERN_INFO
> -		       "-- no UPR register... unable to detect configuration\n");
> -		return;
> -	}
> -
> -	if (upr & SPR_UPR_DCP)
> -		printk(KERN_INFO
> -		       "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
> -		       cpuinfo->dcache_size, cpuinfo->dcache_block_size,
> -		       cpuinfo->dcache_ways);
> -	else
> -		printk(KERN_INFO "-- dcache disabled\n");
> -	if (upr & SPR_UPR_ICP)
> -		printk(KERN_INFO
> -		       "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
> -		       cpuinfo->icache_size, cpuinfo->icache_block_size,
> -		       cpuinfo->icache_ways);
> -	else
> -		printk(KERN_INFO "-- icache disabled\n");
> -
>  	if (upr & SPR_UPR_DMP)
>  		printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n",
>  		       1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
> @@ -155,8 +134,6 @@ static void print_cpuinfo(void)
>  void __init setup_cpuinfo(void)
>  {
>  	struct device_node *cpu;
> -	unsigned long iccfgr, dccfgr;
> -	unsigned long cache_set_size;
>  	int cpu_id = smp_processor_id();
>  	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id];
>  
> @@ -164,20 +141,6 @@ void __init setup_cpuinfo(void)
>  	if (!cpu)
>  		panic("Couldn't find CPU%d in device tree...\n", cpu_id);
>  
> -	iccfgr = mfspr(SPR_ICCFGR);
> -	cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
> -	cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
> -	cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
> -	cpuinfo->icache_size =
> -	    cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size;
> -
> -	dccfgr = mfspr(SPR_DCCFGR);
> -	cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
> -	cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
> -	cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
> -	cpuinfo->dcache_size =
> -	    cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size;
> -
>  	if (of_property_read_u32(cpu, "clock-frequency",
>  				 &cpuinfo->clock_frequency)) {
>  		printk(KERN_WARNING
> @@ -320,14 +283,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
>  		seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV);
>  	}
>  	seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ);
> -	seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size);
> -	seq_printf(m, "dcache block size\t: %d bytes\n",
> -		   cpuinfo->dcache_block_size);
> -	seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways);
> -	seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size);
> -	seq_printf(m, "icache block size\t: %d bytes\n",
> -		   cpuinfo->icache_block_size);
> -	seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways);
>  	seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n",
>  		   1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
>  		   1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW));
> diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c
> index eb43b73f3855..acdf2dc256bf 100644
> --- a/arch/openrisc/mm/cache.c
> +++ b/arch/openrisc/mm/cache.c
> @@ -16,10 +16,15 @@
>  #include <asm/cacheflush.h>
>  #include <asm/tlbflush.h>
>  
> -static __always_inline void cache_loop(struct page *page, const unsigned int reg)
> +static __always_inline void cache_loop(struct page *page, const unsigned int reg,
> +				       const unsigned int cache_type)
>  {
>  	unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT;
>  	unsigned long line = paddr & ~(L1_CACHE_BYTES - 1);
> +	unsigned long upr = mfspr(SPR_UPR);
> +
> +	if (!(upr & SPR_UPR_UP & cache_type))
> +		return;
>  
>  	while (line < paddr + PAGE_SIZE) {
>  		mtspr(reg, line);
> @@ -29,13 +34,13 @@ static __always_inline void cache_loop(struct page *page, const unsigned int reg
>  
>  void local_dcache_page_flush(struct page *page)
>  {
> -	cache_loop(page, SPR_DCBFR);
> +	cache_loop(page, SPR_DCBFR, SPR_UPR_DCP);
>  }
>  EXPORT_SYMBOL(local_dcache_page_flush);
>  
>  void local_icache_page_inv(struct page *page)
>  {
> -	cache_loop(page, SPR_ICBIR);
> +	cache_loop(page, SPR_ICBIR, SPR_UPR_ICP);
>  }
>  EXPORT_SYMBOL(local_icache_page_inv);

OK, if we move the range flush and invalidate here we will need to add to this
cache_loop a bit more.

> diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
> index d0cb1a0126f9..bbe16546c5b9 100644
> --- a/arch/openrisc/mm/init.c
> +++ b/arch/openrisc/mm/init.c
> @@ -124,6 +124,7 @@ static void __init map_ram(void)
>  void __init paging_init(void)
>  {
>  	int i;
> +	unsigned long upr;
>  
>  	printk(KERN_INFO "Setting up paging and PTEs.\n");
>  
> @@ -176,8 +177,11 @@ void __init paging_init(void)
>  	barrier();
>  
>  	/* Invalidate instruction caches after code modification */
> -	mtspr(SPR_ICBIR, 0x900);
> -	mtspr(SPR_ICBIR, 0xa00);
> +	upr = mfspr(SPR_UPR);
> +	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
> +		mtspr(SPR_ICBIR, 0x900);
> +		mtspr(SPR_ICBIR, 0xa00);
> +	}

Here we could use new utilities such as local_icache_range_inv(0x900,
L1_CACHE_BYTES);

Or something like local_icache_block_inv(0x900).  This only needs to flush a
single block as the code it is invalidating is just 2 instructions 8 bytes:

    .org 0x900
	l.j     boot_dtlb_miss_handler
	 l.nop

    .org 0xa00
	l.j     boot_itlb_miss_handler
	 l.nop
>
>  
>  	/* New TLB miss handlers and kernel page tables are in now place.
>  	 * Make sure that page flags get updated for all pages in TLB by
> -- 
> 2.48.1

-Stafford

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-16  6:58 ` Stafford Horne
@ 2025-03-17  8:25   ` Geert Uytterhoeven
  2025-03-17 12:00     ` Stafford Horne
  0 siblings, 1 reply; 11+ messages in thread
From: Geert Uytterhoeven @ 2025-03-17  8:25 UTC (permalink / raw)
  To: Stafford Horne
  Cc: Sahil Siddiq, jonas, stefan.kristiansson, linux-openrisc,
	linux-kernel, Sahil Siddiq

Hi Stafford,

On Sun, 16 Mar 2025 at 07:59, Stafford Horne <shorne@gmail.com> wrote:
> On Sun, Mar 16, 2025 at 02:09:37AM +0530, Sahil Siddiq wrote:
> > Add cacheinfo support for OpenRISC.
> >
> > Currently, a few CPU cache attributes pertaining to OpenRISC processors
> > are exposed along with other unrelated CPU attributes in the procfs file
> > system (/proc/cpuinfo). However, a few cache attributes remain unexposed.
> > An attempt is also made to pull these CPU cache attributes without
> > detecting if the relevant cache exists.
> >
> > This patch provides a mechanism that the generic cacheinfo infrastructure
> > can employ to expose these attributes via the sysfs file system. These
> > attributes are exposed in /sys/devices/system/cpu/cpuX/cache/indexN.
> > Cache attributes are pulled only when the cache component is detected.
> >
> > The implementation to pull cache attributes from the processor's
> > registers has been moved from arch/openrisc/kernel/setup.c with a few
> > modifications.
> >
> > The patch also moves cache-related fields out of struct cpuinfo_or1k and
> > into its own struct to keep the implementation straightforward. This
> > reduces duplication of cache-related fields while keeping cpuinfo_or1k
> > extensible in case more cache descriptors are added in the future.
> >
> > This implementation is based on similar work done for MIPS and LoongArch.
> >
> > Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>

> > --- a/arch/openrisc/include/asm/cpuinfo.h
> > +++ b/arch/openrisc/include/asm/cpuinfo.h
> > @@ -15,16 +15,18 @@
> >  #ifndef __ASM_OPENRISC_CPUINFO_H
> >  #define __ASM_OPENRISC_CPUINFO_H
> >
> > +struct cache_desc {
> > +     u32 size;
> > +     u32 sets;
> > +     u32 block_size;
> > +     u32 ways;
>
> Considering the changes below to add cache available checks, maybe we
> want to add a field here, such as `bool present`.  Or a flags field like
> is used in loongarch?

I assume cache_desc.size is zero when the cache is not present?

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-17  8:25   ` Geert Uytterhoeven
@ 2025-03-17 12:00     ` Stafford Horne
  2025-03-17 18:36       ` Sahil Siddiq
  0 siblings, 1 reply; 11+ messages in thread
From: Stafford Horne @ 2025-03-17 12:00 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Sahil Siddiq, jonas, stefan.kristiansson, linux-openrisc,
	linux-kernel, Sahil Siddiq

Hi Geert,

On Mon, Mar 17, 2025 at 09:25:26AM +0100, Geert Uytterhoeven wrote:
> Hi Stafford,
> 
> On Sun, 16 Mar 2025 at 07:59, Stafford Horne <shorne@gmail.com> wrote:
> > On Sun, Mar 16, 2025 at 02:09:37AM +0530, Sahil Siddiq wrote:
> > > Add cacheinfo support for OpenRISC.
> > >
> > > Currently, a few CPU cache attributes pertaining to OpenRISC processors
> > > are exposed along with other unrelated CPU attributes in the procfs file
> > > system (/proc/cpuinfo). However, a few cache attributes remain unexposed.
> > > An attempt is also made to pull these CPU cache attributes without
> > > detecting if the relevant cache exists.
> > >
> > > This patch provides a mechanism that the generic cacheinfo infrastructure
> > > can employ to expose these attributes via the sysfs file system. These
> > > attributes are exposed in /sys/devices/system/cpu/cpuX/cache/indexN.
> > > Cache attributes are pulled only when the cache component is detected.
> > >
> > > The implementation to pull cache attributes from the processor's
> > > registers has been moved from arch/openrisc/kernel/setup.c with a few
> > > modifications.
> > >
> > > The patch also moves cache-related fields out of struct cpuinfo_or1k and
> > > into its own struct to keep the implementation straightforward. This
> > > reduces duplication of cache-related fields while keeping cpuinfo_or1k
> > > extensible in case more cache descriptors are added in the future.
> > >
> > > This implementation is based on similar work done for MIPS and LoongArch.
> > >
> > > Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> 
> > > --- a/arch/openrisc/include/asm/cpuinfo.h
> > > +++ b/arch/openrisc/include/asm/cpuinfo.h
> > > @@ -15,16 +15,18 @@
> > >  #ifndef __ASM_OPENRISC_CPUINFO_H
> > >  #define __ASM_OPENRISC_CPUINFO_H
> > >
> > > +struct cache_desc {
> > > +     u32 size;
> > > +     u32 sets;
> > > +     u32 block_size;
> > > +     u32 ways;
> >
> > Considering the changes below to add cache available checks, maybe we
> > want to add a field here, such as `bool present`.  Or a flags field like
> > is used in loongarch?
> 
> I assume cache_desc.size is zero when the cache is not present?

Yes, good point, would be clean too work too.  I was not too happy with using
cache_desc.ways as is done below.  Also there ended up bieng 2 different ways
that were used.

I am happy to use size too, but I think checking the SPR would be faster or just
as fast as using the struct.  I am not too fussed either way.

-Stafford


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-17 12:00     ` Stafford Horne
@ 2025-03-17 18:36       ` Sahil Siddiq
  2025-03-18  7:43         ` Stafford Horne
  0 siblings, 1 reply; 11+ messages in thread
From: Sahil Siddiq @ 2025-03-17 18:36 UTC (permalink / raw)
  To: Stafford Horne, Geert Uytterhoeven
  Cc: jonas, stefan.kristiansson, linux-openrisc, linux-kernel,
	Sahil Siddiq

Hello Stafford and Geert,

Thank you for the reviews.

On 3/17/25 1:55 PM, Geert Uytterhoeven wrote:
> On Sun, 16 Mar 2025 at 07:59, Stafford Horne <shorne@gmail.com> wrote:
>> [...]
>>> +struct cache_desc {
>>> +     u32 size;
>>> +     u32 sets;
>>> +     u32 block_size;
>>> +     u32 ways;
>>
>> Considering the changes below to add cache available checks, maybe we
>> want to add a field here, such as `bool present`.  Or a flags field like
>> is used in loongarch?
> 
> I assume cache_desc.size is zero when the cache is not present?

Yes, cache_desc.size will be zero when there's no cache component since
cpuinfo_or1k[NR_CPUS] is declared as a global variable in setup.c. The
cache attributes are stored in this struct.

On 3/17/25 5:30 PM, Stafford Horne wrote:
> [...]
> Yes, good point, would be clean too work too.  I was not too happy with using
> cache_desc.ways as is done below.  Also there ended up bieng 2 different ways
> that were used.
>
> I am happy to use size too, but I think checking the SPR would be faster or just
> as fast as using the struct.  I am not too fussed either way.

There are a few places (e.g. cache_loop() in cache.c) where
cpuinfo_or1k[smp_processor_id()] is not being referenced. This will have to be
referenced to access cache_desc. In these cases, I think it would be better to
read from the SPR instead. For consistency, the SPR can also be read where
cpuinfo_or1k[smp_processor_id()] is already being referenced.

On 3/16/25 12:28 PM, Stafford Horne wrote:
> On Sun, Mar 16, 2025 at 02:09:37AM +0530, Sahil Siddiq wrote:
>> Add cacheinfo support for OpenRISC.
>> [...]
>> diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c
>> new file mode 100644
>> index 000000000000..6bb81e246f7e
>> --- /dev/null
>> +++ b/arch/openrisc/kernel/cacheinfo.c
>> @@ -0,0 +1,106 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * OpenRISC cacheinfo support
>> + *
>> + * Based on work done for MIPS and LoongArch. All original copyrights
>> + * apply as per the original source declaration.
>> + *
>> + * OpenRISC implementation:
>> + * Copyright (C) 2025 Sahil Siddiq <sahilcdq@proton.me>
>> + */
>> +
>> +#include <linux/cacheinfo.h>
>> +#include <asm/cpuinfo.h>
>> +#include <asm/spr.h>
>> +#include <asm/spr_defs.h>
>> +
>> +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
>> +				unsigned int level, struct cache_desc *cache, int cpu)
>> +{
>> +	this_leaf->type = type;
>> +	this_leaf->level = level;
>> +	this_leaf->coherency_line_size = cache->block_size;
>> +	this_leaf->number_of_sets = cache->sets;
>> +	this_leaf->ways_of_associativity = cache->ways;
>> +	this_leaf->size = cache->size;
>> +	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
>> +}
>> +
>> +int init_cache_level(unsigned int cpu)
>> +{
>> +	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
>> +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
>> +	int leaves = 0, levels = 0;
>> +	unsigned long upr = mfspr(SPR_UPR);
>> +	unsigned long iccfgr, dccfgr;
>> +
>> +	if (!(upr & SPR_UPR_UP)) {
>> +		printk(KERN_INFO
>> +		       "-- no UPR register... unable to detect configuration\n");
>> +		return -ENOENT;
>> +	}
>> +
>> +	if (upr & SPR_UPR_DCP) {
>> +		dccfgr = mfspr(SPR_DCCFGR);
>> +		cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
>> +		cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
>> +		cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
>> +		cpuinfo->dcache.size =
>> +		    cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size;
>> +		leaves += 1;
>> +		printk(KERN_INFO
>> +		       "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
>> +		       cpuinfo->dcache.size, cpuinfo->dcache.block_size,
>> +			   cpuinfo->dcache.sets,
>> +		       cpuinfo->dcache.ways);
> 
> The indentation of sets looks a bit off here.
> 
>> +	} else
>> +		printk(KERN_INFO "-- dcache disabled\n");
>> +
>> +	if (upr & SPR_UPR_ICP) {
>> +		iccfgr = mfspr(SPR_ICCFGR);
>> +		cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
>> +		cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
>> +		cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
>> +		cpuinfo->icache.size =
>> +		    cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size;
>> +		leaves += 1;
>> +		printk(KERN_INFO
>> +		       "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
>> +		       cpuinfo->icache.size, cpuinfo->icache.block_size,
>> +			   cpuinfo->icache.sets,
>> +		       cpuinfo->icache.ways);
> 
> The indentation of sets looks a bit off here. Maybe its the others that are out
> of line, but can you fix?  Also I think sets and ways could be on the same line.

Sorry, I must have missed this. I'll fix it.

>> [...]
>> diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
>> index b3edbb33b621..ffb161e41e9d 100644
>> --- a/arch/openrisc/kernel/dma.c
>> +++ b/arch/openrisc/kernel/dma.c
>> @@ -36,8 +36,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
>>   	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>>   
>>   	/* Flush page out of dcache */
>> -	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
>> -		mtspr(SPR_DCBFR, cl);
>> +	if (cpuinfo->dcache.ways) {
>> +		for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache.block_size)
>> +			mtspr(SPR_DCBFR, cl);
>> +	}
> 
> I think it would be better to move this to cacheflush.h as a function like
> flush_dcache_range() or local_dcache_range_flush().
> 
>>   	return 0;
>>   }
>> @@ -104,15 +106,19 @@ void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
>>   	switch (dir) {
>>   	case DMA_TO_DEVICE:
>>   		/* Flush the dcache for the requested range */
>> -		for (cl = addr; cl < addr + size;
>> -		     cl += cpuinfo->dcache_block_size)
>> -			mtspr(SPR_DCBFR, cl);
>> +		if (cpuinfo->dcache.ways) {
>> +			for (cl = addr; cl < addr + size;
>> +			     cl += cpuinfo->dcache.block_size)
>> +				mtspr(SPR_DCBFR, cl);
>> +		}
> 
> Also here,I think it would be better to move this to cacheflush.h as a function like
> flush_dcache_range().
> 
> Or, local_dcache_range_flush(), which seems to be the convention we use in
> cacheflush.h/cache.c.
> 
> 
>>   		break;
>>   	case DMA_FROM_DEVICE:
>>   		/* Invalidate the dcache for the requested range */
>> -		for (cl = addr; cl < addr + size;
>> -		     cl += cpuinfo->dcache_block_size)
>> -			mtspr(SPR_DCBIR, cl);
>> +		if (cpuinfo->dcache.ways) {
>> +			for (cl = addr; cl < addr + size;
>> +			     cl += cpuinfo->dcache.block_size)
>> +				mtspr(SPR_DCBIR, cl);
>> +		}
> 
> This one could be invalidate_dcache_range().   Note, this will also be useful
> for the kexec patches that I am working on.
> 
> Or, local_dcache_range_inv(), which seems to be the convention we use in
> cacheflush.h/cache.c.

Understood.

>> [...]
>> @@ -29,13 +34,13 @@ static __always_inline void cache_loop(struct page *page, const unsigned int reg
>>   
>>   void local_dcache_page_flush(struct page *page)
>>   {
>> -	cache_loop(page, SPR_DCBFR);
>> +	cache_loop(page, SPR_DCBFR, SPR_UPR_DCP);
>>   }
>>   EXPORT_SYMBOL(local_dcache_page_flush);
>>   
>>   void local_icache_page_inv(struct page *page)
>>   {
>> -	cache_loop(page, SPR_ICBIR);
>> +	cache_loop(page, SPR_ICBIR, SPR_UPR_ICP);
>>   }
>>   EXPORT_SYMBOL(local_icache_page_inv);
> 
> OK, if we move the range flush and invalidate here we will need to add to this
> cache_loop a bit more.

Right. I'll see what can be done to keep it concise.

>> diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
>> index d0cb1a0126f9..bbe16546c5b9 100644
>> --- a/arch/openrisc/mm/init.c
>> +++ b/arch/openrisc/mm/init.c
>> @@ -124,6 +124,7 @@ static void __init map_ram(void)
>>   void __init paging_init(void)
>>   {
>>   	int i;
>> +	unsigned long upr;
>>   
>>   	printk(KERN_INFO "Setting up paging and PTEs.\n");
>>   
>> @@ -176,8 +177,11 @@ void __init paging_init(void)
>>   	barrier();
>>   
>>   	/* Invalidate instruction caches after code modification */
>> -	mtspr(SPR_ICBIR, 0x900);
>> -	mtspr(SPR_ICBIR, 0xa00);
>> +	upr = mfspr(SPR_UPR);
>> +	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
>> +		mtspr(SPR_ICBIR, 0x900);
>> +		mtspr(SPR_ICBIR, 0xa00);
>> +	}
> 
> Here we could use new utilities such as local_icache_range_inv(0x900,
> L1_CACHE_BYTES);
> 
> Or something like local_icache_block_inv(0x900).  This only needs to flush a
> single block as the code it is invalidating is just 2 instructions 8 bytes:
> 
>      .org 0x900
> 	l.j     boot_dtlb_miss_handler
> 	 l.nop
> 
>      .org 0xa00
> 	l.j     boot_itlb_miss_handler
> 	 l.nop

Given that there'll be generic local_(i|d)cache_range_inv(start, stop) utility
functions, would it make sense to simply have a macro defined as:

#define local_icache_block_inv(addr) local_icache_range_inv(start, L1_CACHE_BYTES)

instead of having a separate function for invalidating a single cache line? This would
still use cache_loop() under the hood. The alternative would be to use
local_icache_range_inv(start, L1_CACHE_BYTES) directly but using the macro might be
more readable.

Thanks,
Sahil

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-17 18:36       ` Sahil Siddiq
@ 2025-03-18  7:43         ` Stafford Horne
  2025-03-22 13:51           ` Sahil Siddiq
  0 siblings, 1 reply; 11+ messages in thread
From: Stafford Horne @ 2025-03-18  7:43 UTC (permalink / raw)
  To: Sahil Siddiq
  Cc: Geert Uytterhoeven, jonas, stefan.kristiansson, linux-openrisc,
	linux-kernel, Sahil Siddiq

Hi Sahil,

On Tue, Mar 18, 2025 at 12:06:30AM +0530, Sahil Siddiq wrote:
> Hello Stafford and Geert,
> 
> Thank you for the reviews.
> 
> On 3/17/25 1:55 PM, Geert Uytterhoeven wrote:
> > On Sun, 16 Mar 2025 at 07:59, Stafford Horne <shorne@gmail.com> wrote:
> > > [...]
> > > > +struct cache_desc {
> > > > +     u32 size;
> > > > +     u32 sets;
> > > > +     u32 block_size;
> > > > +     u32 ways;
> > > 
> > > Considering the changes below to add cache available checks, maybe we
> > > want to add a field here, such as `bool present`.  Or a flags field like
> > > is used in loongarch?
> > 
> > I assume cache_desc.size is zero when the cache is not present?
> 
> Yes, cache_desc.size will be zero when there's no cache component since
> cpuinfo_or1k[NR_CPUS] is declared as a global variable in setup.c. The
> cache attributes are stored in this struct.
> 
> On 3/17/25 5:30 PM, Stafford Horne wrote:
> > [...]
> > Yes, good point, would be clean too work too.  I was not too happy with using
> > cache_desc.ways as is done below.  Also there ended up bieng 2 different ways
> > that were used.
> > 
> > I am happy to use size too, but I think checking the SPR would be faster or just
> > as fast as using the struct.  I am not too fussed either way.
> 
> There are a few places (e.g. cache_loop() in cache.c) where
> cpuinfo_or1k[smp_processor_id()] is not being referenced. This will have to be
> referenced to access cache_desc. In these cases, I think it would be better to
> read from the SPR instead. For consistency, the SPR can also be read where
> cpuinfo_or1k[smp_processor_id()] is already being referenced.
> 
> On 3/16/25 12:28 PM, Stafford Horne wrote:
> > On Sun, Mar 16, 2025 at 02:09:37AM +0530, Sahil Siddiq wrote:
> > > Add cacheinfo support for OpenRISC.
> > > [...]
> > > diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c
> > > new file mode 100644
> > > index 000000000000..6bb81e246f7e
> > > --- /dev/null
> > > +++ b/arch/openrisc/kernel/cacheinfo.c
> > > @@ -0,0 +1,106 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +/*
> > > + * OpenRISC cacheinfo support
> > > + *
> > > + * Based on work done for MIPS and LoongArch. All original copyrights
> > > + * apply as per the original source declaration.
> > > + *
> > > + * OpenRISC implementation:
> > > + * Copyright (C) 2025 Sahil Siddiq <sahilcdq@proton.me>
> > > + */
> > > +
> > > +#include <linux/cacheinfo.h>
> > > +#include <asm/cpuinfo.h>
> > > +#include <asm/spr.h>
> > > +#include <asm/spr_defs.h>
> > > +
> > > +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
> > > +				unsigned int level, struct cache_desc *cache, int cpu)
> > > +{
> > > +	this_leaf->type = type;
> > > +	this_leaf->level = level;
> > > +	this_leaf->coherency_line_size = cache->block_size;
> > > +	this_leaf->number_of_sets = cache->sets;
> > > +	this_leaf->ways_of_associativity = cache->ways;
> > > +	this_leaf->size = cache->size;
> > > +	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
> > > +}
> > > +
> > > +int init_cache_level(unsigned int cpu)
> > > +{
> > > +	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
> > > +	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> > > +	int leaves = 0, levels = 0;
> > > +	unsigned long upr = mfspr(SPR_UPR);
> > > +	unsigned long iccfgr, dccfgr;
> > > +
> > > +	if (!(upr & SPR_UPR_UP)) {
> > > +		printk(KERN_INFO
> > > +		       "-- no UPR register... unable to detect configuration\n");
> > > +		return -ENOENT;
> > > +	}
> > > +
> > > +	if (upr & SPR_UPR_DCP) {
> > > +		dccfgr = mfspr(SPR_DCCFGR);
> > > +		cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
> > > +		cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
> > > +		cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
> > > +		cpuinfo->dcache.size =
> > > +		    cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size;
> > > +		leaves += 1;
> > > +		printk(KERN_INFO
> > > +		       "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> > > +		       cpuinfo->dcache.size, cpuinfo->dcache.block_size,
> > > +			   cpuinfo->dcache.sets,
> > > +		       cpuinfo->dcache.ways);
> > 
> > The indentation of sets looks a bit off here.
> > 
> > > +	} else
> > > +		printk(KERN_INFO "-- dcache disabled\n");
> > > +
> > > +	if (upr & SPR_UPR_ICP) {
> > > +		iccfgr = mfspr(SPR_ICCFGR);
> > > +		cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
> > > +		cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
> > > +		cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
> > > +		cpuinfo->icache.size =
> > > +		    cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size;
> > > +		leaves += 1;
> > > +		printk(KERN_INFO
> > > +		       "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n",
> > > +		       cpuinfo->icache.size, cpuinfo->icache.block_size,
> > > +			   cpuinfo->icache.sets,
> > > +		       cpuinfo->icache.ways);
> > 
> > The indentation of sets looks a bit off here. Maybe its the others that are out
> > of line, but can you fix?  Also I think sets and ways could be on the same line.
> 
> Sorry, I must have missed this. I'll fix it.
> 
> > > [...]
> > > diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
> > > index b3edbb33b621..ffb161e41e9d 100644
> > > --- a/arch/openrisc/kernel/dma.c
> > > +++ b/arch/openrisc/kernel/dma.c
> > > @@ -36,8 +36,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
> > >   	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
> > >   	/* Flush page out of dcache */
> > > -	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
> > > -		mtspr(SPR_DCBFR, cl);
> > > +	if (cpuinfo->dcache.ways) {
> > > +		for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache.block_size)
> > > +			mtspr(SPR_DCBFR, cl);
> > > +	}
> > 
> > I think it would be better to move this to cacheflush.h as a function like
> > flush_dcache_range() or local_dcache_range_flush().
> > 
> > >   	return 0;
> > >   }
> > > @@ -104,15 +106,19 @@ void arch_sync_dma_for_device(phys_addr_t addr, size_t size,
> > >   	switch (dir) {
> > >   	case DMA_TO_DEVICE:
> > >   		/* Flush the dcache for the requested range */
> > > -		for (cl = addr; cl < addr + size;
> > > -		     cl += cpuinfo->dcache_block_size)
> > > -			mtspr(SPR_DCBFR, cl);
> > > +		if (cpuinfo->dcache.ways) {
> > > +			for (cl = addr; cl < addr + size;
> > > +			     cl += cpuinfo->dcache.block_size)
> > > +				mtspr(SPR_DCBFR, cl);
> > > +		}
> > 
> > Also here,I think it would be better to move this to cacheflush.h as a function like
> > flush_dcache_range().
> > 
> > Or, local_dcache_range_flush(), which seems to be the convention we use in
> > cacheflush.h/cache.c.
> > 
> > 
> > >   		break;
> > >   	case DMA_FROM_DEVICE:
> > >   		/* Invalidate the dcache for the requested range */
> > > -		for (cl = addr; cl < addr + size;
> > > -		     cl += cpuinfo->dcache_block_size)
> > > -			mtspr(SPR_DCBIR, cl);
> > > +		if (cpuinfo->dcache.ways) {
> > > +			for (cl = addr; cl < addr + size;
> > > +			     cl += cpuinfo->dcache.block_size)
> > > +				mtspr(SPR_DCBIR, cl);
> > > +		}
> > 
> > This one could be invalidate_dcache_range().   Note, this will also be useful
> > for the kexec patches that I am working on.
> > 
> > Or, local_dcache_range_inv(), which seems to be the convention we use in
> > cacheflush.h/cache.c.
> 
> Understood.
> 
> > > [...]
> > > @@ -29,13 +34,13 @@ static __always_inline void cache_loop(struct page *page, const unsigned int reg
> > >   void local_dcache_page_flush(struct page *page)
> > >   {
> > > -	cache_loop(page, SPR_DCBFR);
> > > +	cache_loop(page, SPR_DCBFR, SPR_UPR_DCP);
> > >   }
> > >   EXPORT_SYMBOL(local_dcache_page_flush);
> > >   void local_icache_page_inv(struct page *page)
> > >   {
> > > -	cache_loop(page, SPR_ICBIR);
> > > +	cache_loop(page, SPR_ICBIR, SPR_UPR_ICP);
> > >   }
> > >   EXPORT_SYMBOL(local_icache_page_inv);
> > 
> > OK, if we move the range flush and invalidate here we will need to add to this
> > cache_loop a bit more.
> 
> Right. I'll see what can be done to keep it concise.
> 
> > > diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
> > > index d0cb1a0126f9..bbe16546c5b9 100644
> > > --- a/arch/openrisc/mm/init.c
> > > +++ b/arch/openrisc/mm/init.c
> > > @@ -124,6 +124,7 @@ static void __init map_ram(void)
> > >   void __init paging_init(void)
> > >   {
> > >   	int i;
> > > +	unsigned long upr;
> > >   	printk(KERN_INFO "Setting up paging and PTEs.\n");
> > > @@ -176,8 +177,11 @@ void __init paging_init(void)
> > >   	barrier();
> > >   	/* Invalidate instruction caches after code modification */
> > > -	mtspr(SPR_ICBIR, 0x900);
> > > -	mtspr(SPR_ICBIR, 0xa00);
> > > +	upr = mfspr(SPR_UPR);
> > > +	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
> > > +		mtspr(SPR_ICBIR, 0x900);
> > > +		mtspr(SPR_ICBIR, 0xa00);
> > > +	}
> > 
> > Here we could use new utilities such as local_icache_range_inv(0x900,
> > L1_CACHE_BYTES);
> > 
> > Or something like local_icache_block_inv(0x900).  This only needs to flush a
> > single block as the code it is invalidating is just 2 instructions 8 bytes:
> > 
> >      .org 0x900
> > 	l.j     boot_dtlb_miss_handler
> > 	 l.nop
> > 
> >      .org 0xa00
> > 	l.j     boot_itlb_miss_handler
> > 	 l.nop
> 
> Given that there'll be generic local_(i|d)cache_range_inv(start, stop) utility
> functions, would it make sense to simply have a macro defined as:
> 
> #define local_icache_block_inv(addr) local_icache_range_inv(start, L1_CACHE_BYTES)
> 
> instead of having a separate function for invalidating a single cache line? This would
> still use cache_loop() under the hood. The alternative would be to use
> local_icache_range_inv(start, L1_CACHE_BYTES) directly but using the macro might be
> more readable.

Yes, I think a macro would be fine.  Should we use cache_desc.block_size or
L1_CACHE_BYTES?  It doesn't make much difference as L1_CACHE_BYTES is defined as
16 bytes which is the minimum block size and using that will always invalidate a
whole block.  It would be good to have a comment explaining why using
L1_CACHE_BYTES is enough.

-Stafford

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-18  7:43         ` Stafford Horne
@ 2025-03-22 13:51           ` Sahil Siddiq
  2025-03-22 16:29             ` Stafford Horne
  0 siblings, 1 reply; 11+ messages in thread
From: Sahil Siddiq @ 2025-03-22 13:51 UTC (permalink / raw)
  To: Stafford Horne
  Cc: Geert Uytterhoeven, jonas, stefan.kristiansson, linux-openrisc,
	linux-kernel, Sahil Siddiq

Hi Stafford,

On 3/18/25 1:13 PM, Stafford Horne wrote:
> On Tue, Mar 18, 2025 at 12:06:30AM +0530, Sahil Siddiq wrote:
>> On 3/17/25 1:55 PM, Geert Uytterhoeven wrote:
>>> On Sun, 16 Mar 2025 at 07:59, Stafford Horne <shorne@gmail.com> wrote:
>>> [...]
>>>> @@ -176,8 +177,11 @@ void __init paging_init(void)
>>>>    	barrier();
>>>>    	/* Invalidate instruction caches after code modification */
>>>> -	mtspr(SPR_ICBIR, 0x900);
>>>> -	mtspr(SPR_ICBIR, 0xa00);
>>>> +	upr = mfspr(SPR_UPR);
>>>> +	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
>>>> +		mtspr(SPR_ICBIR, 0x900);
>>>> +		mtspr(SPR_ICBIR, 0xa00);
>>>> +	}
>>> Here we could use new utilities such as local_icache_range_inv(0x900,
>>> L1_CACHE_BYTES);
>>>
>>> Or something like local_icache_block_inv(0x900).  This only needs to flush a
>>> single block as the code it is invalidating is just 2 instructions 8 bytes:
>>>
>>>       .org 0x900
>>> 	l.j     boot_dtlb_miss_handler
>>> 	 l.nop
>>>
>>>       .org 0xa00
>>> 	l.j     boot_itlb_miss_handler
>>> 	 l.nop
>>
>> Given that there'll be generic local_(i|d)cache_range_inv(start, stop) utility
>> functions, would it make sense to simply have a macro defined as:
>>
>> #define local_icache_block_inv(addr) local_icache_range_inv(start, L1_CACHE_BYTES)
>>
>> instead of having a separate function for invalidating a single cache line? This would
>> still use cache_loop() under the hood. The alternative would be to use
>> local_icache_range_inv(start, L1_CACHE_BYTES) directly but using the macro might be
>> more readable.
> 
> Yes, I think a macro would be fine.  Should we use cache_desc.block_size or
> L1_CACHE_BYTES?  It doesn't make much difference as L1_CACHE_BYTES is defined as
> 16 bytes which is the minimum block size and using that will always invalidate a
> whole block.  It would be good to have a comment explaining why using
> L1_CACHE_BYTES is enough.
> 

While working on the patch's v3, I realized I am a bit unclear here. Is the ".org"
macro used to set the address at which the instructions are stored in memory? If so,
the first two instructions should occupy the memory area 0x900 through 0x907, right?
Similarly, the next two instructions will occupy 0xa00-0xa07.

Since the two instructions are 256 bytes apart, they shouldn't be cached in the same
cache line, right? Maybe one cache line will have 16 bytes starting from 0x900 while
another cache line will have 16 bytes starting from 0xa00.

If the above is true, I think it'll be better to simply call mtspr() for each address
individually.

Thanks,
Sahil

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-15 20:39 [PATCH v2] openrisc: Add cacheinfo support Sahil Siddiq
  2025-03-16  6:58 ` Stafford Horne
@ 2025-03-22 15:40 ` Markus Elfring
  2025-03-22 19:03   ` Sahil Siddiq
  1 sibling, 1 reply; 11+ messages in thread
From: Markus Elfring @ 2025-03-22 15:40 UTC (permalink / raw)
  To: Sahil Siddiq, linux-openrisc
  Cc: Sahil Siddiq, LKML, Jonas Bonn, Stafford Horne,
	Stefan Kristiansson

…
> This patch provides a mechanism …

> The patch also moves …

See also:
https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?h=v6.14-rc7#n94

Regards,
Markus

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-22 13:51           ` Sahil Siddiq
@ 2025-03-22 16:29             ` Stafford Horne
  2025-03-22 19:04               ` Sahil Siddiq
  0 siblings, 1 reply; 11+ messages in thread
From: Stafford Horne @ 2025-03-22 16:29 UTC (permalink / raw)
  To: Sahil Siddiq
  Cc: Geert Uytterhoeven, jonas, stefan.kristiansson, linux-openrisc,
	linux-kernel, Sahil Siddiq

On Sat, Mar 22, 2025 at 07:21:18PM +0530, Sahil Siddiq wrote:
> Hi Stafford,
> 
> On 3/18/25 1:13 PM, Stafford Horne wrote:
> > On Tue, Mar 18, 2025 at 12:06:30AM +0530, Sahil Siddiq wrote:
> > > On 3/17/25 1:55 PM, Geert Uytterhoeven wrote:
> > > > On Sun, 16 Mar 2025 at 07:59, Stafford Horne <shorne@gmail.com> wrote:
> > > > [...]
> > > > > @@ -176,8 +177,11 @@ void __init paging_init(void)
> > > > >    	barrier();
> > > > >    	/* Invalidate instruction caches after code modification */
> > > > > -	mtspr(SPR_ICBIR, 0x900);
> > > > > -	mtspr(SPR_ICBIR, 0xa00);
> > > > > +	upr = mfspr(SPR_UPR);
> > > > > +	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
> > > > > +		mtspr(SPR_ICBIR, 0x900);
> > > > > +		mtspr(SPR_ICBIR, 0xa00);
> > > > > +	}
> > > > Here we could use new utilities such as local_icache_range_inv(0x900,
> > > > L1_CACHE_BYTES);
> > > > 
> > > > Or something like local_icache_block_inv(0x900).  This only needs to flush a
> > > > single block as the code it is invalidating is just 2 instructions 8 bytes:
> > > > 
> > > >       .org 0x900
> > > > 	l.j     boot_dtlb_miss_handler
> > > > 	 l.nop
> > > > 
> > > >       .org 0xa00
> > > > 	l.j     boot_itlb_miss_handler
> > > > 	 l.nop
> > > 
> > > Given that there'll be generic local_(i|d)cache_range_inv(start, stop) utility
> > > functions, would it make sense to simply have a macro defined as:
> > > 
> > > #define local_icache_block_inv(addr) local_icache_range_inv(start, L1_CACHE_BYTES)
> > > 
> > > instead of having a separate function for invalidating a single cache line? This would
> > > still use cache_loop() under the hood. The alternative would be to use
> > > local_icache_range_inv(start, L1_CACHE_BYTES) directly but using the macro might be
> > > more readable.
> > 
> > Yes, I think a macro would be fine.  Should we use cache_desc.block_size or
> > L1_CACHE_BYTES?  It doesn't make much difference as L1_CACHE_BYTES is defined as
> > 16 bytes which is the minimum block size and using that will always invalidate a
> > whole block.  It would be good to have a comment explaining why using
> > L1_CACHE_BYTES is enough.
> > 
> 
> While working on the patch's v3, I realized I am a bit unclear here. Is the ".org"
> macro used to set the address at which the instructions are stored in memory? If so,
> the first two instructions should occupy the memory area 0x900 through 0x907, right?
> Similarly, the next two instructions will occupy 0xa00-0xa07.
> 
> Since the two instructions are 256 bytes apart, they shouldn't be cached in the same
> cache line, right? Maybe one cache line will have 16 bytes starting from 0x900 while
> another cache line will have 16 bytes starting from 0xa00.

Yes, to invalidate the cache we will need to do:

	local_icache_block_inv(0x900);
	local_icache_block_inv(0xa00);

This will then compile down to the pretty much same as, (but with checks to
validate the caches exist first):

	mtspr(0x900);
	mtspr(0xa00);

> If the above is true, I think it'll be better to simply call mtspr() for each address
> individually.

Thats right, but I figured the local_icache_block_inv function/macro would be
more useful other than just this block.

-Stafford

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-22 15:40 ` Markus Elfring
@ 2025-03-22 19:03   ` Sahil Siddiq
  0 siblings, 0 replies; 11+ messages in thread
From: Sahil Siddiq @ 2025-03-22 19:03 UTC (permalink / raw)
  To: Markus Elfring, Sahil Siddiq, linux-openrisc
  Cc: LKML, Jonas Bonn, Stafford Horne, Stefan Kristiansson

Hi,

On 3/22/25 9:10 PM, Markus Elfring wrote:
> …
>> This patch provides a mechanism …
> 
>> The patch also moves …
> 
> See also:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?h=v6.14-rc7#n94
> 

Thank you. I'll change the wording of the commit message.

Thanks,
Sahil

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] openrisc: Add cacheinfo support
  2025-03-22 16:29             ` Stafford Horne
@ 2025-03-22 19:04               ` Sahil Siddiq
  0 siblings, 0 replies; 11+ messages in thread
From: Sahil Siddiq @ 2025-03-22 19:04 UTC (permalink / raw)
  To: Stafford Horne
  Cc: Geert Uytterhoeven, jonas, stefan.kristiansson, linux-openrisc,
	linux-kernel, Sahil Siddiq

Hi,

On 3/22/25 9:59 PM, Stafford Horne wrote:
> On Sat, Mar 22, 2025 at 07:21:18PM +0530, Sahil Siddiq wrote:
>> On 3/18/25 1:13 PM, Stafford Horne wrote:
>>> On Tue, Mar 18, 2025 at 12:06:30AM +0530, Sahil Siddiq wrote:
>>>> On 3/17/25 1:55 PM, Geert Uytterhoeven wrote:
>>>>> On Sun, 16 Mar 2025 at 07:59, Stafford Horne <shorne@gmail.com> wrote:
>>>>> [...]
>>>>>> @@ -176,8 +177,11 @@ void __init paging_init(void)
>>>>>>     	barrier();
>>>>>>     	/* Invalidate instruction caches after code modification */
>>>>>> -	mtspr(SPR_ICBIR, 0x900);
>>>>>> -	mtspr(SPR_ICBIR, 0xa00);
>>>>>> +	upr = mfspr(SPR_UPR);
>>>>>> +	if (upr & SPR_UPR_UP & SPR_UPR_ICP) {
>>>>>> +		mtspr(SPR_ICBIR, 0x900);
>>>>>> +		mtspr(SPR_ICBIR, 0xa00);
>>>>>> +	}
>>>>> Here we could use new utilities such as local_icache_range_inv(0x900,
>>>>> L1_CACHE_BYTES);
>>>>>
>>>>> Or something like local_icache_block_inv(0x900).  This only needs to flush a
>>>>> single block as the code it is invalidating is just 2 instructions 8 bytes:
>>>>>
>>>>>        .org 0x900
>>>>> 	l.j     boot_dtlb_miss_handler
>>>>> 	 l.nop
>>>>>
>>>>>        .org 0xa00
>>>>> 	l.j     boot_itlb_miss_handler
>>>>> 	 l.nop
>>>>
>>>> Given that there'll be generic local_(i|d)cache_range_inv(start, stop) utility
>>>> functions, would it make sense to simply have a macro defined as:
>>>>
>>>> #define local_icache_block_inv(addr) local_icache_range_inv(start, L1_CACHE_BYTES)
>>>>
>>>> instead of having a separate function for invalidating a single cache line? This would
>>>> still use cache_loop() under the hood. The alternative would be to use
>>>> local_icache_range_inv(start, L1_CACHE_BYTES) directly but using the macro might be
>>>> more readable.
>>>
>>> Yes, I think a macro would be fine.  Should we use cache_desc.block_size or
>>> L1_CACHE_BYTES?  It doesn't make much difference as L1_CACHE_BYTES is defined as
>>> 16 bytes which is the minimum block size and using that will always invalidate a
>>> whole block.  It would be good to have a comment explaining why using
>>> L1_CACHE_BYTES is enough.
>>>
>>
>> While working on the patch's v3, I realized I am a bit unclear here. Is the ".org"
>> macro used to set the address at which the instructions are stored in memory? If so,
>> the first two instructions should occupy the memory area 0x900 through 0x907, right?
>> Similarly, the next two instructions will occupy 0xa00-0xa07.
>>
>> Since the two instructions are 256 bytes apart, they shouldn't be cached in the same
>> cache line, right? Maybe one cache line will have 16 bytes starting from 0x900 while
>> another cache line will have 16 bytes starting from 0xa00.
> 
> Yes, to invalidate the cache we will need to do:
> 
> 	local_icache_block_inv(0x900);
> 	local_icache_block_inv(0xa00);
> 
> This will then compile down to the pretty much same as, (but with checks to
> validate the caches exist first):
> 
> 	mtspr(0x900);
> 	mtspr(0xa00);

Ok, this makes sense. I misunderstood the comments in the previous email.

>> If the above is true, I think it'll be better to simply call mtspr() for each address
>> individually.
> 
> Thats right, but I figured the local_icache_block_inv function/macro would be
> more useful other than just this block.
> 

Right, I'll replace this with a macro.

Thanks,
Sahil

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-03-22 19:04 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-03-15 20:39 [PATCH v2] openrisc: Add cacheinfo support Sahil Siddiq
2025-03-16  6:58 ` Stafford Horne
2025-03-17  8:25   ` Geert Uytterhoeven
2025-03-17 12:00     ` Stafford Horne
2025-03-17 18:36       ` Sahil Siddiq
2025-03-18  7:43         ` Stafford Horne
2025-03-22 13:51           ` Sahil Siddiq
2025-03-22 16:29             ` Stafford Horne
2025-03-22 19:04               ` Sahil Siddiq
2025-03-22 15:40 ` Markus Elfring
2025-03-22 19:03   ` Sahil Siddiq

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).