* [PATCH 1/3] Revert "arm64: kernel: add support for cpu cache information"
2015-10-28 21:43 [PATCH 0/3] Revert arm64 cache geometry Alex Van Brunt
@ 2015-10-28 21:43 ` Alex Van Brunt
2015-10-28 21:43 ` [PATCH 2/3] Revert "arm64: don't flag non-aliasing VIPT I-caches as aliasing" Alex Van Brunt
` (4 subsequent siblings)
5 siblings, 0 replies; 20+ messages in thread
From: Alex Van Brunt @ 2015-10-28 21:43 UTC (permalink / raw)
To: linux-arm-kernel
This reverts commit 5d425c18653731af62831d30a4fa023d532657a9.
Change-Id: Ie34bbade5183d0c6b5b578dfaaa3ee1e898fcd9a
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
---
arch/arm64/include/asm/cachetype.h | 29 ++-------
arch/arm64/kernel/Makefile | 2 +-
arch/arm64/kernel/cacheinfo.c | 128 -------------------------------------
arch/arm64/kernel/cpuinfo.c | 12 ++++
4 files changed, 19 insertions(+), 152 deletions(-)
delete mode 100644 arch/arm64/kernel/cacheinfo.c
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index da2fc9e..4c631a0 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -39,41 +39,24 @@
extern unsigned long __icache_flags;
-/*
- * NumSets, bits[27:13] - (Number of sets in cache) - 1
- * Associativity, bits[12:3] - (Associativity of cache) - 1
- * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2
- */
-#define CCSIDR_EL1_WRITE_THROUGH BIT(31)
-#define CCSIDR_EL1_WRITE_BACK BIT(30)
-#define CCSIDR_EL1_READ_ALLOCATE BIT(29)
-#define CCSIDR_EL1_WRITE_ALLOCATE BIT(28)
#define CCSIDR_EL1_LINESIZE_MASK 0x7
#define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK)
-#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT 3
-#define CCSIDR_EL1_ASSOCIATIVITY_MASK 0x3ff
-#define CCSIDR_EL1_ASSOCIATIVITY(x) \
- (((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK)
+
#define CCSIDR_EL1_NUMSETS_SHIFT 13
-#define CCSIDR_EL1_NUMSETS_MASK 0x7fff
+#define CCSIDR_EL1_NUMSETS_MASK (0x7fff << CCSIDR_EL1_NUMSETS_SHIFT)
#define CCSIDR_EL1_NUMSETS(x) \
- (((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK)
-
-#define CACHE_LINESIZE(x) (16 << CCSIDR_EL1_LINESIZE(x))
-#define CACHE_NUMSETS(x) (CCSIDR_EL1_NUMSETS(x) + 1)
-#define CACHE_ASSOCIATIVITY(x) (CCSIDR_EL1_ASSOCIATIVITY(x) + 1)
+ (((x) & CCSIDR_EL1_NUMSETS_MASK) >> CCSIDR_EL1_NUMSETS_SHIFT)
-extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr);
+extern u64 __attribute_const__ icache_get_ccsidr(void);
-/* Helpers for Level 1 Instruction cache csselr = 1L */
static inline int icache_get_linesize(void)
{
- return CACHE_LINESIZE(cache_get_ccsidr(1L));
+ return 16 << CCSIDR_EL1_LINESIZE(icache_get_ccsidr());
}
static inline int icache_get_numsets(void)
{
- return CACHE_NUMSETS(cache_get_ccsidr(1L));
+ return 1 + CCSIDR_EL1_NUMSETS(icache_get_ccsidr());
}
/*
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 22dc9bc..abcf663 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -17,7 +17,7 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \
return_address.o cpuinfo.o cpu_errata.o \
- cpufeature.o alternative.o cacheinfo.o \
+ cpufeature.o alternative.o \
smp.o smp_spin_table.o topology.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
deleted file mode 100644
index b8629d5..0000000
--- a/arch/arm64/kernel/cacheinfo.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * ARM64 cacheinfo support
- *
- * Copyright (C) 2015 ARM Ltd.
- * All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/bitops.h>
-#include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/compiler.h>
-#include <linux/of.h>
-
-#include <asm/cachetype.h>
-#include <asm/processor.h>
-
-#define MAX_CACHE_LEVEL 7 /* Max 7 level supported */
-/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
-#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
-#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
-#define CLIDR_CTYPE(clidr, level) \
- (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
-
-static inline enum cache_type get_cache_type(int level)
-{
- u64 clidr;
-
- if (level > MAX_CACHE_LEVEL)
- return CACHE_TYPE_NOCACHE;
- asm volatile ("mrs %x0, clidr_el1" : "=r" (clidr));
- return CLIDR_CTYPE(clidr, level);
-}
-
-/*
- * Cache Size Selection Register(CSSELR) selects which Cache Size ID
- * Register(CCSIDR) is accessible by specifying the required cache
- * level and the cache type. We need to ensure that no one else changes
- * CSSELR by calling this in non-preemtible context
- */
-u64 __attribute_const__ cache_get_ccsidr(u64 csselr)
-{
- u64 ccsidr;
-
- WARN_ON(preemptible());
-
- /* Put value into CSSELR */
- asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
- isb();
- /* Read result out of CCSIDR */
- asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr));
-
- return ccsidr;
-}
-
-static void ci_leaf_init(struct cacheinfo *this_leaf,
- enum cache_type type, unsigned int level)
-{
- bool is_icache = type & CACHE_TYPE_INST;
- u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache);
-
- this_leaf->level = level;
- this_leaf->type = type;
- this_leaf->coherency_line_size = CACHE_LINESIZE(tmp);
- this_leaf->number_of_sets = CACHE_NUMSETS(tmp);
- this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp);
- this_leaf->size = this_leaf->number_of_sets *
- this_leaf->coherency_line_size * this_leaf->ways_of_associativity;
- this_leaf->attributes =
- ((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) |
- ((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) |
- ((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) |
- ((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? CACHE_WRITE_ALLOCATE : 0);
-}
-
-static int __init_cache_level(unsigned int cpu)
-{
- unsigned int ctype, level, leaves;
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-
- for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
- ctype = get_cache_type(level);
- if (ctype == CACHE_TYPE_NOCACHE) {
- level--;
- break;
- }
- /* Separate instruction and data caches */
- leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
- }
-
- this_cpu_ci->num_levels = level;
- this_cpu_ci->num_leaves = leaves;
- return 0;
-}
-
-static int __populate_cache_leaves(unsigned int cpu)
-{
- unsigned int level, idx;
- enum cache_type type;
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- struct cacheinfo *this_leaf = this_cpu_ci->info_list;
-
- for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
- idx < this_cpu_ci->num_leaves; idx++, level++) {
- type = get_cache_type(level);
- if (type == CACHE_TYPE_SEPARATE) {
- ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
- ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
- } else {
- ci_leaf_init(this_leaf++, type, level);
- }
- }
- return 0;
-}
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 75d5a86..540177a 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -254,3 +254,15 @@ void __init cpuinfo_store_boot_cpu(void)
boot_cpu_data = *info;
}
+
+u64 __attribute_const__ icache_get_ccsidr(void)
+{
+ u64 ccsidr;
+
+ WARN_ON(preemptible());
+
+ /* Select L1 I-cache and read its size ID register */
+ asm("msr csselr_el1, %1; isb; mrs %0, ccsidr_el1"
+ : "=r"(ccsidr) : "r"(1L));
+ return ccsidr;
+}
--
2.1.4
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH 2/3] Revert "arm64: don't flag non-aliasing VIPT I-caches as aliasing"
2015-10-28 21:43 [PATCH 0/3] Revert arm64 cache geometry Alex Van Brunt
2015-10-28 21:43 ` [PATCH 1/3] Revert "arm64: kernel: add support for cpu cache information" Alex Van Brunt
@ 2015-10-28 21:43 ` Alex Van Brunt
2015-10-28 21:43 ` [PATCH 3/3] Revert "arm64: add helper functions to read I-cache attributes" Alex Van Brunt
` (3 subsequent siblings)
5 siblings, 0 replies; 20+ messages in thread
From: Alex Van Brunt @ 2015-10-28 21:43 UTC (permalink / raw)
To: linux-arm-kernel
This reverts commit 169c018de7b6d376f821f9fae0ab23dc5c7bb549.
Change-Id: I86627b6328b733d3bb5874832ca1a7543f75efc1
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
---
arch/arm64/kernel/cpuinfo.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 540177a..e0c6c8c 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -51,18 +51,8 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
unsigned int cpu = smp_processor_id();
u32 l1ip = CTR_L1IP(info->reg_ctr);
- if (l1ip != ICACHE_POLICY_PIPT) {
- /*
- * VIPT caches are non-aliasing if the VA always equals the PA
- * in all bit positions that are covered by the index. This is
- * the case if the size of a way (# of sets * line size) does
- * not exceed PAGE_SIZE.
- */
- u32 waysize = icache_get_numsets() * icache_get_linesize();
-
- if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
- set_bit(ICACHEF_ALIASING, &__icache_flags);
- }
+ if (l1ip != ICACHE_POLICY_PIPT)
+ set_bit(ICACHEF_ALIASING, &__icache_flags);
if (l1ip == ICACHE_POLICY_AIVIVT)
set_bit(ICACHEF_AIVIVT, &__icache_flags);
--
2.1.4
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH 3/3] Revert "arm64: add helper functions to read I-cache attributes"
2015-10-28 21:43 [PATCH 0/3] Revert arm64 cache geometry Alex Van Brunt
2015-10-28 21:43 ` [PATCH 1/3] Revert "arm64: kernel: add support for cpu cache information" Alex Van Brunt
2015-10-28 21:43 ` [PATCH 2/3] Revert "arm64: don't flag non-aliasing VIPT I-caches as aliasing" Alex Van Brunt
@ 2015-10-28 21:43 ` Alex Van Brunt
2015-10-29 3:22 ` [PATCH 0/3] Revert arm64 cache geometry Ard Biesheuvel
` (2 subsequent siblings)
5 siblings, 0 replies; 20+ messages in thread
From: Alex Van Brunt @ 2015-10-28 21:43 UTC (permalink / raw)
To: linux-arm-kernel
This reverts commit 80c517b0ff71a4c874fed9196fd990d2d9e911f3.
Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
---
arch/arm64/include/asm/cachetype.h | 20 --------------------
arch/arm64/kernel/cpuinfo.c | 14 --------------
2 files changed, 34 deletions(-)
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index 4c631a0..7a2e076 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -39,26 +39,6 @@
extern unsigned long __icache_flags;
-#define CCSIDR_EL1_LINESIZE_MASK 0x7
-#define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK)
-
-#define CCSIDR_EL1_NUMSETS_SHIFT 13
-#define CCSIDR_EL1_NUMSETS_MASK (0x7fff << CCSIDR_EL1_NUMSETS_SHIFT)
-#define CCSIDR_EL1_NUMSETS(x) \
- (((x) & CCSIDR_EL1_NUMSETS_MASK) >> CCSIDR_EL1_NUMSETS_SHIFT)
-
-extern u64 __attribute_const__ icache_get_ccsidr(void);
-
-static inline int icache_get_linesize(void)
-{
- return 16 << CCSIDR_EL1_LINESIZE(icache_get_ccsidr());
-}
-
-static inline int icache_get_numsets(void)
-{
- return 1 + CCSIDR_EL1_NUMSETS(icache_get_ccsidr());
-}
-
/*
* Whilst the D-side always behaves as PIPT on AArch64, aliasing is
* permitted in the I-cache.
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index e0c6c8c..ae04ac1 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -21,10 +21,8 @@
#include <asm/cpufeature.h>
#include <linux/bitops.h>
-#include <linux/bug.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/smp.h>
@@ -244,15 +242,3 @@ void __init cpuinfo_store_boot_cpu(void)
boot_cpu_data = *info;
}
-
-u64 __attribute_const__ icache_get_ccsidr(void)
-{
- u64 ccsidr;
-
- WARN_ON(preemptible());
-
- /* Select L1 I-cache and read its size ID register */
- asm("msr csselr_el1, %1; isb; mrs %0, ccsidr_el1"
- : "=r"(ccsidr) : "r"(1L));
- return ccsidr;
-}
--
2.1.4
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH 0/3] Revert arm64 cache geometry
2015-10-28 21:43 [PATCH 0/3] Revert arm64 cache geometry Alex Van Brunt
` (2 preceding siblings ...)
2015-10-28 21:43 ` [PATCH 3/3] Revert "arm64: add helper functions to read I-cache attributes" Alex Van Brunt
@ 2015-10-29 3:22 ` Ard Biesheuvel
2015-10-29 11:20 ` Mark Rutland
` (2 more replies)
2015-10-29 11:29 ` Mark Rutland
2015-10-29 11:40 ` Will Deacon
5 siblings, 3 replies; 20+ messages in thread
From: Ard Biesheuvel @ 2015-10-29 3:22 UTC (permalink / raw)
To: linux-arm-kernel
On 29 October 2015 at 06:43, Alex Van Brunt <avanbrunt@nvidia.com> wrote:
> This patchset reverts three patches that attempt to query the CPU for cache
> geometry and then make use of that information. Those patches rely on the
> NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
> the architectural documentation for these registers forbids such use:
>
> The parameters NumSets, Associativity, and LineSize in these registers
> define the architecturally visible parameters that are required for the
> cache maintenance by Set/Way instructions. They are not guaranteed to
> represent the actual microarchitectural features of a design. You cannot
> make any inference about the actual sizes of caches based on these
> parameters.
>
> It is not just theoretical. For example, the Denver CPU will report one set and
> one way in CCSIDR even though the actual microarchitectural implementation has
> many sets and many ways.
>
Fair enough. It is a bit disappointing that we cannot trust these
values, but if the architecture does not mandate their accuracy, we
obviously should not be using them in the way that we are.
I think we have similar code in the ARM tree, so we should probably
make some changes there as well.
> I have two suggestions for how to get the cache geometry on an ARMv8 processor:
> 1. Specify the information in the device tree. The purpose of the deivce tree
> is to specify information that software cannot query at run-time. Becuase
> the architecture does not have an architectural way to query the cache
> geometry this may be a good fit.
> 2. Add a function pointer to cpu_table that gives a implementation specific
> way to query the cache geometry. For an A57, for example, the function
> could read the CCSIDR register because it happens to report the
> microarchitectural geometry. The Denver CPU has implementation defined
> registers that can be used to determine the microarchitectural geometry.
> However, the implementation for the default "AArch64 Processor", must
> return an error.
>
> The only place that the cache geometry is used is to determine if there can be
> aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
> The code assumes that there is no need to flush the entire instruction cache
> if the size of a cache set is less than or equal to a page size. However, the
> architectural definition of VIPT says "The only architecturally-guaranteed way
> to invalidate all aliases of a physical address from a VIPT instruction cache
> is to invalidate the entire instruction cache." Not only are the parameters not
> guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
> the parameters were correct.
>
I understand that this is subject to interpretation, but I would argue
that this does not apply to the case where you can prove that no
aliases could ever exist (which is the point of comparing the way size
to the page size)
> Alex Van Brunt (3):
> Revert "arm64: kernel: add support for cpu cache information"
> Revert "arm64: don't flag non-aliasing VIPT I-caches as aliasing"
> Revert "arm64: add helper functions to read I-cache attributes"
>
None of the clarifications you offer here are in the commit logs of
the patches. Since the cover letter does not make it into the
repository, someone looking at the commit log will not have a clue why
these patches were reverted all of a sudden. Could you please update
that? At the same time, could you get rid of the Change-Ids as well?
They are meaningless in the kernel tree.
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 3:22 ` [PATCH 0/3] Revert arm64 cache geometry Ard Biesheuvel
@ 2015-10-29 11:20 ` Mark Rutland
2015-10-29 12:26 ` Ard Biesheuvel
2015-10-29 15:43 ` Russell King - ARM Linux
2015-10-29 22:54 ` Alexander Van Brunt
2 siblings, 1 reply; 20+ messages in thread
From: Mark Rutland @ 2015-10-29 11:20 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Oct 29, 2015 at 12:22:51PM +0900, Ard Biesheuvel wrote:
> On 29 October 2015 at 06:43, Alex Van Brunt <avanbrunt@nvidia.com> wrote:
> > This patchset reverts three patches that attempt to query the CPU for cache
> > geometry and then make use of that information. Those patches rely on the
> > NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
> > the architectural documentation for these registers forbids such use:
> >
> > The parameters NumSets, Associativity, and LineSize in these registers
> > define the architecturally visible parameters that are required for the
> > cache maintenance by Set/Way instructions. They are not guaranteed to
> > represent the actual microarchitectural features of a design. You cannot
> > make any inference about the actual sizes of caches based on these
> > parameters.
> >
> > It is not just theoretical. For example, the Denver CPU will report one set and
> > one way in CCSIDR even though the actual microarchitectural implementation has
> > many sets and many ways.
> >
>
> Fair enough. It is a bit disappointing that we cannot trust these
> values, but if the architecture does not mandate their accuracy, we
> obviously should not be using them in the way that we are.
Indeed.
> > The only place that the cache geometry is used is to determine if there can be
> > aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
> > The code assumes that there is no need to flush the entire instruction cache
> > if the size of a cache set is less than or equal to a page size. However, the
> > architectural definition of VIPT says "The only architecturally-guaranteed way
> > to invalidate all aliases of a physical address from a VIPT instruction cache
> > is to invalidate the entire instruction cache." Not only are the parameters not
> > guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
> > the parameters were correct.
> >
>
> I understand that this is subject to interpretation, but I would argue
> that this does not apply to the case where you can prove that no
> aliases could ever exist (which is the point of comparing the way size
> to the page size)
Given NumSets and LineSize aren't guaranteed to match the physical
values, I don't see that way have sufficient information to derive the
physical way size in order to do so.
> > Alex Van Brunt (3):
> > Revert "arm64: kernel: add support for cpu cache information"
> > Revert "arm64: don't flag non-aliasing VIPT I-caches as aliasing"
> > Revert "arm64: add helper functions to read I-cache attributes"
> >
>
> None of the clarifications you offer here are in the commit logs of
> the patches. Since the cover letter does not make it into the
> repository, someone looking at the commit log will not have a clue why
> these patches were reverted all of a sudden. Could you please update
> that? At the same time, could you get rid of the Change-Ids as well?
> They are meaningless in the kernel tree.
It would be good to include the quote from the cover letter in each
patch; it makes the situation abundantly clear and avoids having to
trawl through the ARM ARM again in future.
Thanks,
Mark.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 11:20 ` Mark Rutland
@ 2015-10-29 12:26 ` Ard Biesheuvel
0 siblings, 0 replies; 20+ messages in thread
From: Ard Biesheuvel @ 2015-10-29 12:26 UTC (permalink / raw)
To: linux-arm-kernel
On 29 October 2015 at 20:20, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Oct 29, 2015 at 12:22:51PM +0900, Ard Biesheuvel wrote:
>> On 29 October 2015 at 06:43, Alex Van Brunt <avanbrunt@nvidia.com> wrote:
>> > This patchset reverts three patches that attempt to query the CPU for cache
>> > geometry and then make use of that information. Those patches rely on the
>> > NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
>> > the architectural documentation for these registers forbids such use:
>> >
>> > The parameters NumSets, Associativity, and LineSize in these registers
>> > define the architecturally visible parameters that are required for the
>> > cache maintenance by Set/Way instructions. They are not guaranteed to
>> > represent the actual microarchitectural features of a design. You cannot
>> > make any inference about the actual sizes of caches based on these
>> > parameters.
>> >
>> > It is not just theoretical. For example, the Denver CPU will report one set and
>> > one way in CCSIDR even though the actual microarchitectural implementation has
>> > many sets and many ways.
>> >
>>
>> Fair enough. It is a bit disappointing that we cannot trust these
>> values, but if the architecture does not mandate their accuracy, we
>> obviously should not be using them in the way that we are.
>
> Indeed.
>
>> > The only place that the cache geometry is used is to determine if there can be
>> > aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
>> > The code assumes that there is no need to flush the entire instruction cache
>> > if the size of a cache set is less than or equal to a page size. However, the
>> > architectural definition of VIPT says "The only architecturally-guaranteed way
>> > to invalidate all aliases of a physical address from a VIPT instruction cache
>> > is to invalidate the entire instruction cache." Not only are the parameters not
>> > guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
>> > the parameters were correct.
>> >
>>
>> I understand that this is subject to interpretation, but I would argue
>> that this does not apply to the case where you can prove that no
>> aliases could ever exist (which is the point of comparing the way size
>> to the page size)
>
> Given NumSets and LineSize aren't guaranteed to match the physical
> values, I don't see that way have sufficient information to derive the
> physical way size in order to do so.
>
I know, but the argument being made is that if we did have this
information, we would still violate the spec if we used it to decide
whether a VIPT I-cache could alias or not.
>> > Alex Van Brunt (3):
>> > Revert "arm64: kernel: add support for cpu cache information"
>> > Revert "arm64: don't flag non-aliasing VIPT I-caches as aliasing"
>> > Revert "arm64: add helper functions to read I-cache attributes"
>> >
>>
>> None of the clarifications you offer here are in the commit logs of
>> the patches. Since the cover letter does not make it into the
>> repository, someone looking at the commit log will not have a clue why
>> these patches were reverted all of a sudden. Could you please update
>> that? At the same time, could you get rid of the Change-Ids as well?
>> They are meaningless in the kernel tree.
>
> It would be good to include the quote from the cover letter in each
> patch; it makes the situation abundantly clear and avoids having to
> trawl through the ARM ARM again in future.
>
Indeed.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 3:22 ` [PATCH 0/3] Revert arm64 cache geometry Ard Biesheuvel
2015-10-29 11:20 ` Mark Rutland
@ 2015-10-29 15:43 ` Russell King - ARM Linux
2015-10-29 16:28 ` Ard Biesheuvel
2015-10-29 22:54 ` Alexander Van Brunt
2 siblings, 1 reply; 20+ messages in thread
From: Russell King - ARM Linux @ 2015-10-29 15:43 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Oct 29, 2015 at 12:22:51PM +0900, Ard Biesheuvel wrote:
> Fair enough. It is a bit disappointing that we cannot trust these
> values, but if the architecture does not mandate their accuracy, we
> obviously should not be using them in the way that we are.
>
> I think we have similar code in the ARM tree, so we should probably
> make some changes there as well.
I've opposed exporting the cache dimensions to userspace for several
reasons:
* it will become a nightmare with the various different register formats
to properly decode these values
* older CPUs don't have the cache ID registers, so we'd need to augment
any export with additional static configuration somehow
* I don't trust userland with this information to make the right choices,
especially when faced with further levels of caches.
The main reason for people wanting the cache dimensions has been "so we
can select the optimal code for the CPU". Given all the combinations
of caches out there, I've always said selecting code based on one level
of cache is totally insane, and userspace is better off doing some
performance measurement of its implementations and selecting the most
appropriate version.
There's many things that affect the performance of code paths with CPUs.
It's not just about cache line size, but instruction latencies, write
delays, branch prediction and so forth. You can't _say_ "because this
CPU has a 32K L1 cache, if I optimise my code as X it'll perform
better everywhere with a 32K L1 cache than optimised Y."
Selecting code based on cache parameters is just wrong.
There may be cases where userspace would like to know the cache line
size, so it can appropriately align data structures - but that again
depends on what you're trying to achieve, and what if the L1 cache
line size is different from the L2 cache line size...
It's a minefield, one which IMHO userspace shouldn't be trusted with.
Userspace should assume the worst case cache line size seen in ARM
CPUs and be done with it.
--
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 15:43 ` Russell King - ARM Linux
@ 2015-10-29 16:28 ` Ard Biesheuvel
2015-10-29 23:06 ` Alexander Van Brunt
0 siblings, 1 reply; 20+ messages in thread
From: Ard Biesheuvel @ 2015-10-29 16:28 UTC (permalink / raw)
To: linux-arm-kernel
> On 29 okt. 2015, at 16:43, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote:
>
>> On Thu, Oct 29, 2015 at 12:22:51PM +0900, Ard Biesheuvel wrote:
>> Fair enough. It is a bit disappointing that we cannot trust these
>> values, but if the architecture does not mandate their accuracy, we
>> obviously should not be using them in the way that we are.
>>
>> I think we have similar code in the ARM tree, so we should probably
>> make some changes there as well.
>
> I've opposed exporting the cache dimensions to userspace for several
> reasons:
>
I agree with the arguments below. However, what I refer to here is kernel code that infers whether a certain VIPT cache is non-aliasing based on the way size, which is calculated from values that are exposed to software for the sole purpose of enumerating cachelines by set/way.
> * it will become a nightmare with the various different register formats
> to properly decode these values
> * older CPUs don't have the cache ID registers, so we'd need to augment
> any export with additional static configuration somehow
> * I don't trust userland with this information to make the right choices,
> especially when faced with further levels of caches.
>
> The main reason for people wanting the cache dimensions has been "so we
> can select the optimal code for the CPU". Given all the combinations
> of caches out there, I've always said selecting code based on one level
> of cache is totally insane, and userspace is better off doing some
> performance measurement of its implementations and selecting the most
> appropriate version.
>
> There's many things that affect the performance of code paths with CPUs.
> It's not just about cache line size, but instruction latencies, write
> delays, branch prediction and so forth. You can't _say_ "because this
> CPU has a 32K L1 cache, if I optimise my code as X it'll perform
> better everywhere with a 32K L1 cache than optimised Y."
>
> Selecting code based on cache parameters is just wrong.
>
> There may be cases where userspace would like to know the cache line
> size, so it can appropriately align data structures - but that again
> depends on what you're trying to achieve, and what if the L1 cache
> line size is different from the L2 cache line size...
>
> It's a minefield, one which IMHO userspace shouldn't be trusted with.
> Userspace should assume the worst case cache line size seen in ARM
> CPUs and be done with it.
>
> --
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 16:28 ` Ard Biesheuvel
@ 2015-10-29 23:06 ` Alexander Van Brunt
0 siblings, 0 replies; 20+ messages in thread
From: Alexander Van Brunt @ 2015-10-29 23:06 UTC (permalink / raw)
To: linux-arm-kernel
>> On 29 okt. 2015, at 16:43, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote:
>>
>>> On Thu, Oct 29, 2015 at 12:22:51PM +0900, Ard Biesheuvel wrote:
>>> Fair enough. It is a bit disappointing that we cannot trust these
>>> values, but if the architecture does not mandate their accuracy, we
>>> obviously should not be using them in the way that we are.
>>>
>>> I think we have similar code in the ARM tree, so we should probably
>>> make some changes there as well.
>>
>> I've opposed exporting the cache dimensions to userspace for several
>> reasons:
>>
>
>I agree with the arguments below. However, what I refer to here is kernel code that infers whether a certain VIPT cache is non-aliasing based on the way size, which is calculated from values that are exposed to software for the sole purpose of enumerating cachelines by set/way.
No, CCSIDR_EL1 is for the sole purpose of indicating how to clean / invalidate
caches by set and way. The documentation explicitly says it is not for
enumerating the cache geometry.
________________________________________
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Sent: Thursday, October 29, 2015 9:28 AM
To: Russell King - ARM Linux
Cc: Alexander Van Brunt; linux-arm-kernel at lists.infradead.org; Will Deacon; Sudeep Holla; Catalin Marinas
Subject: Re: [PATCH 0/3] Revert arm64 cache geometry
> On 29 okt. 2015, at 16:43, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote:
>
>> On Thu, Oct 29, 2015 at 12:22:51PM +0900, Ard Biesheuvel wrote:
>> Fair enough. It is a bit disappointing that we cannot trust these
>> values, but if the architecture does not mandate their accuracy, we
>> obviously should not be using them in the way that we are.
>>
>> I think we have similar code in the ARM tree, so we should probably
>> make some changes there as well.
>
> I've opposed exporting the cache dimensions to userspace for several
> reasons:
>
I agree with the arguments below. However, what I refer to here is kernel code that infers whether a certain VIPT cache is non-aliasing based on the way size, which is calculated from values that are exposed to software for the sole purpose of enumerating cachelines by set/way.
> * it will become a nightmare with the various different register formats
> to properly decode these values
> * older CPUs don't have the cache ID registers, so we'd need to augment
> any export with additional static configuration somehow
> * I don't trust userland with this information to make the right choices,
> especially when faced with further levels of caches.
>
> The main reason for people wanting the cache dimensions has been "so we
> can select the optimal code for the CPU". Given all the combinations
> of caches out there, I've always said selecting code based on one level
> of cache is totally insane, and userspace is better off doing some
> performance measurement of its implementations and selecting the most
> appropriate version.
>
> There's many things that affect the performance of code paths with CPUs.
> It's not just about cache line size, but instruction latencies, write
> delays, branch prediction and so forth. You can't _say_ "because this
> CPU has a 32K L1 cache, if I optimise my code as X it'll perform
> better everywhere with a 32K L1 cache than optimised Y."
>
> Selecting code based on cache parameters is just wrong.
>
> There may be cases where userspace would like to know the cache line
> size, so it can appropriately align data structures - but that again
> depends on what you're trying to achieve, and what if the L1 cache
> line size is different from the L2 cache line size...
>
> It's a minefield, one which IMHO userspace shouldn't be trusted with.
> Userspace should assume the worst case cache line size seen in ARM
> CPUs and be done with it.
>
> --
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 3:22 ` [PATCH 0/3] Revert arm64 cache geometry Ard Biesheuvel
2015-10-29 11:20 ` Mark Rutland
2015-10-29 15:43 ` Russell King - ARM Linux
@ 2015-10-29 22:54 ` Alexander Van Brunt
2015-10-30 11:18 ` Will Deacon
2 siblings, 1 reply; 20+ messages in thread
From: Alexander Van Brunt @ 2015-10-29 22:54 UTC (permalink / raw)
To: linux-arm-kernel
>I think we have similar code in the ARM tree, so we should probably
>make some changes there as well.
I don't have a working ARMv7 system anymore. So, I'll have to leave that to
someone else.
>> The only place that the cache geometry is used is to determine if there can be
>> aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
>> The code assumes that there is no need to flush the entire instruction cache
>> if the size of a cache set is less than or equal to a page size. However, the
>> architectural definition of VIPT says "The only architecturally-guaranteed way
>> to invalidate all aliases of a physical address from a VIPT instruction cache
>> is to invalidate the entire instruction cache." Not only are the parameters not
>> guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
>> the parameters were correct.
>>
>
>I understand that this is subject to interpretation, but I would argue
>that this does not apply to the case where you can prove that no
>aliases could ever exist (which is the point of comparing the way size
>to the page size)
The definition of VIPT and PIPT caches in the ARM ARM is import. It does not
describe how the CPU indexes the caches and checks the tags. Instead, it
defines them in terms of the observable aliasing and cache invalidation
behavior. The only difference in their definitions is that VIPT must invalidate
the entire cache to prevent aliasing.
________________________________________
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Sent: Wednesday, October 28, 2015 8:22 PM
To: Alexander Van Brunt; Russell King
Cc: linux-arm-kernel at lists.infradead.org; Will Deacon; Sudeep Holla; Catalin Marinas
Subject: Re: [PATCH 0/3] Revert arm64 cache geometry
On 29 October 2015 at 06:43, Alex Van Brunt <avanbrunt@nvidia.com> wrote:
> This patchset reverts three patches that attempt to query the CPU for cache
> geometry and then make use of that information. Those patches rely on the
> NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
> the architectural documentation for these registers forbids such use:
>
> The parameters NumSets, Associativity, and LineSize in these registers
> define the architecturally visible parameters that are required for the
> cache maintenance by Set/Way instructions. They are not guaranteed to
> represent the actual microarchitectural features of a design. You cannot
> make any inference about the actual sizes of caches based on these
> parameters.
>
> It is not just theoretical. For example, the Denver CPU will report one set and
> one way in CCSIDR even though the actual microarchitectural implementation has
> many sets and many ways.
>
Fair enough. It is a bit disappointing that we cannot trust these
values, but if the architecture does not mandate their accuracy, we
obviously should not be using them in the way that we are.
I think we have similar code in the ARM tree, so we should probably
make some changes there as well.
> I have two suggestions for how to get the cache geometry on an ARMv8 processor:
> 1. Specify the information in the device tree. The purpose of the deivce tree
> is to specify information that software cannot query at run-time. Becuase
> the architecture does not have an architectural way to query the cache
> geometry this may be a good fit.
> 2. Add a function pointer to cpu_table that gives a implementation specific
> way to query the cache geometry. For an A57, for example, the function
> could read the CCSIDR register because it happens to report the
> microarchitectural geometry. The Denver CPU has implementation defined
> registers that can be used to determine the microarchitectural geometry.
> However, the implementation for the default "AArch64 Processor", must
> return an error.
>
> The only place that the cache geometry is used is to determine if there can be
> aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
> The code assumes that there is no need to flush the entire instruction cache
> if the size of a cache set is less than or equal to a page size. However, the
> architectural definition of VIPT says "The only architecturally-guaranteed way
> to invalidate all aliases of a physical address from a VIPT instruction cache
> is to invalidate the entire instruction cache." Not only are the parameters not
> guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
> the parameters were correct.
>
I understand that this is subject to interpretation, but I would argue
that this does not apply to the case where you can prove that no
aliases could ever exist (which is the point of comparing the way size
to the page size)
> Alex Van Brunt (3):
> Revert "arm64: kernel: add support for cpu cache information"
> Revert "arm64: don't flag non-aliasing VIPT I-caches as aliasing"
> Revert "arm64: add helper functions to read I-cache attributes"
>
None of the clarifications you offer here are in the commit logs of
the patches. Since the cover letter does not make it into the
repository, someone looking at the commit log will not have a clue why
these patches were reverted all of a sudden. Could you please update
that? At the same time, could you get rid of the Change-Ids as well?
They are meaningless in the kernel tree.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-28 21:43 [PATCH 0/3] Revert arm64 cache geometry Alex Van Brunt
` (3 preceding siblings ...)
2015-10-29 3:22 ` [PATCH 0/3] Revert arm64 cache geometry Ard Biesheuvel
@ 2015-10-29 11:29 ` Mark Rutland
2015-10-29 11:43 ` Sudeep Holla
2015-10-29 11:40 ` Will Deacon
5 siblings, 1 reply; 20+ messages in thread
From: Mark Rutland @ 2015-10-29 11:29 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Oct 28, 2015 at 02:43:54PM -0700, Alex Van Brunt wrote:
> This patchset reverts three patches that attempt to query the CPU for cache
> geometry and then make use of that information. Those patches rely on the
> NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
> the architectural documentation for these registers forbids such use:
>
> The parameters NumSets, Associativity, and LineSize in these registers
> define the architecturally visible parameters that are required for the
> cache maintenance by Set/Way instructions. They are not guaranteed to
> represent the actual microarchitectural features of a design. You cannot
> make any inference about the actual sizes of caches based on these
> parameters.
>
> It is not just theoretical. For example, the Denver CPU will report one set and
> one way in CCSIDR even though the actual microarchitectural implementation has
> many sets and many ways.
>
> I have two suggestions for how to get the cache geometry on an ARMv8 processor:
> 1. Specify the information in the device tree. The purpose of the deivce tree
> is to specify information that software cannot query at run-time. Becuase
> the architecture does not have an architectural way to query the cache
> geometry this may be a good fit.
We already have to detect the unification of caches via DT, so the geomtery
should probably live there too.
Thanks,
Mark.
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 11:29 ` Mark Rutland
@ 2015-10-29 11:43 ` Sudeep Holla
0 siblings, 0 replies; 20+ messages in thread
From: Sudeep Holla @ 2015-10-29 11:43 UTC (permalink / raw)
To: linux-arm-kernel
On 29/10/15 11:29, Mark Rutland wrote:
> On Wed, Oct 28, 2015 at 02:43:54PM -0700, Alex Van Brunt wrote:
>> This patchset reverts three patches that attempt to query the CPU for cache
>> geometry and then make use of that information. Those patches rely on the
>> NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
>> the architectural documentation for these registers forbids such use:
>>
>> The parameters NumSets, Associativity, and LineSize in these registers
>> define the architecturally visible parameters that are required for the
>> cache maintenance by Set/Way instructions. They are not guaranteed to
>> represent the actual microarchitectural features of a design. You cannot
>> make any inference about the actual sizes of caches based on these
>> parameters.
>>
>> It is not just theoretical. For example, the Denver CPU will report one set and
>> one way in CCSIDR even though the actual microarchitectural implementation has
>> many sets and many ways.
>>
>> I have two suggestions for how to get the cache geometry on an ARMv8 processor:
>> 1. Specify the information in the device tree. The purpose of the deivce tree
>> is to specify information that software cannot query at run-time. Becuase
>> the architecture does not have an architectural way to query the cache
>> geometry this may be a good fit.
>
> We already have to detect the unification of caches via DT, so the geomtery
> should probably live there too.
>
I agree with that and since we are already exposing cache geometry via
sysfs we need to override with values from DT if found instead of
reverting this completely, so that we don't break any user-space.
--
Regards,
Sudeep
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-28 21:43 [PATCH 0/3] Revert arm64 cache geometry Alex Van Brunt
` (4 preceding siblings ...)
2015-10-29 11:29 ` Mark Rutland
@ 2015-10-29 11:40 ` Will Deacon
2015-10-29 23:03 ` Alexander Van Brunt
5 siblings, 1 reply; 20+ messages in thread
From: Will Deacon @ 2015-10-29 11:40 UTC (permalink / raw)
To: linux-arm-kernel
Hi Alex,
Thanks for describing the problem in depth here. I'm not at all happy
about the conclusion, but you're right and thanks for the report.
Minor comments below.
On Wed, Oct 28, 2015 at 02:43:54PM -0700, Alex Van Brunt wrote:
> This patchset reverts three patches that attempt to query the CPU for cache
> geometry and then make use of that information. Those patches rely on the
> NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
> the architectural documentation for these registers forbids such use:
>
> The parameters NumSets, Associativity, and LineSize in these registers
> define the architecturally visible parameters that are required for the
> cache maintenance by Set/Way instructions. They are not guaranteed to
> represent the actual microarchitectural features of a design. You cannot
> make any inference about the actual sizes of caches based on these
> parameters.
>
> It is not just theoretical. For example, the Denver CPU will report one set and
> one way in CCSIDR even though the actual microarchitectural implementation has
> many sets and many ways.
>
> I have two suggestions for how to get the cache geometry on an ARMv8 processor:
> 1. Specify the information in the device tree. The purpose of the deivce tree
> is to specify information that software cannot query at run-time. Becuase
> the architecture does not have an architectural way to query the cache
> geometry this may be a good fit.
> 2. Add a function pointer to cpu_table that gives a implementation specific
> way to query the cache geometry. For an A57, for example, the function
> could read the CCSIDR register because it happens to report the
> microarchitectural geometry. The Denver CPU has implementation defined
> registers that can be used to determine the microarchitectural geometry.
> However, the implementation for the default "AArch64 Processor", must
> return an error.
>
> The only place that the cache geometry is used is to determine if there can be
> aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
> The code assumes that there is no need to flush the entire instruction cache
> if the size of a cache set is less than or equal to a page size. However, the
> architectural definition of VIPT says "The only architecturally-guaranteed way
> to invalidate all aliases of a physical address from a VIPT instruction cache
> is to invalidate the entire instruction cache." Not only are the parameters not
> guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
> the parameters were correct.
This is useful detail -- can you include it in the relevant commit messages
too, please? You can also drop the ChangeId tags and probably add Cc:
<stable@vger.kernel.org> tags as well.
We should also add a comment to the I-cache aliasing code to state why
we always nuke the entire I-cache, so that we don't "optimise" it again
in the future!
My final concern is how this impacts userspace parsing
/sys/devices/system/cpu/cpuX/cache/*. Do we need to stub that out with
dummy values and extend the device-tree properties to allow inner-cache
geometry to be described? I worry that simply removing the files under
there could break more than it solves.
Perhaps the right solution is to leave the cacheinfo code as-is and extend
it so that it prefers to use DT, falling back to the registers if the
properties are absent? That matches up with our treatment of MPIDR for
topology too and reduces the risk of breaking any existing software.
Patch 2 and 3 look fine as straight reverts, though (module my previous
comments about commit messages).
Will
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 11:40 ` Will Deacon
@ 2015-10-29 23:03 ` Alexander Van Brunt
2015-10-30 11:00 ` Catalin Marinas
0 siblings, 1 reply; 20+ messages in thread
From: Alexander Van Brunt @ 2015-10-29 23:03 UTC (permalink / raw)
To: linux-arm-kernel
>> The only place that the cache geometry is used is to determine if there can be
>> aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
>> The code assumes that there is no need to flush the entire instruction cache
>> if the size of a cache set is less than or equal to a page size. However, the
>> architectural definition of VIPT says "The only architecturally-guaranteed way
>> to invalidate all aliases of a physical address from a VIPT instruction cache
>> is to invalidate the entire instruction cache." Not only are the parameters not
>> guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
>> the parameters were correct.
>
>This is useful detail -- can you include it in the relevant commit messages
>too, please? You can also drop the ChangeId tags and probably add Cc:
><stable@vger.kernel.org> tags as well.
>
>We should also add a comment to the I-cache aliasing code to state why
>we always nuke the entire I-cache, so that we don't "optimise" it again
>in the future!
>
I'll take care of those.
>My final concern is how this impacts userspace parsing
>/sys/devices/system/cpu/cpuX/cache/*. Do we need to stub that out with
>dummy values and extend the device-tree properties to allow inner-cache
>geometry to be described? I worry that simply removing the files under
>there could break more than it solves.
>
>Perhaps the right solution is to leave the cacheinfo code as-is and extend
>it so that it prefers to use DT, falling back to the registers if the
>properties are absent? That matches up with our treatment of MPIDR for
>topology too and reduces the risk of breaking any existing software.
>
I agree with Russel that the kernel really shouldn't be reporting the CPU cache
geometry at all. I'll add that the problems he described are worse on systems
with more than one CPU micro-architecture like a big.LITTLE system. In those
systems the cache geometry of the CPU a thread is executing on can change
without notice.
While I think that the Linux kernel should be practical, I think that it should
be written to work with the architectural behavior by default rather than the
way some processors behave. That is important because there are many
implementations of the ARM architecture. If we want all of them to run correctly
by default, then the default must be to use the architectural behavior.
I think that by default, there should not be any
/sys/devices/system/cpu/cpu*/cache/index* nodes. Any user space application
that accesses these nodes already needs to handle N index* nodes. The N = 0 case
is valid. However, that assertion is not based on seeing any application that
uses the nodes.
BTW, it is possible to find the cache line size using CTR_EL0.DminLine and
CTR_EL0.IminLine. It is accessible form userspace. But, it isn't very useful for
application optimization though.
________________________________________
From: Will Deacon <will.deacon@arm.com>
Sent: Thursday, October 29, 2015 4:40 AM
To: Alexander Van Brunt
Cc: linux-arm-kernel at lists.infradead.org; Ard Biesheuvel; Sudeep Holla; Catalin Marinas
Subject: Re: [PATCH 0/3] Revert arm64 cache geometry
Hi Alex,
Thanks for describing the problem in depth here. I'm not at all happy
about the conclusion, but you're right and thanks for the report.
Minor comments below.
On Wed, Oct 28, 2015 at 02:43:54PM -0700, Alex Van Brunt wrote:
> This patchset reverts three patches that attempt to query the CPU for cache
> geometry and then make use of that information. Those patches rely on the
> NumSets and LineSize fields of CCSIDR to determine the cache geometry. However,
> the architectural documentation for these registers forbids such use:
>
> The parameters NumSets, Associativity, and LineSize in these registers
> define the architecturally visible parameters that are required for the
> cache maintenance by Set/Way instructions. They are not guaranteed to
> represent the actual microarchitectural features of a design. You cannot
> make any inference about the actual sizes of caches based on these
> parameters.
>
> It is not just theoretical. For example, the Denver CPU will report one set and
> one way in CCSIDR even though the actual microarchitectural implementation has
> many sets and many ways.
>
> I have two suggestions for how to get the cache geometry on an ARMv8 processor:
> 1. Specify the information in the device tree. The purpose of the deivce tree
> is to specify information that software cannot query at run-time. Becuase
> the architecture does not have an architectural way to query the cache
> geometry this may be a good fit.
> 2. Add a function pointer to cpu_table that gives a implementation specific
> way to query the cache geometry. For an A57, for example, the function
> could read the CCSIDR register because it happens to report the
> microarchitectural geometry. The Denver CPU has implementation defined
> registers that can be used to determine the microarchitectural geometry.
> However, the implementation for the default "AArch64 Processor", must
> return an error.
>
> The only place that the cache geometry is used is to determine if there can be
> aliasing for a VIPT (virtually-indexed, physically-tagged) instruction cache.
> The code assumes that there is no need to flush the entire instruction cache
> if the size of a cache set is less than or equal to a page size. However, the
> architectural definition of VIPT says "The only architecturally-guaranteed way
> to invalidate all aliases of a physical address from a VIPT instruction cache
> is to invalidate the entire instruction cache." Not only are the parameters not
> guaranteed to be correct, it is explicitly not legal to ignore aliasing even if
> the parameters were correct.
This is useful detail -- can you include it in the relevant commit messages
too, please? You can also drop the ChangeId tags and probably add Cc:
<stable@vger.kernel.org> tags as well.
We should also add a comment to the I-cache aliasing code to state why
we always nuke the entire I-cache, so that we don't "optimise" it again
in the future!
My final concern is how this impacts userspace parsing
/sys/devices/system/cpu/cpuX/cache/*. Do we need to stub that out with
dummy values and extend the device-tree properties to allow inner-cache
geometry to be described? I worry that simply removing the files under
there could break more than it solves.
Perhaps the right solution is to leave the cacheinfo code as-is and extend
it so that it prefers to use DT, falling back to the registers if the
properties are absent? That matches up with our treatment of MPIDR for
topology too and reduces the risk of breaking any existing software.
Patch 2 and 3 look fine as straight reverts, though (module my previous
comments about commit messages).
Will
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-29 23:03 ` Alexander Van Brunt
@ 2015-10-30 11:00 ` Catalin Marinas
2015-10-30 11:10 ` Sudeep Holla
0 siblings, 1 reply; 20+ messages in thread
From: Catalin Marinas @ 2015-10-30 11:00 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Oct 29, 2015 at 11:03:27PM +0000, Alexander Van Brunt wrote:
> >My final concern is how this impacts userspace parsing
> >/sys/devices/system/cpu/cpuX/cache/*. Do we need to stub that out with
> >dummy values and extend the device-tree properties to allow inner-cache
> >geometry to be described? I worry that simply removing the files under
> >there could break more than it solves.
> >
> >Perhaps the right solution is to leave the cacheinfo code as-is and extend
> >it so that it prefers to use DT, falling back to the registers if the
> >properties are absent? That matches up with our treatment of MPIDR for
> >topology too and reduces the risk of breaking any existing software.
>
> I agree with Russel that the kernel really shouldn't be reporting the CPU cache
> geometry at all.
This would be the immediate reaction but people work around it in other
ways like parsing the MIDR from /proc/cpuinfo and hard-coding cache
topology in their code (when they are after a 1-2% improvement in an
artificial benchmark that doesn't always have relevance to the real
world). But, well, benchmarks are user space software that use a kernel
provided ABI.
> I'll add that the problems he described are worse on systems
> with more than one CPU micro-architecture like a big.LITTLE system. In those
> systems the cache geometry of the CPU a thread is executing on can change
> without notice.
This information is not meant to be continuously read at run-time but
rather parsed once and choosing the best implementation that would be
acceptable on all CPUs. Big.LITTLE is one of the reasons for such
information where people would like to see the cache levels, some of
them potentially common to all the clusters in the system.
We've got repeated requests from the toolchain, graphics, Android people
to expose such information, so we gave in and merged the patches. Since
people are doing insane things in user space anyway, I'd rather have a
standard interface for passing the information they need. Similarly, we
have patches for exposing more of the CPUID space to user, though they
are not merged yet.
> While I think that the Linux kernel should be practical, I think that it should
> be written to work with the architectural behavior by default rather than the
> way some processors behave. That is important because there are many
> implementations of the ARM architecture. If we want all of them to run correctly
> by default, then the default must be to use the architectural behavior.
The Linux *kernel* is written to work with the architectural behaviour.
What we are talking about here is user space making use of
implementation specific behaviour.
> I think that by default, there should not be any
> /sys/devices/system/cpu/cpu*/cache/index* nodes. Any user space application
> that accesses these nodes already needs to handle N index* nodes. The N = 0 case
> is valid. However, that assertion is not based on seeing any application that
> uses the nodes.
In general, user space should cope with not seeing these files,
especially since it's new ABI and not present in older kernels. OTOH, it
may directly try to access the current CPU information without bothering
with the whole list (tough for big.LITTLE).
BTW, it looks like I don't get any cache information on Juno with
4.3-rc7, it says "Unable to detect cache hierarchy from DT for CPU 0". I
thought the point of the patch you are trying to revert was to parse
this information from the hardware registers. Sudeep, any idea?
--
Catalin
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-30 11:00 ` Catalin Marinas
@ 2015-10-30 11:10 ` Sudeep Holla
2015-10-30 11:57 ` Catalin Marinas
0 siblings, 1 reply; 20+ messages in thread
From: Sudeep Holla @ 2015-10-30 11:10 UTC (permalink / raw)
To: linux-arm-kernel
On 30/10/15 11:00, Catalin Marinas wrote:
> On Thu, Oct 29, 2015 at 11:03:27PM +0000, Alexander Van Brunt wrote:
[...]
> BTW, it looks like I don't get any cache information on Juno with
> 4.3-rc7, it says "Unable to detect cache hierarchy from DT for CPU
> 0". I thought the point of the patch you are trying to revert was to
> parse this information from the hardware registers. Sudeep, any
> idea?
>
Yes you need to have updated DT from the mainline. Since the
architecture doesn't provide any way of detecting the cpus sharing
particular cache, we get that information from DT.
If DT lacks that info we don't expose any information. It would be wrong
to say that a shared cache is shared by all the cpus in the system. DT
check was added explicitly to avoid that.
--
Regards,
Sudeep
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-30 11:10 ` Sudeep Holla
@ 2015-10-30 11:57 ` Catalin Marinas
2015-10-30 12:27 ` Sudeep Holla
0 siblings, 1 reply; 20+ messages in thread
From: Catalin Marinas @ 2015-10-30 11:57 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Oct 30, 2015 at 11:10:28AM +0000, Sudeep Holla wrote:
> On 30/10/15 11:00, Catalin Marinas wrote:
> >BTW, it looks like I don't get any cache information on Juno with
> >4.3-rc7, it says "Unable to detect cache hierarchy from DT for CPU
> >0". I thought the point of the patch you are trying to revert was to
> >parse this information from the hardware registers. Sudeep, any
> >idea?
> >
>
> Yes you need to have updated DT from the mainline. Since the
> architecture doesn't provide any way of detecting the cpus sharing
> particular cache, we get that information from DT.
>
> If DT lacks that info we don't expose any information. It would be wrong
> to say that a shared cache is shared by all the cpus in the system. DT
> check was added explicitly to avoid that.
OK, thanks for the information. The good thing is that use space should
already be able to cope with cache information not present in sysfs.
So, rather than reverting this feature, for the Denver case we have the
option of either providing full cache topology information in DT or
providing a DT flag which states whether hardware cache ID information
is relevant.
What about ACPI? Are there any provisions for specifying such
information?
--
Catalin
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 0/3] Revert arm64 cache geometry
2015-10-30 11:57 ` Catalin Marinas
@ 2015-10-30 12:27 ` Sudeep Holla
0 siblings, 0 replies; 20+ messages in thread
From: Sudeep Holla @ 2015-10-30 12:27 UTC (permalink / raw)
To: linux-arm-kernel
On 30/10/15 11:57, Catalin Marinas wrote:
> On Fri, Oct 30, 2015 at 11:10:28AM +0000, Sudeep Holla wrote:
>> On 30/10/15 11:00, Catalin Marinas wrote:
>>> BTW, it looks like I don't get any cache information on Juno
>>> with 4.3-rc7, it says "Unable to detect cache hierarchy from DT
>>> for CPU 0". I thought the point of the patch you are trying to
>>> revert was to parse this information from the hardware registers.
>>> Sudeep, any idea?
>>>
>>
>> Yes you need to have updated DT from the mainline. Since the
>> architecture doesn't provide any way of detecting the cpus sharing
>> particular cache, we get that information from DT.
>>
>> If DT lacks that info we don't expose any information. It would be
>> wrong to say that a shared cache is shared by all the cpus in the
>> system. DT check was added explicitly to avoid that.
>
> OK, thanks for the information. The good thing is that use space
> should already be able to cope with cache information not present in
> sysfs.
>
True.
> So, rather than reverting this feature, for the Denver case we have
> the option of either providing full cache topology information in DT
> or providing a DT flag which states whether hardware cache ID
> information is relevant.
>
Yes at-least few L2 cache controllers where either hardware can't
provide that information or provides incorrect one, already use/override
values from DT.
We can probably patch core cacheinfo itself as PPC also completely
depends on DT(though not yet migrated to use generic cacheinfo). In that
way we can avoid calling arch code if all the information is found in DT.
> What about ACPI? Are there any provisions for specifying such
> information?
>
Not yet.
--
Regards,
Sudeep
^ permalink raw reply [flat|nested] 20+ messages in thread