All of lore.kernel.org
 help / color / mirror / Atom feed
From: will.deacon@arm.com (Will Deacon)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH 8/8] arm64: Work around systems with mismatched cache line sizes
Date: Mon, 22 Aug 2016 14:02:05 +0100	[thread overview]
Message-ID: <20160822130205.GF14680@arm.com> (raw)
In-Reply-To: <1471525832-21209-9-git-send-email-suzuki.poulose@arm.com>

On Thu, Aug 18, 2016 at 02:10:32PM +0100, Suzuki K Poulose wrote:
> Systems with differing CPU i-cache/d-cache line sizes can cause
> problems with the cache management by software when the execution
> is migrated from one to another. Usually, the application reads
> the cache size on a CPU and then uses that length to perform cache
> operations. However, if it gets migrated to another CPU with a smaller
> cache line size, things could go completely wrong. To prevent such
> cases, always use the smallest cache line size among the CPUs. The
> kernel CPU feature infrastructure already keeps track of the safe
> value for all CPUID registers including CTR. This patch works around
> the problem by :
> 
> For kernel, dynamically patch the kernel to read the cache size
> from the system wide copy of CTR_EL0.

Is it only CTR that is mismatched in practice, or do we need to worry
about DCZID_EL0 too?

> For applications, trap read accesses to CTR_EL0 (by clearing the SCTLR.UCT)
> and emulate the mrs instruction to return the system wide safe value
> of CTR_EL0.
> 
> For faster access (i.e, avoiding to lookup the system wide value of CTR_EL0
> via read_system_reg), we keep track of the pointer to table entry for
> CTR_EL0 in the CPU feature infrastructure.
> 
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Andre Przywara <andre.przywara@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> ---
>  arch/arm64/include/asm/assembler.h  | 25 +++++++++++++++++++++++--
>  arch/arm64/include/asm/cpufeature.h |  4 +++-
>  arch/arm64/include/asm/esr.h        |  8 ++++++++
>  arch/arm64/include/asm/sysreg.h     |  1 +
>  arch/arm64/kernel/asm-offsets.c     |  2 ++
>  arch/arm64/kernel/cpu_errata.c      | 22 ++++++++++++++++++++++
>  arch/arm64/kernel/cpufeature.c      |  9 +++++++++
>  arch/arm64/kernel/traps.c           | 14 ++++++++++++++
>  8 files changed, 82 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index a4bb3f5..66bd268 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -216,6 +216,21 @@ lr	.req	x30		// link register
>  	.macro	mmid, rd, rn
>  	ldr	\rd, [\rn, #MM_CONTEXT_ID]
>  	.endm
> +/*
> + * read_ctr - read CTR_EL0. If the system has mismatched
> + * cache line sizes, provide the system wide safe value.
> + */
> +	.macro	read_ctr, reg
> +alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE
> +	mrs	\reg, ctr_el0			// read CTR
> +	nop
> +	nop
> +alternative_else
> +	ldr_l	\reg, sys_ctr_ftr		// Read system wide safe CTR value
> +	ldr	\reg, [\reg, #ARM64_FTR_SYSVAL] // from sys_ctr_ftr->sys_val
> +alternative_endif
> +	.endm
> +
>  
>  /*
>   * raw_dcache_line_size - get the minimum D-cache line size on this CPU
> @@ -232,7 +247,10 @@ lr	.req	x30		// link register
>   * dcache_line_size - get the safe D-cache line size across all CPUs
>   */
>  	.macro	dcache_line_size, reg, tmp
> -	raw_dcache_line_size	\reg, \tmp
> +	read_ctr	\tmp
> +	ubfm		\tmp, \tmp, #16, #19	// cache line size encoding
> +	mov		\reg, #4		// bytes per word
> +	lsl		\reg, \reg, \tmp	// actual cache line size
>  	.endm
>  
>  /*
> @@ -250,7 +268,10 @@ lr	.req	x30		// link register
>   * icache_line_size - get the safe I-cache line size across all CPUs
>   */
>  	.macro	icache_line_size, reg, tmp
> -	raw_icache_line_size	\reg, \tmp
> +	read_ctr	\tmp
> +	and		\tmp, \tmp, #0xf	// cache line size encoding
> +	mov		\reg, #4		// bytes per word
> +	lsl		\reg, \reg, \tmp	// actual cache line size
>  	.endm
>  
>  /*
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 692b8d3..e99f2af 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -37,8 +37,9 @@
>  #define ARM64_WORKAROUND_CAVIUM_27456		12
>  #define ARM64_HAS_32BIT_EL0			13
>  #define ARM64_HYP_OFFSET_LOW			14
> +#define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
>  
> -#define ARM64_NCAPS				15
> +#define ARM64_NCAPS				16
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -109,6 +110,7 @@ struct arm64_cpu_capabilities {
>  };
>  
>  extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
> +extern struct arm64_ftr_reg *sys_ctr_ftr;
>  
>  bool this_cpu_has_cap(unsigned int cap);
>  
> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
> index 2a8f6c3..51aea89 100644
> --- a/arch/arm64/include/asm/esr.h
> +++ b/arch/arm64/include/asm/esr.h
> @@ -139,6 +139,9 @@
>  					 ((Op2) << ESR_ELx_SYS64_ISS_Op2_SHIFT) | \
>  					 ((CRn) << ESR_ELx_SYS64_ISS_CRn_SHIFT) | \
>  					 ((CRm) << ESR_ELx_SYS64_ISS_CRm_SHIFT))
> +
> +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK	(ESR_ELx_SYS64_ISS_SYS_MASK | \
> +					 ESR_ELx_SYS64_ISS_DIR_MASK)
>  /*
>   * User space cache operations have the following sysreg encoding
>   * in System instructions.
> @@ -157,6 +160,11 @@
>  #define ESR_ELx_SYS64_ISS_U_CACHE_OP_VAL \
>  				(ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \
>  				 ESR_ELx_SYS64_ISS_DIR_WRITE)
> +
> +#define ESR_ELx_SYS64_ISS_SYS_CTR	ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0)
> +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ	(ESR_ELx_SYS64_ISS_SYS_CTR | \
> +					 ESR_ELx_SYS64_ISS_DIR_READ)
> +
>  #ifndef __ASSEMBLY__
>  #include <asm/types.h>
>  
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index cc06794..4f26ad5 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -100,6 +100,7 @@
>  /* SCTLR_EL1 specific flags. */
>  #define SCTLR_EL1_UCI		(1 << 26)
>  #define SCTLR_EL1_SPAN		(1 << 23)
> +#define SCTLR_EL1_UCT		(1 << 15)
>  #define SCTLR_EL1_SED		(1 << 8)
>  #define SCTLR_EL1_CP15BEN	(1 << 5)
>  
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 05070b7..4a2f0f0 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -23,6 +23,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/kvm_host.h>
>  #include <linux/suspend.h>
> +#include <asm/cpufeature.h>
>  #include <asm/thread_info.h>
>  #include <asm/memory.h>
>  #include <asm/smp_plat.h>
> @@ -145,5 +146,6 @@ int main(void)
>    DEFINE(HIBERN_PBE_ORIG,	offsetof(struct pbe, orig_address));
>    DEFINE(HIBERN_PBE_ADDR,	offsetof(struct pbe, address));
>    DEFINE(HIBERN_PBE_NEXT,	offsetof(struct pbe, next));
> +  DEFINE(ARM64_FTR_SYSVAL,	offsetof(struct arm64_ftr_reg, sys_val));
>    return 0;
>  }
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 5836b3d..621fa7f 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -30,6 +30,21 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
>  				       entry->midr_range_max);
>  }
>  
> +static bool
> +has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
> +				int scope)
> +{
> +	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
> +	return (read_cpuid_cachetype() & sys_ctr_ftr->strict_mask) !=
> +		 (sys_ctr_ftr->sys_val & sys_ctr_ftr->strict_mask);
> +}
> +
> +static void cpu_enable_trap_ctr_access(void *__unused)
> +{
> +	/* Clear SCTLR_EL1.UCT */
> +	config_sctlr_el1(SCTLR_EL1_UCT, 0);
> +}
> +
>  #define MIDR_RANGE(model, min, max) \
>  	.def_scope = SCOPE_LOCAL_CPU, \
>  	.matches = is_affected_midr_range, \
> @@ -108,6 +123,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>  	},
>  #endif
>  	{
> +		.desc = "Mismatched cache line size",
> +		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
> +		.matches = has_mismatched_cache_line_size,
> +		.def_scope = SCOPE_LOCAL_CPU,
> +		.enable = cpu_enable_trap_ctr_access,
> +	},
> +	{
>  	}
>  };
>  
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index fcf87ca..c1db313 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -46,6 +46,13 @@ unsigned int compat_elf_hwcap2 __read_mostly;
>  
>  DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>  
> +/*
> + * If we have mismatched cache line sizes, we need to emulate access to
> + * CTRL_EL0. To avoid a lookup everytime (via read_system_reg()), cache
> + * the table entry for CTR_EL0.
> + */
> +struct arm64_ftr_reg *sys_ctr_ftr;
> +
>  #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
>  	{						\
>  		.sign = SIGNED,				\
> @@ -461,6 +468,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>  		init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
>  	}
>  
> +	/* Keep track of the CTR feature register */
> +	sys_ctr_ftr = get_arm64_ftr_reg(SYS_CTR_EL0);
>  }
>  
>  static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 93c5287..db2d6cb 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -480,6 +480,14 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
>  		regs->pc += 4;
>  }
>  
> +static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
> +{
> +	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
> +
> +	regs->regs[rt] = sys_ctr_ftr->sys_val;
> +	regs->pc += 4;
> +}

Whilst this is correct, I wonder if there's any advantage in reporting a
*larger* size to userspace and avoid incurring additional trap overhead?

Any idea what sort of size typical JITs are using?

Will

WARNING: multiple messages have this Message-ID (diff)
From: Will Deacon <will.deacon@arm.com>
To: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, catalin.marinas@arm.com,
	mark.rutland@arm.com, andre.przywara@arm.com
Subject: Re: [PATCH 8/8] arm64: Work around systems with mismatched cache line sizes
Date: Mon, 22 Aug 2016 14:02:05 +0100	[thread overview]
Message-ID: <20160822130205.GF14680@arm.com> (raw)
In-Reply-To: <1471525832-21209-9-git-send-email-suzuki.poulose@arm.com>

On Thu, Aug 18, 2016 at 02:10:32PM +0100, Suzuki K Poulose wrote:
> Systems with differing CPU i-cache/d-cache line sizes can cause
> problems with the cache management by software when the execution
> is migrated from one to another. Usually, the application reads
> the cache size on a CPU and then uses that length to perform cache
> operations. However, if it gets migrated to another CPU with a smaller
> cache line size, things could go completely wrong. To prevent such
> cases, always use the smallest cache line size among the CPUs. The
> kernel CPU feature infrastructure already keeps track of the safe
> value for all CPUID registers including CTR. This patch works around
> the problem by :
> 
> For kernel, dynamically patch the kernel to read the cache size
> from the system wide copy of CTR_EL0.

Is it only CTR that is mismatched in practice, or do we need to worry
about DCZID_EL0 too?

> For applications, trap read accesses to CTR_EL0 (by clearing the SCTLR.UCT)
> and emulate the mrs instruction to return the system wide safe value
> of CTR_EL0.
> 
> For faster access (i.e, avoiding to lookup the system wide value of CTR_EL0
> via read_system_reg), we keep track of the pointer to table entry for
> CTR_EL0 in the CPU feature infrastructure.
> 
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Andre Przywara <andre.przywara@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> ---
>  arch/arm64/include/asm/assembler.h  | 25 +++++++++++++++++++++++--
>  arch/arm64/include/asm/cpufeature.h |  4 +++-
>  arch/arm64/include/asm/esr.h        |  8 ++++++++
>  arch/arm64/include/asm/sysreg.h     |  1 +
>  arch/arm64/kernel/asm-offsets.c     |  2 ++
>  arch/arm64/kernel/cpu_errata.c      | 22 ++++++++++++++++++++++
>  arch/arm64/kernel/cpufeature.c      |  9 +++++++++
>  arch/arm64/kernel/traps.c           | 14 ++++++++++++++
>  8 files changed, 82 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index a4bb3f5..66bd268 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -216,6 +216,21 @@ lr	.req	x30		// link register
>  	.macro	mmid, rd, rn
>  	ldr	\rd, [\rn, #MM_CONTEXT_ID]
>  	.endm
> +/*
> + * read_ctr - read CTR_EL0. If the system has mismatched
> + * cache line sizes, provide the system wide safe value.
> + */
> +	.macro	read_ctr, reg
> +alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE
> +	mrs	\reg, ctr_el0			// read CTR
> +	nop
> +	nop
> +alternative_else
> +	ldr_l	\reg, sys_ctr_ftr		// Read system wide safe CTR value
> +	ldr	\reg, [\reg, #ARM64_FTR_SYSVAL] // from sys_ctr_ftr->sys_val
> +alternative_endif
> +	.endm
> +
>  
>  /*
>   * raw_dcache_line_size - get the minimum D-cache line size on this CPU
> @@ -232,7 +247,10 @@ lr	.req	x30		// link register
>   * dcache_line_size - get the safe D-cache line size across all CPUs
>   */
>  	.macro	dcache_line_size, reg, tmp
> -	raw_dcache_line_size	\reg, \tmp
> +	read_ctr	\tmp
> +	ubfm		\tmp, \tmp, #16, #19	// cache line size encoding
> +	mov		\reg, #4		// bytes per word
> +	lsl		\reg, \reg, \tmp	// actual cache line size
>  	.endm
>  
>  /*
> @@ -250,7 +268,10 @@ lr	.req	x30		// link register
>   * icache_line_size - get the safe I-cache line size across all CPUs
>   */
>  	.macro	icache_line_size, reg, tmp
> -	raw_icache_line_size	\reg, \tmp
> +	read_ctr	\tmp
> +	and		\tmp, \tmp, #0xf	// cache line size encoding
> +	mov		\reg, #4		// bytes per word
> +	lsl		\reg, \reg, \tmp	// actual cache line size
>  	.endm
>  
>  /*
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index 692b8d3..e99f2af 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -37,8 +37,9 @@
>  #define ARM64_WORKAROUND_CAVIUM_27456		12
>  #define ARM64_HAS_32BIT_EL0			13
>  #define ARM64_HYP_OFFSET_LOW			14
> +#define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
>  
> -#define ARM64_NCAPS				15
> +#define ARM64_NCAPS				16
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -109,6 +110,7 @@ struct arm64_cpu_capabilities {
>  };
>  
>  extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
> +extern struct arm64_ftr_reg *sys_ctr_ftr;
>  
>  bool this_cpu_has_cap(unsigned int cap);
>  
> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
> index 2a8f6c3..51aea89 100644
> --- a/arch/arm64/include/asm/esr.h
> +++ b/arch/arm64/include/asm/esr.h
> @@ -139,6 +139,9 @@
>  					 ((Op2) << ESR_ELx_SYS64_ISS_Op2_SHIFT) | \
>  					 ((CRn) << ESR_ELx_SYS64_ISS_CRn_SHIFT) | \
>  					 ((CRm) << ESR_ELx_SYS64_ISS_CRm_SHIFT))
> +
> +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK	(ESR_ELx_SYS64_ISS_SYS_MASK | \
> +					 ESR_ELx_SYS64_ISS_DIR_MASK)
>  /*
>   * User space cache operations have the following sysreg encoding
>   * in System instructions.
> @@ -157,6 +160,11 @@
>  #define ESR_ELx_SYS64_ISS_U_CACHE_OP_VAL \
>  				(ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \
>  				 ESR_ELx_SYS64_ISS_DIR_WRITE)
> +
> +#define ESR_ELx_SYS64_ISS_SYS_CTR	ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0)
> +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ	(ESR_ELx_SYS64_ISS_SYS_CTR | \
> +					 ESR_ELx_SYS64_ISS_DIR_READ)
> +
>  #ifndef __ASSEMBLY__
>  #include <asm/types.h>
>  
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index cc06794..4f26ad5 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -100,6 +100,7 @@
>  /* SCTLR_EL1 specific flags. */
>  #define SCTLR_EL1_UCI		(1 << 26)
>  #define SCTLR_EL1_SPAN		(1 << 23)
> +#define SCTLR_EL1_UCT		(1 << 15)
>  #define SCTLR_EL1_SED		(1 << 8)
>  #define SCTLR_EL1_CP15BEN	(1 << 5)
>  
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 05070b7..4a2f0f0 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -23,6 +23,7 @@
>  #include <linux/dma-mapping.h>
>  #include <linux/kvm_host.h>
>  #include <linux/suspend.h>
> +#include <asm/cpufeature.h>
>  #include <asm/thread_info.h>
>  #include <asm/memory.h>
>  #include <asm/smp_plat.h>
> @@ -145,5 +146,6 @@ int main(void)
>    DEFINE(HIBERN_PBE_ORIG,	offsetof(struct pbe, orig_address));
>    DEFINE(HIBERN_PBE_ADDR,	offsetof(struct pbe, address));
>    DEFINE(HIBERN_PBE_NEXT,	offsetof(struct pbe, next));
> +  DEFINE(ARM64_FTR_SYSVAL,	offsetof(struct arm64_ftr_reg, sys_val));
>    return 0;
>  }
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 5836b3d..621fa7f 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -30,6 +30,21 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
>  				       entry->midr_range_max);
>  }
>  
> +static bool
> +has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
> +				int scope)
> +{
> +	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
> +	return (read_cpuid_cachetype() & sys_ctr_ftr->strict_mask) !=
> +		 (sys_ctr_ftr->sys_val & sys_ctr_ftr->strict_mask);
> +}
> +
> +static void cpu_enable_trap_ctr_access(void *__unused)
> +{
> +	/* Clear SCTLR_EL1.UCT */
> +	config_sctlr_el1(SCTLR_EL1_UCT, 0);
> +}
> +
>  #define MIDR_RANGE(model, min, max) \
>  	.def_scope = SCOPE_LOCAL_CPU, \
>  	.matches = is_affected_midr_range, \
> @@ -108,6 +123,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>  	},
>  #endif
>  	{
> +		.desc = "Mismatched cache line size",
> +		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
> +		.matches = has_mismatched_cache_line_size,
> +		.def_scope = SCOPE_LOCAL_CPU,
> +		.enable = cpu_enable_trap_ctr_access,
> +	},
> +	{
>  	}
>  };
>  
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index fcf87ca..c1db313 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -46,6 +46,13 @@ unsigned int compat_elf_hwcap2 __read_mostly;
>  
>  DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
>  
> +/*
> + * If we have mismatched cache line sizes, we need to emulate access to
> + * CTRL_EL0. To avoid a lookup everytime (via read_system_reg()), cache
> + * the table entry for CTR_EL0.
> + */
> +struct arm64_ftr_reg *sys_ctr_ftr;
> +
>  #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
>  	{						\
>  		.sign = SIGNED,				\
> @@ -461,6 +468,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>  		init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
>  	}
>  
> +	/* Keep track of the CTR feature register */
> +	sys_ctr_ftr = get_arm64_ftr_reg(SYS_CTR_EL0);
>  }
>  
>  static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 93c5287..db2d6cb 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -480,6 +480,14 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
>  		regs->pc += 4;
>  }
>  
> +static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
> +{
> +	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
> +
> +	regs->regs[rt] = sys_ctr_ftr->sys_val;
> +	regs->pc += 4;
> +}

Whilst this is correct, I wonder if there's any advantage in reporting a
*larger* size to userspace and avoid incurring additional trap overhead?

Any idea what sort of size typical JITs are using?

Will

  reply	other threads:[~2016-08-22 13:02 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-18 13:10 [RESEND] [PATCH 0/8] arm64: Work around for mismatched cache line size Suzuki K Poulose
2016-08-18 13:10 ` Suzuki K Poulose
2016-08-18 13:10 ` [PATCH 1/8] arm64: Set the safe value for L1 icache policy Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-18 13:10 ` [PATCH 2/8] arm64: Use consistent naming for errata handling Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-18 13:10 ` [PATCH 3/8] arm64: Rearrange CPU errata workaround checks Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-18 13:10 ` [PATCH 4/8] arm64: insn: Add helpers for adrp offsets Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-18 14:47   ` Marc Zyngier
2016-08-18 14:47     ` Marc Zyngier
2016-08-18 14:52     ` Suzuki K Poulose
2016-08-18 14:52       ` Suzuki K Poulose
2016-08-18 13:10 ` [PATCH 5/8] arm64: alternative: Add support for patching adrp instructions Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-22 11:19   ` Will Deacon
2016-08-22 11:19     ` Will Deacon
2016-08-23  9:39     ` Suzuki K Poulose
2016-08-23  9:39       ` Suzuki K Poulose
2016-08-22 11:45   ` Ard Biesheuvel
2016-08-22 11:45     ` Ard Biesheuvel
2016-08-23  9:16     ` Suzuki K Poulose
2016-08-23  9:16       ` Suzuki K Poulose
2016-08-23 11:32       ` Ard Biesheuvel
2016-08-23 11:32         ` Ard Biesheuvel
2016-08-18 13:10 ` [PATCH 6/8] arm64: Introduce raw_{d,i}cache_line_size Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-18 17:57   ` Geoff Levand
2016-08-18 17:57     ` Geoff Levand
2016-08-22 10:00   ` Will Deacon
2016-08-22 10:00     ` Will Deacon
2016-08-23 10:07     ` Suzuki K Poulose
2016-08-23 10:07       ` Suzuki K Poulose
2016-08-18 13:10 ` [PATCH 7/8] arm64: Refactor sysinstr exception handling Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-22 12:53   ` Will Deacon
2016-08-22 12:53     ` Will Deacon
2016-08-23 10:19     ` Suzuki K Poulose
2016-08-23 10:19       ` Suzuki K Poulose
2016-08-18 13:10 ` [PATCH 8/8] arm64: Work around systems with mismatched cache line sizes Suzuki K Poulose
2016-08-18 13:10   ` Suzuki K Poulose
2016-08-22 13:02   ` Will Deacon [this message]
2016-08-22 13:02     ` Will Deacon
2016-08-24 13:23     ` Suzuki K Poulose
2016-08-24 13:23       ` Suzuki K Poulose
  -- strict thread matches above, loose matches on Subject: below --
2016-07-08 11:37 [PATCH 0/8] arm64: Work around for mismatched cache line size Suzuki K Poulose
2016-07-08 11:37 ` [PATCH 8/8] arm64: Work around systems with mismatched cache line sizes Suzuki K Poulose
2016-07-08 11:37   ` Suzuki K Poulose

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160822130205.GF14680@arm.com \
    --to=will.deacon@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.