public inbox for linux-tegra@vger.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 2/2] arm64: Check if GMID_EL1.BS is the same on all CPUs
       [not found] ` <20210511182322.3830-3-catalin.marinas@arm.com>
@ 2021-05-26 19:47   ` Jon Hunter
  2021-05-26 21:08     ` Will Deacon
  2021-05-26 21:48     ` Catalin Marinas
  0 siblings, 2 replies; 4+ messages in thread
From: Jon Hunter @ 2021-05-26 19:47 UTC (permalink / raw)
  To: Catalin Marinas, linux-arm-kernel
  Cc: Will Deacon, Mark Rutland, Suzuki K Poulose, linux-tegra


On 11/05/2021 19:23, Catalin Marinas wrote:
> The GMID_EL1.BS field determines the number of tags accessed by the
> LDGM/STGM instructions (EL1 and up), used by the kernel for copying or
> zeroing page tags.
> 
> Taint the kernel if GMID_EL1.BS differs between CPUs.
> 
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>
> ---
>  arch/arm64/include/asm/cpu.h   |  1 +
>  arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++
>  arch/arm64/kernel/cpuinfo.c    |  1 +
>  3 files changed, 19 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
> index fe5a8499ddc2..9088e72c7cf6 100644
> --- a/arch/arm64/include/asm/cpu.h
> +++ b/arch/arm64/include/asm/cpu.h
> @@ -20,6 +20,7 @@ struct cpuinfo_arm64 {
>  	u64		reg_dczid;
>  	u64		reg_midr;
>  	u64		reg_revidr;
> +	u64		reg_gmid;
>  
>  	u64		reg_id_aa64dfr0;
>  	u64		reg_id_aa64dfr1;
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index ca66a61bb396..3b9089ca52dc 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -401,6 +401,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = {
>  	ARM64_FTR_END,
>  };
>  
> +static const struct arm64_ftr_bits ftr_gmid[] = {
> +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0),
> +	ARM64_FTR_END,
> +};
> +
>  static const struct arm64_ftr_bits ftr_id_isar0[] = {
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0),
> @@ -618,6 +623,9 @@ static const struct __ftr_reg_entry {
>  	/* Op1 = 0, CRn = 1, CRm = 2 */
>  	ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
>  
> +	/* Op1 = 1, CRn = 0, CRm = 0 */
> +	ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
> +
>  	/* Op1 = 3, CRn = 0, CRm = 0 */
>  	{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
>  	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
> @@ -872,6 +880,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
>  	init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
>  	init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
>  	init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
> +	init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
>  	init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
>  	init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
>  	init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
> @@ -1082,6 +1091,14 @@ void update_cpu_features(int cpu,
>  	taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu,
>  				      info->reg_dczid, boot->reg_dczid);
>  
> +	/*
> +	 * The kernel uses the LDGM/STGM instructions and the number of tags
> +	 * they read/write depends on the GMID_EL1.BS field. Check that the
> +	 * value is the same on all CPUs.
> +	 */
> +	taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu,
> +				      info->reg_gmid, boot->reg_gmid);
> +
>  	/* If different, timekeeping will be broken (especially with KVM) */
>  	taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu,
>  				      info->reg_cntfrq, boot->reg_cntfrq);
> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
> index 4bea701117d4..cd9f2d51285b 100644
> --- a/arch/arm64/kernel/cpuinfo.c
> +++ b/arch/arm64/kernel/cpuinfo.c
> @@ -359,6 +359,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
>  	info->reg_dczid = read_cpuid(DCZID_EL0);
>  	info->reg_midr = read_cpuid_id();
>  	info->reg_revidr = read_cpuid(REVIDR_EL1);
> +	info->reg_gmid = read_cpuid(GMID_EL1);
>  
>  	info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
>  	info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);


I am seeing the following undefined instruction crash on all our 
ARM64 Tegra devices on today's -next and bisect is pointing to
this patch. Reverting this patch on top of -next does fix the 
problem. Let me know if you have any thoughts.

Thanks!
Jon

[    0.000000] ------------[ cut here ]------------
[    0.000000] kernel BUG at /dvs/git/dirty/git-master_l4t-upstream/kernel/arch/arm64/kernel/traps.c:406!
[    0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.13.0-rc3-next-20210526-gf6b46ef27317 #1
[    0.000000] Hardware name: NVIDIA Jetson TX2 Developer Kit (DT)
[    0.000000] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO BTYPE=--)
[    0.000000] pc : do_undefinstr+0x298/0x2b0
[    0.000000] lr : do_undefinstr+0x2a8/0x2b0
[    0.000000] sp : ffff800011cb3c10
[    0.000000] x29: ffff800011cb3c10 x28: ffff800011cc3540 x27: 0000000000000002
[    0.000000] x26: ffff800011760008 x25: ffff0001f4deccc0 x24: ffff800011cb9000
[    0.000000] x23: ffff800011fb50f8 x22: ffff800011fb5000 x21: 00000000d5390080
[    0.000000] x20: ffff800011cb3c90 x19: ffff800011cb9000 x18: ffffffffffffffff
[    0.000000] x17: 0000000000017000 x16: 0000000000000000 x15: 000000000000001e
[    0.000000] x14: 0000000000200000 x13: 0000000275e00000 x12: 0000001000000000
[    0.000000] x11: 00000000009fc580 x10: 0000000274e03a80 x9 : 0000001000000000
[    0.000000] x8 : 0000000000200000 x7 : 0000000000000003 x6 : 0000000000000000
[    0.000000] x5 : ffff800011cc5910 x4 : 0000000000000000 x3 : ffff800011fb50f8
[    0.000000] x2 : 0000000000000000 x1 : ffff800011cc3540 x0 : 0000000000000005
[    0.000000] Call trace:
[    0.000000]  do_undefinstr+0x298/0x2b0
[    0.000000]  el1_undef+0x2c/0x48
[    0.000000]  el1_sync_handler+0xb4/0xd0
[    0.000000]  el1_sync+0x74/0x100
[    0.000000]  __cpuinfo_store_cpu+0x5c/0x248
[    0.000000]  cpuinfo_store_boot_cpu+0x28/0x54
[    0.000000]  smp_prepare_boot_cpu+0x2c/0x38
[    0.000000]  start_kernel+0x1a4/0x62c
[    0.000000]  __primary_switched+0x8c/0x90
[    0.000000] Code: b5fffe40 b94047b5 17ffffca d503201f (d4210000) 
[    0.000000] random: get_random_bytes called from print_oops_end_marker+0x4c/0x68 with crng_init=0
[    0.000000] ---[ end trace 0000000000000000 ]---

-- 
nvpublic

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] arm64: Check if GMID_EL1.BS is the same on all CPUs
  2021-05-26 19:47   ` [PATCH 2/2] arm64: Check if GMID_EL1.BS is the same on all CPUs Jon Hunter
@ 2021-05-26 21:08     ` Will Deacon
  2021-05-26 21:48     ` Catalin Marinas
  1 sibling, 0 replies; 4+ messages in thread
From: Will Deacon @ 2021-05-26 21:08 UTC (permalink / raw)
  To: Jon Hunter
  Cc: Catalin Marinas, linux-arm-kernel, Mark Rutland, Suzuki K Poulose,
	linux-tegra

On Wed, May 26, 2021 at 08:47:16PM +0100, Jon Hunter wrote:
> On 11/05/2021 19:23, Catalin Marinas wrote:
> > diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
> > index 4bea701117d4..cd9f2d51285b 100644
> > --- a/arch/arm64/kernel/cpuinfo.c
> > +++ b/arch/arm64/kernel/cpuinfo.c
> > @@ -359,6 +359,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
> >  	info->reg_dczid = read_cpuid(DCZID_EL0);
> >  	info->reg_midr = read_cpuid_id();
> >  	info->reg_revidr = read_cpuid(REVIDR_EL1);
> > +	info->reg_gmid = read_cpuid(GMID_EL1);
> >  
> >  	info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
> >  	info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
> 
> 
> I am seeing the following undefined instruction crash on all our 
> ARM64 Tegra devices on today's -next and bisect is pointing to
> this patch. Reverting this patch on top of -next does fix the 
> problem. Let me know if you have any thoughts.

Catalin just posted a fixed version, so should be solved asap (I'll push a
new branch shortly).

Thanks!

Will

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] arm64: Check if GMID_EL1.BS is the same on all CPUs
  2021-05-26 19:47   ` [PATCH 2/2] arm64: Check if GMID_EL1.BS is the same on all CPUs Jon Hunter
  2021-05-26 21:08     ` Will Deacon
@ 2021-05-26 21:48     ` Catalin Marinas
  2021-05-26 22:11       ` Jon Hunter
  1 sibling, 1 reply; 4+ messages in thread
From: Catalin Marinas @ 2021-05-26 21:48 UTC (permalink / raw)
  To: Jon Hunter
  Cc: linux-arm-kernel, Will Deacon, Mark Rutland, Suzuki K Poulose,
	linux-tegra

On Wed, May 26, 2021 at 08:47:16PM +0100, Jon Hunter wrote:
> On 11/05/2021 19:23, Catalin Marinas wrote:
> > The GMID_EL1.BS field determines the number of tags accessed by the
> > LDGM/STGM instructions (EL1 and up), used by the kernel for copying or
> > zeroing page tags.
> > 
> > Taint the kernel if GMID_EL1.BS differs between CPUs.
> > 
> > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > Cc: Mark Rutland <mark.rutland@arm.com>
> > Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>
[...]
> I am seeing the following undefined instruction crash on all our 
> ARM64 Tegra devices on today's -next and bisect is pointing to
> this patch. Reverting this patch on top of -next does fix the 
> problem. Let me know if you have any thoughts.

Yeah, sorry about that. Posted a new version here, better tested:

https://lore.kernel.org/r/20210526193621.21559-1-catalin.marinas@arm.com

Will should have dropped the old one from linux-next but it takes a few
hours before Stephen re-creates the tree.

-- 
Catalin

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] arm64: Check if GMID_EL1.BS is the same on all CPUs
  2021-05-26 21:48     ` Catalin Marinas
@ 2021-05-26 22:11       ` Jon Hunter
  0 siblings, 0 replies; 4+ messages in thread
From: Jon Hunter @ 2021-05-26 22:11 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arm-kernel, Will Deacon, Mark Rutland, Suzuki K Poulose,
	linux-tegra


On 26/05/2021 22:48, Catalin Marinas wrote:
> On Wed, May 26, 2021 at 08:47:16PM +0100, Jon Hunter wrote:
>> On 11/05/2021 19:23, Catalin Marinas wrote:
>>> The GMID_EL1.BS field determines the number of tags accessed by the
>>> LDGM/STGM instructions (EL1 and up), used by the kernel for copying or
>>> zeroing page tags.
>>>
>>> Taint the kernel if GMID_EL1.BS differs between CPUs.
>>>
>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Will Deacon <will@kernel.org>
>>> Cc: Mark Rutland <mark.rutland@arm.com>
>>> Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>
> [...]
>> I am seeing the following undefined instruction crash on all our 
>> ARM64 Tegra devices on today's -next and bisect is pointing to
>> this patch. Reverting this patch on top of -next does fix the 
>> problem. Let me know if you have any thoughts.
> 
> Yeah, sorry about that. Posted a new version here, better tested:
> 
> https://lore.kernel.org/r/20210526193621.21559-1-catalin.marinas@arm.com
> 
> Will should have dropped the old one from linux-next but it takes a few
> hours before Stephen re-creates the tree.


No problem. Thanks for the quick fix!

Jon

-- 
nvpublic

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-05-26 22:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20210511182322.3830-1-catalin.marinas@arm.com>
     [not found] ` <20210511182322.3830-3-catalin.marinas@arm.com>
2021-05-26 19:47   ` [PATCH 2/2] arm64: Check if GMID_EL1.BS is the same on all CPUs Jon Hunter
2021-05-26 21:08     ` Will Deacon
2021-05-26 21:48     ` Catalin Marinas
2021-05-26 22:11       ` Jon Hunter

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox