Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 10/10] ARM: KVM: add world-switch for AMAIR{0,1}
From: Christoffer Dall @ 2014-01-29 20:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-11-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:42PM +0000, Marc Zyngier wrote:
> HCR.TVM traps (among other things) accesses to AMAIR0 and AMAIR1.
> In order to minimise the amount of surprise a guest could generate by
> trying to access these registers with caches off, add them to the
> list of registers we switch/handle.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm/include/asm/kvm_asm.h |  4 +++-
>  arch/arm/kvm/coproc.c          |  6 ++++++
>  arch/arm/kvm/interrupts_head.S | 12 ++++++++++--
>  3 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
> index 661da11..53b3c4a 100644
> --- a/arch/arm/include/asm/kvm_asm.h
> +++ b/arch/arm/include/asm/kvm_asm.h
> @@ -48,7 +48,9 @@
>  #define c13_TID_URO	26	/* Thread ID, User R/O */
>  #define c13_TID_PRIV	27	/* Thread ID, Privileged */
>  #define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
> -#define NR_CP15_REGS	29	/* Number of regs (incl. invalid) */
> +#define c10_AMAIR0	29	/* Auxilary Memory Attribute Indirection Reg0 */
> +#define c10_AMAIR1	30	/* Auxilary Memory Attribute Indirection Reg1 */
> +#define NR_CP15_REGS	31	/* Number of regs (incl. invalid) */
>  
>  #define ARM_EXCEPTION_RESET	  0
>  #define ARM_EXCEPTION_UNDEFINED   1
> diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
> index 1839770..539f6d4 100644
> --- a/arch/arm/kvm/coproc.c
> +++ b/arch/arm/kvm/coproc.c
> @@ -381,6 +381,12 @@ static const struct coproc_reg cp15_regs[] = {
>  	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
>  			access_vm_reg, reset_unknown, c10_NMRR},
>  
> +	/* AMAIR0/AMAIR1: swapped by interrupt.S. */
> +	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
> +			access_vm_reg, reset_unknown, c10_AMAIR0},
> +	{ CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
> +			access_vm_reg, reset_unknown, c10_AMAIR1},
> +
>  	/* VBAR: swapped by interrupt.S. */
>  	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
>  			NULL, reset_val, c12_VBAR, 0x00000000 },
> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
> index 7cb41e1..e4eaf30 100644
> --- a/arch/arm/kvm/interrupts_head.S
> +++ b/arch/arm/kvm/interrupts_head.S
> @@ -303,13 +303,17 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  
>  	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
>  	mrrc	p15, 0, r4, r5, c7	@ PAR
> +	mrc	p15, 0, r6, c10, c3, 0	@ AMAIR0
> +	mrc	p15, 0, r7, c10, c3, 1	@ AMAIR1
>  
>  	.if \store_to_vcpu == 0
> -	push	{r2,r4-r5}
> +	push	{r2,r4-r7}
>  	.else
>  	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
>  	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
>  	strd	r4, r5, [r12]
> +	str	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
> +	str	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
>  	.endif
>  .endm
>  
> @@ -322,15 +326,19 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>   */
>  .macro write_cp15_state read_from_vcpu
>  	.if \read_from_vcpu == 0
> -	pop	{r2,r4-r5}
> +	pop	{r2,r4-r7}
>  	.else
>  	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
>  	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
>  	ldrd	r4, r5, [r12]
> +	ldr	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
> +	ldr	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
>  	.endif
>  
>  	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
>  	mcrr	p15, 0, r4, r5, c7	@ PAR
> +	mcr	p15, 0, r6, c10, c3, 0	@ AMAIR0
> +	mcr	p15, 0, r7, c10, c3, 1	@ AMAIR1
>  
>  	.if \read_from_vcpu == 0
>  	pop	{r2-r12}
> -- 
> 1.8.3.4
> 

Looks good, but shouldn't this be added before patch 9 to maintain
functional bisectability?

Otherwise:
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [PATCH v2 09/10] ARM: KVM: trap VM system registers until MMU and caches are ON
From: Christoffer Dall @ 2014-01-29 20:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-10-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:41PM +0000, Marc Zyngier wrote:
> In order to be able to detect the point where the guest enables
> its MMU and caches, trap all the VM related system registers.
> 
> Once we see the guest enabling both the MMU and the caches, we
> can go back to a saner mode of operation, which is to leave these
> registers in complete control of the guest.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm/include/asm/kvm_arm.h |  3 +-
>  arch/arm/kvm/coproc.c          | 85 ++++++++++++++++++++++++++++++++++--------
>  arch/arm/kvm/coproc_a15.c      |  2 +-
>  arch/arm/kvm/coproc_a7.c       |  2 +-
>  4 files changed, 73 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
> index a843e74..816db0b 100644
> --- a/arch/arm/include/asm/kvm_arm.h
> +++ b/arch/arm/include/asm/kvm_arm.h
> @@ -55,6 +55,7 @@
>   * The bits we set in HCR:
>   * TAC:		Trap ACTLR
>   * TSC:		Trap SMC
> + * TVM:		Trap VM ops (until MMU and caches are on)
>   * TSW:		Trap cache operations by set/way
>   * TWI:		Trap WFI
>   * TWE:		Trap WFE
> @@ -68,7 +69,7 @@
>   */
>  #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
>  			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
> -			HCR_TWE | HCR_SWIO | HCR_TIDCP)
> +			HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
>  
>  /* System Control Register (SCTLR) bits */
>  #define SCTLR_TE	(1 << 30)
> diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
> index 126c90d..1839770 100644
> --- a/arch/arm/kvm/coproc.c
> +++ b/arch/arm/kvm/coproc.c
> @@ -23,6 +23,7 @@
>  #include <asm/kvm_host.h>
>  #include <asm/kvm_emulate.h>
>  #include <asm/kvm_coproc.h>
> +#include <asm/kvm_mmu.h>
>  #include <asm/cacheflush.h>
>  #include <asm/cputype.h>
>  #include <trace/events/kvm.h>
> @@ -205,6 +206,55 @@ done:
>  }
>  
>  /*
> + * Generic accessor for VM registers. Only called as long as HCR_TVM
> + * is set.
> + */
> +static bool access_vm_reg(struct kvm_vcpu *vcpu,
> +			  const struct coproc_params *p,
> +			  const struct coproc_reg *r)
> +{
> +	if (p->is_write) {
> +		vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
> +		if (p->is_64bit)
> +			vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
> +	} else {

hmm, the TVM in the ARM ARM v7 says it only traps for write accesses...?

> +		*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[r->reg];
> +		if (p->is_64bit)
> +			*vcpu_reg(vcpu, p->Rt2) = vcpu->arch.cp15[r->reg + 1];
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
> + * guest enables the MMU, we stop trapping the VM sys_regs and leave
> + * it in complete control of the caches.
> + *
> + * Used by the cpu-specific code.
> + */
> +bool access_sctlr(struct kvm_vcpu *vcpu,
> +		  const struct coproc_params *p,
> +		  const struct coproc_reg *r)
> +{
> +	if (p->is_write) {
> +		unsigned long val;
> +
> +		val = *vcpu_reg(vcpu, p->Rt1);
> +		vcpu->arch.cp15[r->reg] = val;

again, would it make sense to just call access_vm_reg and do the check
for caches enabled afterwards?

> +
> +		if ((val & (0b101)) == 0b101) {	/* MMU+Caches enabled? */
> +			vcpu->arch.hcr &= ~HCR_TVM;
> +			stage2_flush_vm(vcpu->kvm);
> +		}

my favorite static inline, again, again, ...

> +	} else {
> +		*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[r->reg];
> +	}
> +
> +	return true;
> +}
> +
> +/*
>   * We could trap ID_DFR0 and tell the guest we don't support performance
>   * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
>   * NAKed, so it will read the PMCR anyway.
> @@ -261,33 +311,36 @@ static const struct coproc_reg cp15_regs[] = {
>  	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
>  			NULL, reset_val, c1_CPACR, 0x00000000 },
>  
> -	/* TTBR0/TTBR1: swapped by interrupt.S. */
> -	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
> -	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
> -
> -	/* TTBCR: swapped by interrupt.S. */
> +	/* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
> +	{ CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
> +	{ CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
> +			access_vm_reg, reset_unknown, c2_TTBR0 },
> +	{ CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
> +			access_vm_reg, reset_unknown, c2_TTBR1 },
>  	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
> -			NULL, reset_val, c2_TTBCR, 0x00000000 },
> +			access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
> +	{ CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
> +
>  
>  	/* DACR: swapped by interrupt.S. */
>  	{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
> -			NULL, reset_unknown, c3_DACR },
> +			access_vm_reg, reset_unknown, c3_DACR },
>  
>  	/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
>  	{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
> -			NULL, reset_unknown, c5_DFSR },
> +			access_vm_reg, reset_unknown, c5_DFSR },
>  	{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
> -			NULL, reset_unknown, c5_IFSR },
> +			access_vm_reg, reset_unknown, c5_IFSR },
>  	{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
> -			NULL, reset_unknown, c5_ADFSR },
> +			access_vm_reg, reset_unknown, c5_ADFSR },
>  	{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
> -			NULL, reset_unknown, c5_AIFSR },
> +			access_vm_reg, reset_unknown, c5_AIFSR },
>  
>  	/* DFAR/IFAR: swapped by interrupt.S. */
>  	{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
> -			NULL, reset_unknown, c6_DFAR },
> +			access_vm_reg, reset_unknown, c6_DFAR },
>  	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
> -			NULL, reset_unknown, c6_IFAR },
> +			access_vm_reg, reset_unknown, c6_IFAR },
>  
>  	/* PAR swapped by interrupt.S */
>  	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
> @@ -324,9 +377,9 @@ static const struct coproc_reg cp15_regs[] = {
>  
>  	/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
>  	{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
> -			NULL, reset_unknown, c10_PRRR},
> +			access_vm_reg, reset_unknown, c10_PRRR},
>  	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
> -			NULL, reset_unknown, c10_NMRR},
> +			access_vm_reg, reset_unknown, c10_NMRR},
>  
>  	/* VBAR: swapped by interrupt.S. */
>  	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
> @@ -334,7 +387,7 @@ static const struct coproc_reg cp15_regs[] = {
>  
>  	/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
>  	{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
> -			NULL, reset_val, c13_CID, 0x00000000 },
> +			access_vm_reg, reset_val, c13_CID, 0x00000000 },
>  	{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
>  			NULL, reset_unknown, c13_TID_URW },
>  	{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
> diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
> index bb0cac1..e6f4ae4 100644
> --- a/arch/arm/kvm/coproc_a15.c
> +++ b/arch/arm/kvm/coproc_a15.c
> @@ -34,7 +34,7 @@
>  static const struct coproc_reg a15_regs[] = {
>  	/* SCTLR: swapped by interrupt.S. */
>  	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
> -			NULL, reset_val, c1_SCTLR, 0x00C50078 },
> +			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
>  };
>  
>  static struct kvm_coproc_target_table a15_target_table = {
> diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
> index 1df76733..17fc7cd 100644
> --- a/arch/arm/kvm/coproc_a7.c
> +++ b/arch/arm/kvm/coproc_a7.c
> @@ -37,7 +37,7 @@
>  static const struct coproc_reg a7_regs[] = {
>  	/* SCTLR: swapped by interrupt.S. */
>  	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
> -			NULL, reset_val, c1_SCTLR, 0x00C50878 },
> +			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
>  };
>  
>  static struct kvm_coproc_target_table a7_target_table = {
> -- 
> 1.8.3.4
> 

Otherwise looks good,
-- 
Christoffer

^ permalink raw reply

* [PATCH v2 08/10] ARM: KVM: introduce per-vcpu HYP Configuration Register
From: Christoffer Dall @ 2014-01-29 20:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-9-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:40PM +0000, Marc Zyngier wrote:
> So far, KVM/ARM used a fixed HCR configuration per guest, except for
> the VI/VF/VA bits to control the interrupt in absence of VGIC.
> 
> With the upcoming need to dynamically reconfigure trapping, it becomes
> necessary to allow the HCR to be changed on a per-vcpu basis.
> 
> The fix here is to mimic what KVM/arm64 already does: a per vcpu HCR
> field, initialized at setup time.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm/include/asm/kvm_arm.h  | 1 -
>  arch/arm/include/asm/kvm_host.h | 9 ++++++---
>  arch/arm/kernel/asm-offsets.c   | 1 +
>  arch/arm/kvm/guest.c            | 1 +
>  arch/arm/kvm/interrupts_head.S  | 9 +++------
>  5 files changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
> index 1d3153c..a843e74 100644
> --- a/arch/arm/include/asm/kvm_arm.h
> +++ b/arch/arm/include/asm/kvm_arm.h
> @@ -69,7 +69,6 @@
>  #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
>  			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
>  			HCR_TWE | HCR_SWIO | HCR_TIDCP)
> -#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
>  
>  /* System Control Register (SCTLR) bits */
>  #define SCTLR_TE	(1 << 30)
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index ba6d33a..918fdc1 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -101,6 +101,12 @@ struct kvm_vcpu_arch {
>  	/* The CPU type we expose to the VM */
>  	u32 midr;
>  
> +	/* HYP trapping configuration */
> +	u32 hcr;
> +
> +	/* Interrupt related fields */
> +	u32 irq_lines;		/* IRQ and FIQ levels */
> +
>  	/* Exception Information */
>  	struct kvm_vcpu_fault_info fault;
>  
> @@ -128,9 +134,6 @@ struct kvm_vcpu_arch {
>  	/* IO related fields */
>  	struct kvm_decode mmio_decode;
>  
> -	/* Interrupt related fields */
> -	u32 irq_lines;		/* IRQ and FIQ levels */
> -
>  	/* Cache some mmu pages needed inside spinlock regions */
>  	struct kvm_mmu_memory_cache mmu_page_cache;
>  
> diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
> index dbe0476..713e807 100644
> --- a/arch/arm/kernel/asm-offsets.c
> +++ b/arch/arm/kernel/asm-offsets.c
> @@ -174,6 +174,7 @@ int main(void)
>    DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
>    DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
>    DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
> +  DEFINE(VCPU_HCR,		offsetof(struct kvm_vcpu, arch.hcr));
>    DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
>    DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
>    DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
> diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
> index 20f8d97..0c8c044 100644
> --- a/arch/arm/kvm/guest.c
> +++ b/arch/arm/kvm/guest.c
> @@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>  
>  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>  {
> +	vcpu->arch.hcr = HCR_GUEST_MASK;
>  	return 0;
>  }
>  
> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
> index 4a2a97a..7cb41e1 100644
> --- a/arch/arm/kvm/interrupts_head.S
> +++ b/arch/arm/kvm/interrupts_head.S
> @@ -597,17 +597,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
>  
>  /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
>  .macro configure_hyp_role operation
> -	mrc	p15, 4, r2, c1, c1, 0	@ HCR
> -	bic	r2, r2, #HCR_VIRT_EXCP_MASK
> -	ldr	r3, =HCR_GUEST_MASK
>  	.if \operation == vmentry
> -	orr	r2, r2, r3
> +	ldr	r2, [vcpu, #VCPU_HCR]
>  	ldr	r3, [vcpu, #VCPU_IRQ_LINES]
>  	orr	r2, r2, r3
>  	.else
> -	bic	r2, r2, r3
> +	mov	r2, #0
>  	.endif
> -	mcr	p15, 4, r2, c1, c1, 0
> +	mcr	p15, 4, r2, c1, c1, 0	@ HCR
>  .endm
>  
>  .macro load_vcpu
> -- 
> 1.8.3.4
> 

Looks good:

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [PATCH v2 07/10] ARM: KVM: fix ordering of 64bit coprocessor accesses
From: Christoffer Dall @ 2014-01-29 20:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-8-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:39PM +0000, Marc Zyngier wrote:
> Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling)
> added an ordering dependency for the 64bit registers.
> 
> The order described is: CRn, CRm, Op1, Op2, 64bit-first.
> 
> Unfortunately, the implementation is: CRn, 64bit-first, CRm...
> 
> Move the 64bit test to be last in order to match the documentation.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm/kvm/coproc.h | 10 +++++++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
> index c5ad7ff..1a44bbe 100644
> --- a/arch/arm/kvm/coproc.h
> +++ b/arch/arm/kvm/coproc.h
> @@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1,
>  		return -1;
>  	if (i1->CRn != i2->CRn)
>  		return i1->CRn - i2->CRn;
> -	if (i1->is_64 != i2->is_64)
> -		return i2->is_64 - i1->is_64;
>  	if (i1->CRm != i2->CRm)
>  		return i1->CRm - i2->CRm;
>  	if (i1->Op1 != i2->Op1)
>  		return i1->Op1 - i2->Op1;
> -	return i1->Op2 - i2->Op2;
> +	if (i1->Op2 != i2->Op2)
> +		return i1->Op2 - i2->Op2;
> +	return i2->is_64 - i1->is_64;
>  }
>  
>  
> @@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
>  #define is64		.is_64 = true
>  #define is32		.is_64 = false
>  
> +bool access_sctlr(struct kvm_vcpu *vcpu,
> +		  const struct coproc_params *p,
> +		  const struct coproc_reg *r);
> +
>  #endif /* __ARM_KVM_COPROC_LOCAL_H__ */

stray hunk from other patch?

> -- 
> 1.8.3.4
> 

otherwise,

Thanks for fixing my broken fix, again,
FWIW: Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [PATCH v2 06/10] ARM: KVM: fix handling of trapped 64bit coprocessor accesses
From: Christoffer Dall @ 2014-01-29 20:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-7-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:38PM +0000, Marc Zyngier wrote:
> Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling)
> changed the way we match the 64bit coprocessor access from
> user space, but didn't update the trap handler for the same
> set of registers.
> 
> The effect is that a trapped 64bit access is never matched, leading
> to a fault being injected into the guest. This went unnoticed as we
> didn;t really trap any 64bit register so far.

didn't

> 
> Placing the CRm field of the access into the CRn field of the matching
> structure fixes the problem. Also update the debug feature to emit the
> expected string in case of failing match.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm/kvm/coproc.c | 4 ++--
>  arch/arm/kvm/coproc.h | 4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
> index 78c0885..126c90d 100644
> --- a/arch/arm/kvm/coproc.c
> +++ b/arch/arm/kvm/coproc.c
> @@ -443,7 +443,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
>  	struct coproc_params params;
>  
> -	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
> +	params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
>  	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
>  	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
>  	params.is_64bit = true;
> @@ -451,7 +451,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
>  	params.Op2 = 0;
>  	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
> -	params.CRn = 0;
> +	params.CRm = 0;
>  
>  	return emulate_cp15(vcpu, &params);
>  }
> diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
> index 0461d5c..c5ad7ff 100644
> --- a/arch/arm/kvm/coproc.h
> +++ b/arch/arm/kvm/coproc.h
> @@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)
>  {
>  	/* Look, we even formatted it for you to paste into the table! */
>  	if (p->is_64bit) {
> -		kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
> -			      p->CRm, p->Op1, p->is_write ? "write" : "read");
> +		kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
> +			      p->CRn, p->Op1, p->is_write ? "write" : "read");
>  	} else {
>  		kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
>  			      " func_%s },\n",
> -- 
> 1.8.3.4
> 

Thanks for fixing my broken fix!

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [PATCH v2 05/10] ARM: KVM: force cache clean on page fault when caches are off
From: Christoffer Dall @ 2014-01-29 20:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-6-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:37PM +0000, Marc Zyngier wrote:
> In order for the guest with caches off to observe data written

nit: s/the guest/a guest/
nit: s/caches off/caches disabled/

> contained in a given page, we need to make sure that page is
> committed to memory, and not just hanging in the cache (as
> guest accesses are completely bypassing the cache until it

nit: s/it/the guest/

> decides to enable it).
> 
> For this purpose, hook into the coherent_cache_guest_page
> function and flush the region if the guest SCTLR
> register doesn't show the MMU and caches as being enabled.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm/include/asm/kvm_mmu.h | 7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index cbab9ba..fa023e2 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -116,9 +116,14 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
>  
>  struct kvm;
>  
> +#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
> +
>  static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
>  					     unsigned long size)
>  {
> +	if ((vcpu->arch.cp15[c1_SCTLR] & 0b101) != 0b101)
> +		kvm_flush_dcache_to_poc((void *)hva, size);
> +	

Ah, my favorite inline function again...

>  	/*
>  	 * If we are going to insert an instruction page and the icache is
>  	 * either VIPT or PIPT, there is a potential problem where the host
> @@ -139,8 +144,6 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
>  	}
>  }
>  
> -#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
> -
>  void stage2_flush_vm(struct kvm *kvm);
>  
>  #endif	/* !__ASSEMBLY__ */
> -- 
> 1.8.3.4
> 

Besides the nits:

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [PATCH v2 04/10] arm64: KVM: flush VM pages before letting the guest enable caches
From: Christoffer Dall @ 2014-01-29 20:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-5-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:36PM +0000, Marc Zyngier wrote:
> When the guest runs with caches disabled (like in an early boot
> sequence, for example), all the writes are diectly going to RAM,
> bypassing the caches altogether.
> 
> Once the MMU and caches are enabled, whatever sits in the cache
> becomes suddently visible, which isn't what the guest expects.

nit: s/suddently/suddenly/

> 
> A way to avoid this potential disaster is to invalidate the cache
> when the MMU is being turned on. For this, we hook into the SCTLR_EL1
> trapping code, and scan the stage-2 page tables, invalidating the
> pages/sections that have already been mapped in.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
>  arch/arm/include/asm/kvm_mmu.h   |  2 +
>  arch/arm/kvm/mmu.c               | 83 ++++++++++++++++++++++++++++++++++++++++
>  arch/arm64/include/asm/kvm_mmu.h |  1 +
>  arch/arm64/kvm/sys_regs.c        |  5 ++-
>  4 files changed, 90 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index f997b9e..cbab9ba 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -141,6 +141,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
>  
>  #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
>  
> +void stage2_flush_vm(struct kvm *kvm);
> +
>  #endif	/* !__ASSEMBLY__ */
>  
>  #endif /* __ARM_KVM_MMU_H__ */
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 415fd63..52f8b7d 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -187,6 +187,89 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
>  	}
>  }
>  
> +void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
> +		       unsigned long addr, unsigned long end)
> +{
> +	pte_t *pte;
> +
> +	pte = pte_offset_kernel(pmd, addr);
> +	do {
> +		if (!pte_none(*pte)) {
> +			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
> +			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
> +		}
> +	} while(pte++, addr += PAGE_SIZE, addr != end);
> +}
> +
> +void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
> +		       unsigned long addr, unsigned long end)
> +{
> +	pmd_t *pmd;
> +	unsigned long next;
> +
> +	pmd = pmd_offset(pud, addr);
> +	do {
> +		next = pmd_addr_end(addr, end);
> +		if (!pmd_none(*pmd)) {
> +			if (kvm_pmd_huge(*pmd)) {
> +				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
> +				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
> +			} else {
> +				stage2_flush_ptes(kvm, pmd, addr, next);
> +			}
> +		}
> +	} while(pmd++, addr = next, addr != end);
> +}
> +
> +void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
> +		       unsigned long addr, unsigned long end)
> +{
> +	pud_t *pud;
> +	unsigned long next;
> +
> +	pud = pud_offset(pgd, addr);
> +	do {
> +		next = pud_addr_end(addr, end);
> +		if (!pud_none(*pud)) {
> +			if (pud_huge(*pud)) {
> +				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
> +				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
> +			} else {
> +				stage2_flush_pmds(kvm, pud, addr, next);
> +			}
> +		}
> +	} while(pud++, addr = next, addr != end);
> +}
> +
> +static void stage2_flush_memslot(struct kvm *kvm,
> +				 struct kvm_memory_slot *memslot)
> +{
> +	unsigned long addr = memslot->base_gfn << PAGE_SHIFT;
> +	unsigned long end = addr + PAGE_SIZE * memslot->npages;
> +	unsigned long next;

hmm, these are IPAs right, which can be larger than 32 bits with LPAE
on arm, so shouldn't this be phys_addr_t ?

If so, that propagates throughout the child subroutines, and you should
check if you can actually use pgd_addr_end.

> +	pgd_t *pgd;
> +
> +	pgd = kvm->arch.pgd + pgd_index(addr);
> +	do {
> +		next = pgd_addr_end(addr, end);
> +		stage2_flush_puds(kvm, pgd, addr, next);
> +	} while(pgd++, addr = next, addr != end);

I think the CodingStyle mandates a space after the while, which applies
to all the functions above.

> +}
> +

Could you add documentation for stage2_flush_vm?

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */

or some variation thereof...

> +void stage2_flush_vm(struct kvm *kvm)
> +{
> +	struct kvm_memslots *slots;
> +	struct kvm_memory_slot *memslot;
> +
> +	spin_lock(&kvm->mmu_lock);
> +
> +	slots = kvm_memslots(kvm);
> +	kvm_for_each_memslot(memslot, slots)
> +		stage2_flush_memslot(kvm, memslot);
> +
> +	spin_unlock(&kvm->mmu_lock);

don't you need to also srcu_read_lock(kvm->srcu) here?

> +}
> +
>  /**
>   * free_boot_hyp_pgd - free HYP boot page tables
>   *
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 2232dd0..b7b2ca3 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -139,6 +139,7 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
>  	}
>  }
>  
> +void stage2_flush_vm(struct kvm *kvm);
>  
>  #endif /* __ASSEMBLY__ */
>  #endif /* __ARM64_KVM_MMU_H__ */
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 1a4731b..3d27bb8 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -27,6 +27,7 @@
>  #include <asm/kvm_host.h>
>  #include <asm/kvm_emulate.h>
>  #include <asm/kvm_coproc.h>
> +#include <asm/kvm_mmu.h>
>  #include <asm/cacheflush.h>
>  #include <asm/cputype.h>
>  #include <trace/events/kvm.h>
> @@ -163,8 +164,10 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
>  	else
>  		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
>  
> -	if ((val & (0b101)) == 0b101)	/* MMU+Caches enabled? */
> +	if ((val & (0b101)) == 0b101) {	/* MMU+Caches enabled? */
>  		vcpu->arch.hcr_el2 &= ~HCR_TVM;
> +		stage2_flush_vm(vcpu->kvm);
> +	}
>  
>  	return true;
>  }
> -- 
> 1.8.3.4
> 

Thanks,
-- 
Christoffer

^ permalink raw reply

* [PATCH v2 03/10] arm64: KVM: trap VM system registers until MMU and caches are ON
From: Christoffer Dall @ 2014-01-29 20:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-4-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:35PM +0000, Marc Zyngier wrote:
> In order to be able to detect the point where the guest enables
> its MMU and caches, trap all the VM related system registers.
> 
> Once we see the guest enabling both the MMU and the caches, we
> can go back to a saner mode of operation, which is to leave these
> registers in complete control of the guest.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
>  arch/arm64/include/asm/kvm_arm.h |  3 +-
>  arch/arm64/include/asm/kvm_asm.h |  3 +-
>  arch/arm64/kvm/sys_regs.c        | 99 +++++++++++++++++++++++++++++++++++-----
>  3 files changed, 91 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index c98ef47..fd0a651 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -62,6 +62,7 @@
>   * RW:		64bit by default, can be overriden for 32bit VMs
>   * TAC:		Trap ACTLR
>   * TSC:		Trap SMC
> + * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
>   * TSW:		Trap cache operations by set/way
>   * TWE:		Trap WFE
>   * TWI:		Trap WFI
> @@ -74,7 +75,7 @@
>   * SWIO:	Turn set/way invalidates into set/way clean+invalidate
>   */
>  #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
> -			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
> +			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
>  			 HCR_AMO | HCR_IMO | HCR_FMO | \
>  			 HCR_SWIO | HCR_TIDCP | HCR_RW)
>  #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 3d796b4..89d7796 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -81,7 +81,8 @@
>  #define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
>  #define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
>  #define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
> -#define c10_AMAIR	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
> +#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
> +#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
>  #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
>  #define NR_CP15_REGS	(NR_SYS_REGS * 2)
>  
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index f063750..1a4731b 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -121,6 +121,55 @@ done:
>  }
>  
>  /*
> + * Generic accessor for VM registers. Only called as long as HCR_TVM
> + * is set.
> + */
> +static bool access_vm_reg(struct kvm_vcpu *vcpu,
> +			  const struct sys_reg_params *p,
> +			  const struct sys_reg_desc *r)
> +{
> +	unsigned long val;
> +
> +	BUG_ON(!p->is_write);
> +
> +	val = *vcpu_reg(vcpu, p->Rt);
> +	if (!p->is_aarch32) {
> +		vcpu_sys_reg(vcpu, r->reg) = val;
> +	} else {
> +		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
> +		if (!p->is_32bit)
> +			vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
> +	}
> +	return true;
> +}
> +
> +/*
> + * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set.  If the
> + * guest enables the MMU, we stop trapping the VM sys_regs and leave
> + * it in complete control of the caches.
> + */
> +static bool access_sctlr(struct kvm_vcpu *vcpu,
> +			 const struct sys_reg_params *p,
> +			 const struct sys_reg_desc *r)
> +{
> +	unsigned long val;
> +
> +	BUG_ON(!p->is_write);
> +
> +	val = *vcpu_reg(vcpu, p->Rt);
> +
> +	if (!p->is_aarch32)
> +		vcpu_sys_reg(vcpu, r->reg) = val;
> +	else
> +		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;

call access_vm_reg()?

> +
> +	if ((val & (0b101)) == 0b101)	/* MMU+Caches enabled? */
> +		vcpu->arch.hcr_el2 &= ~HCR_TVM;

static inline and reuse in patch 1?

static inline bool sctlr_caches_enabled(unsigned long val)
{
	return (val & 0b101) == 0b101;
}

> +
> +	return true;
> +}
> +
> +/*
>   * We could trap ID_DFR0 and tell the guest we don't support performance
>   * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
>   * NAKed, so it will read the PMCR anyway.
> @@ -185,32 +234,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  	  NULL, reset_mpidr, MPIDR_EL1 },
>  	/* SCTLR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
> -	  NULL, reset_val, SCTLR_EL1, 0x00C50078 },
> +	  access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
>  	/* CPACR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
>  	  NULL, reset_val, CPACR_EL1, 0 },
>  	/* TTBR0_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
> -	  NULL, reset_unknown, TTBR0_EL1 },
> +	  access_vm_reg, reset_unknown, TTBR0_EL1 },
>  	/* TTBR1_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
> -	  NULL, reset_unknown, TTBR1_EL1 },
> +	  access_vm_reg, reset_unknown, TTBR1_EL1 },
>  	/* TCR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
> -	  NULL, reset_val, TCR_EL1, 0 },
> +	  access_vm_reg, reset_val, TCR_EL1, 0 },
>  
>  	/* AFSR0_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
> -	  NULL, reset_unknown, AFSR0_EL1 },
> +	  access_vm_reg, reset_unknown, AFSR0_EL1 },
>  	/* AFSR1_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
> -	  NULL, reset_unknown, AFSR1_EL1 },
> +	  access_vm_reg, reset_unknown, AFSR1_EL1 },
>  	/* ESR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
> -	  NULL, reset_unknown, ESR_EL1 },
> +	  access_vm_reg, reset_unknown, ESR_EL1 },
>  	/* FAR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
> -	  NULL, reset_unknown, FAR_EL1 },
> +	  access_vm_reg, reset_unknown, FAR_EL1 },
>  	/* PAR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
>  	  NULL, reset_unknown, PAR_EL1 },
> @@ -224,17 +273,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  
>  	/* MAIR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
> -	  NULL, reset_unknown, MAIR_EL1 },
> +	  access_vm_reg, reset_unknown, MAIR_EL1 },
>  	/* AMAIR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
> -	  NULL, reset_amair_el1, AMAIR_EL1 },
> +	  access_vm_reg, reset_amair_el1, AMAIR_EL1 },
>  
>  	/* VBAR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
>  	  NULL, reset_val, VBAR_EL1, 0 },
>  	/* CONTEXTIDR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
> -	  NULL, reset_val, CONTEXTIDR_EL1, 0 },
> +	  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
>  	/* TPIDR_EL1 */
>  	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
>  	  NULL, reset_unknown, TPIDR_EL1 },
> @@ -305,14 +354,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  	  NULL, reset_val, FPEXC32_EL2, 0x70 },
>  };
>  
> -/* Trapped cp15 registers */
> +/*
> + * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
> + * depending on the way they are accessed (as a 32bit or a 64bit
> + * register).
> + */
>  static const struct sys_reg_desc cp15_regs[] = {
> +	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
> +	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
> +	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
> +	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
> +	{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
> +	{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
> +	{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
> +	{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
> +	{ Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
> +	{ Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
> +	{ Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
> +	{ Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
> +
>  	/*
>  	 * DC{C,I,CI}SW operations:
>  	 */
>  	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
>  	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
>  	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
> +
>  	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
>  	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
>  	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
> @@ -326,6 +393,14 @@ static const struct sys_reg_desc cp15_regs[] = {
>  	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
>  	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
>  	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
> +
> +	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
> +	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
> +	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
> +	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
> +	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
> +
> +	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
>  };
>  
>  /* Target specific emulation tables */
> -- 
> 1.8.3.4
> 

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [PATCH v2 02/10] arm64: KVM: allows discrimination of AArch32 sysreg access
From: Christoffer Dall @ 2014-01-29 20:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-3-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:34PM +0000, Marc Zyngier wrote:
> The current handling of AArch32 trapping is slightly less than
> perfect, as it is not possible (from a handler point of view)
> to distinguish it from an AArch64 access, nor to tell a 32bit
> from a 64bit access either.
> 
> Fix this by introducing two additional flags:
> - is_aarch32: true if the access was made in AArch32 mode
> - is_32bit: true if is_aarch32 == true and a MCR/MRC instruction
>   was used to perform the access (as opposed to MCRR/MRRC).
> 
> This allows a handler to cover all the possible conditions in which
> a system register gets trapped.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/kvm/sys_regs.c | 5 +++++
>  arch/arm64/kvm/sys_regs.h | 2 ++
>  2 files changed, 7 insertions(+)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 02e9d09..f063750 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -437,6 +437,8 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  	u32 hsr = kvm_vcpu_get_hsr(vcpu);
>  	int Rt2 = (hsr >> 10) & 0xf;
>  
> +	params.is_aarch32 = true;
> +	params.is_32bit = false;
>  	params.CRm = (hsr >> 1) & 0xf;
>  	params.Rt = (hsr >> 5) & 0xf;
>  	params.is_write = ((hsr & 1) == 0);
> @@ -480,6 +482,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  	struct sys_reg_params params;
>  	u32 hsr = kvm_vcpu_get_hsr(vcpu);
>  
> +	params.is_aarch32 = true;
> +	params.is_32bit = true;
>  	params.CRm = (hsr >> 1) & 0xf;
>  	params.Rt  = (hsr >> 5) & 0xf;
>  	params.is_write = ((hsr & 1) == 0);
> @@ -549,6 +553,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  	struct sys_reg_params params;
>  	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
>  
> +	params.is_aarch32 = false;

I'm wondering if we should set is_32bit = false, just for clarity...

>  	params.Op0 = (esr >> 20) & 3;
>  	params.Op1 = (esr >> 14) & 0x7;
>  	params.CRn = (esr >> 10) & 0xf;
> diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
> index d50d372..d411e25 100644
> --- a/arch/arm64/kvm/sys_regs.h
> +++ b/arch/arm64/kvm/sys_regs.h
> @@ -30,6 +30,8 @@ struct sys_reg_params {
>  	u8	Op2;
>  	u8	Rt;
>  	bool	is_write;
> +	bool	is_aarch32;
> +	bool	is_32bit;	/* Only valid if is_aarch32 is true */
>  };
>  
>  struct sys_reg_desc {
> -- 
> 1.8.3.4
> 

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [PATCH v2 01/10] arm64: KVM: force cache clean on page fault when caches are off
From: Christoffer Dall @ 2014-01-29 20:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1390402602-22777-2-git-send-email-marc.zyngier@arm.com>

On Wed, Jan 22, 2014 at 02:56:33PM +0000, Marc Zyngier wrote:
> In order for the guest with caches off to observe data written
> contained in a given page, we need to make sure that page is
> committed to memory, and not just hanging in the cache (as
> guest accesses are completely bypassing the cache until it
> decides to enable it).
> 
> For this purpose, hook into the coherent_icache_guest_page
> function and flush the region if the guest SCTLR_EL1
> register doesn't show the MMU  and caches as being enabled.
> The function also get renamed to coherent_cache_guest_page.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
>  arch/arm/include/asm/kvm_mmu.h   |  4 ++--
>  arch/arm/kvm/mmu.c               |  4 ++--
>  arch/arm64/include/asm/kvm_mmu.h | 11 +++++++----
>  3 files changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index 77de4a4..f997b9e 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -116,8 +116,8 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
>  
>  struct kvm;
>  
> -static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
> -					      unsigned long size)
> +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
> +					     unsigned long size)
>  {
>  	/*
>  	 * If we are going to insert an instruction page and the icache is
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 5809069..415fd63 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -713,7 +713,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  			kvm_set_s2pmd_writable(&new_pmd);
>  			kvm_set_pfn_dirty(pfn);
>  		}
> -		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
> +		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
>  		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
>  	} else {
>  		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
> @@ -721,7 +721,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  			kvm_set_s2pte_writable(&new_pte);
>  			kvm_set_pfn_dirty(pfn);
>  		}
> -		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
> +		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
>  		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
>  	}
>  
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index 680f74e..2232dd0 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -106,7 +106,6 @@ static inline bool kvm_is_write_fault(unsigned long esr)
>  	return true;
>  }
>  
> -static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
>  static inline void kvm_clean_pgd(pgd_t *pgd) {}
>  static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
>  static inline void kvm_clean_pte(pte_t *pte) {}
> @@ -124,9 +123,14 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
>  
>  struct kvm;
>  
> -static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
> -					      unsigned long size)
> +#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
> +
> +static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
> +					     unsigned long size)
>  {
> +	if ((vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) != 0b101)
> +		kvm_flush_dcache_to_poc((void *)hva, size);
> +

This deserves a comment or a static inline...

>  	if (!icache_is_aliasing()) {		/* PIPT */
>  		flush_icache_range(hva, hva + size);
>  	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
> @@ -135,7 +139,6 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
>  	}
>  }
>  
> -#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
>  
>  #endif /* __ASSEMBLY__ */
>  #endif /* __ARM64_KVM_MMU_H__ */
> -- 
> 1.8.3.4
> 

Otherwise:
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

^ permalink raw reply

* [BUG] reproducable ubifs reboot assert and corruption
From: Richard Weinberger @ 2014-01-29 19:56 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129191322.GC7278@gmail.com>

On Wed, Jan 29, 2014 at 8:13 PM, Andrew Ruder <andy@aeruder.net> wrote:
> On Wed, Jan 29, 2014 at 04:56:04PM +0100, Richard Weinberger wrote:
>> Does the issue also happen if you disable preemption?
>> i.e. CONFIG_PREEMPT_NONE=y
>
> CONFIG_PREEMPT_NONE=y still breaks.  I suspect that sync_filesystem
> has some blocking behavior that allows other processes to schedule.

Okay, I have to test this on real hardware.
May take a few days.

BTW: s/not.at/nod.at/g ;)

> Another log is attached and the patch I used to create this log is
> below.  I think the most interesting part of this patch is the last hunk
> that modifies ubifs_remount_ro.  This clearly shows that after the mount
> has been marked as read-only we have dirty inodes waiting for the
> writeback to come in and hit all the asserts.
>
> Here's some of the important parts of the log:
>
>   170 pre sync_filesystem
> # Notice that while we were running sync_filesystem
> # a inode (0xd75ab658) snuck in with a sys_rename
>   204 inode: d75ab658
>   205 ------------[ cut here ]------------
>   206 WARNING: CPU: 0 PID: 625 at fs/fs-writeback.c:1213 __mark_inode_dirty+0x2a4/0x2f4()
>   207 Modules linked in:
>   208 CPU: 0 PID: 625 Comm: fsstress Tainted: G        W    3.12.0-00041-g7f12d39-dirty #250
>   209 [<c0014f54>] (unwind_backtrace+0x0/0x128) from [<c001235c>] (show_stack+0x20/0x24)
>   210 [<c001235c>] (show_stack+0x20/0x24) from [<c0344574>] (dump_stack+0x20/0x28)
>   211 [<c0344574>] (dump_stack+0x20/0x28) from [<c00206d4>] (warn_slowpath_common+0x78/0x98)
>   212 [<c00206d4>] (warn_slowpath_common+0x78/0x98) from [<c00207b0>] (warn_slowpath_null+0x2c/0x34)
>   213 [<c00207b0>] (warn_slowpath_null+0x2c/0x34) from [<c00f0e8c>] (__mark_inode_dirty+0x2a4/0x2f4)
>   214 [<c00f0e8c>] (__mark_inode_dirty+0x2a4/0x2f4) from [<c0136428>] (ubifs_rename+0x4a4/0x600)
>   215 [<c0136428>] (ubifs_rename+0x4a4/0x600) from [<c00d9f44>] (vfs_rename+0x280/0x3f4)
>   216 [<c00d9f44>] (vfs_rename+0x280/0x3f4) from [<c00dabe4>] (SyS_renameat+0x18c/0x218)
>   217 [<c00dabe4>] (SyS_renameat+0x18c/0x218) from [<c00dac9c>] (SyS_rename+0x2c/0x30)
>   218 [<c00dac9c>] (SyS_rename+0x2c/0x30) from [<c000e820>] (ret_fast_syscall+0x0/0x2c)
>   219 ---[ end trace 35ebec8569a53526 ]---
>   754 post sync_filesystem
>   755 pre prepare_remount
>   756 post prepare_remount
>   757 prepare_remount success
>   758 UBIFS: background thread "ubifs_bgt0_0" stops
>   759 we are now a read-only mount
>   760 bdi.work_list = d7ac4110, .next = d7ac4110
> # WE HAVE DIRTY I/O (notice the .next != &b_dirty)
>   761 bdi.wb.b_dirty = d7ac40d8, .next = d75accd0
>   762 bdi.wb.b_io = d7ac40e0, .next = d7ac40e0
>   763 bdi.wb.b_more_io = d7ac40e8, .next = d7ac40e8
>   764 do_remount_sb success
> # And now our friend (inode 0xd75ab658) comes in with a writeback after
> # we are read-only triggering the assert
>   779 inode: d75ab658
>   780 UBIFS assert failed in reserve_space at 125 (pid 11)
>   781 CPU: 0 PID: 11 Comm: kworker/u2:1 Tainted: G        W    3.12.0-00041-g7f12d39-dirty #250
>   782 Workqueue: writeback bdi_writeback_workfn (flush-ubifs_0_0)
>   783 [<c0014f54>] (unwind_backtrace+0x0/0x128) from [<c001235c>] (show_stack+0x20/0x24)
>   784 [<c001235c>] (show_stack+0x20/0x24) from [<c0344574>] (dump_stack+0x20/0x28)
>   785 [<c0344574>] (dump_stack+0x20/0x28) from [<c012f90c>] (make_reservation+0x8c/0x560)
>   786 [<c012f90c>] (make_reservation+0x8c/0x560) from [<c0130c60>] (ubifs_jnl_write_inode+0xbc/0x214)
>   787 [<c0130c60>] (ubifs_jnl_write_inode+0xbc/0x214) from [<c0137ce0>] (ubifs_write_inode+0xec/0x17c)
>   788 [<c0137ce0>] (ubifs_write_inode+0xec/0x17c) from [<c00f0a00>] (__writeback_single_inode+0x1fc/0x308)
>   789 [<c00f0a00>] (__writeback_single_inode+0x1fc/0x308) from [<c00f1780>] (writeback_sb_inodes+0x1f8/0x3c4)
>   790 [<c00f1780>] (writeback_sb_inodes+0x1f8/0x3c4) from [<c00f19cc>] (__writeback_inodes_wb+0x80/0xc0)
>   791 [<c00f19cc>] (__writeback_inodes_wb+0x80/0xc0) from [<c00f1ba4>] (wb_writeback+0x198/0x310)
>   792 [<c00f1ba4>] (wb_writeback+0x198/0x310) from [<c00f2328>] (bdi_writeback_workfn+0x15c/0x454)
>   793 [<c00f2328>] (bdi_writeback_workfn+0x15c/0x454) from [<c0038348>] (process_one_work+0x280/0x420)
>   794 [<c0038348>] (process_one_work+0x280/0x420) from [<c00389e4>] (worker_thread+0x240/0x380)
>   795 [<c00389e4>] (worker_thread+0x240/0x380) from [<c003dff8>] (kthread+0xbc/0xc8)
>   796 [<c003dff8>] (kthread+0xbc/0xc8) from [<c000e8b0>] (ret_from_fork+0x14/0x20)
>
> - Andy
>
>
>
>
> --- patch ---
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index 9f4935b..48e4272 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -93,6 +93,10 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
>  {
>         trace_writeback_queue(bdi, work);
>
> +       do {
> +               extern bool ubifs_enable_debug;
> +               WARN_ON(ubifs_enable_debug);
> +       } while (0);
>         spin_lock_bh(&bdi->wb_lock);
>         list_add_tail(&work->list, &bdi->work_list);
>         spin_unlock_bh(&bdi->wb_lock);
> @@ -194,6 +198,11 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
>                 if (time_before(inode->dirtied_when, tail->dirtied_when))
>                         inode->dirtied_when = jiffies;
>         }
> +       do {
> +               extern bool ubifs_enable_debug;
> +               if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
> +               WARN_ON(ubifs_enable_debug);
> +       } while (0);
>         list_move(&inode->i_wb_list, &wb->b_dirty);
>  }
>
> @@ -204,6 +213,11 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
>  {
>         assert_spin_locked(&wb->list_lock);
>         list_move(&inode->i_wb_list, &wb->b_more_io);
> +       do {
> +               extern bool ubifs_enable_debug;
> +               if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
> +               WARN_ON(ubifs_enable_debug);
> +       } while (0);
>  }
>
>  static void inode_sync_complete(struct inode *inode)
> @@ -437,6 +451,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
>         unsigned dirty;
>         int ret;
>
> +       extern bool ubifs_enable_debug;
> +       if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
>         WARN_ON(!(inode->i_state & I_SYNC));
>
>         trace_writeback_single_inode_start(inode, wbc, nr_to_write);
> @@ -1191,6 +1207,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>
>                         inode->dirtied_when = jiffies;
>                         list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
> +       do {
> +               extern bool ubifs_enable_debug;
> +               if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
> +               WARN_ON(ubifs_enable_debug);
> +       } while (0);
>                         spin_unlock(&bdi->wb.list_lock);
>
>                         if (wakeup_bdi)
> diff --git a/fs/super.c b/fs/super.c
> index 0225c20..29afc68 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -737,6 +737,12 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
>         int retval;
>         int remount_ro;
>
> +       extern bool ubifs_enable_debug;
> +       if (flags & MS_RDONLY) {
> +               ubifs_enable_debug = true;
> +       }
> +       WARN_ON(ubifs_enable_debug);
> +
>         if (sb->s_writers.frozen != SB_UNFROZEN)
>                 return -EBUSY;
>
> @@ -747,8 +753,11 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
>
>         if (flags & MS_RDONLY)
>                 acct_auto_close(sb);
> +       pr_info("pre shrink_dcache_sb\n");
>         shrink_dcache_sb(sb);
> +       pr_info("pre sync_filesystem\n");
>         sync_filesystem(sb);
> +       pr_info("post sync_filesystem\n");
>
>         remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
>
> @@ -758,9 +767,12 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
>                 if (force) {
>                         mark_files_ro(sb);
>                 } else {
> +                       pr_info("pre prepare_remount\n");
>                         retval = sb_prepare_remount_readonly(sb);
> +                       pr_info("post prepare_remount\n");
>                         if (retval)
>                                 return retval;
> +                       pr_info("prepare_remount success\n");
>                 }
>         }
>
> @@ -789,6 +801,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
>          */
>         if (remount_ro && sb->s_bdev)
>                 invalidate_bdev(sb->s_bdev);
> +       pr_info("do_remount_sb success\n");
>         return 0;
>
>  cancel_readonly:
> diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
> index 123c79b..c9f2d5d 100644
> --- a/fs/ubifs/file.c
> +++ b/fs/ubifs/file.c
> @@ -902,6 +902,8 @@ static int do_writepage(struct page *page, int len)
>         struct inode *inode = page->mapping->host;
>         struct ubifs_info *c = inode->i_sb->s_fs_info;
>
> +       WARN_ON(c->ro_mount);
> +
>  #ifdef UBIFS_DEBUG
>         spin_lock(&ui->ui_lock);
>         ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE);
> diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
> index 3e4aa72..8cbb731 100644
> --- a/fs/ubifs/super.c
> +++ b/fs/ubifs/super.c
> @@ -38,6 +38,8 @@
>  #include <linux/writeback.h>
>  #include "ubifs.h"
>
> +bool ubifs_enable_debug;
> +
>  /*
>   * Maximum amount of memory we may 'kmalloc()' without worrying that we are
>   * allocating too much.
> @@ -1756,6 +1758,11 @@ static void ubifs_remount_ro(struct ubifs_info *c)
>         err = dbg_check_space_info(c);
>         if (err)
>                 ubifs_ro_mode(c, err);
> +       pr_info("we are now a read-only mount\n");
> +       pr_info("bdi.work_list = %p, .next = %p\n", &c->bdi.work_list, c->bdi.work_list.next);
> +       pr_info("bdi.wb.b_dirty = %p, .next = %p\n", &c->bdi.wb.b_dirty, c->bdi.wb.b_dirty.next);
> +       pr_info("bdi.wb.b_io = %p, .next = %p\n", &c->bdi.wb.b_io, c->bdi.wb.b_io.next);
> +       pr_info("bdi.wb.b_more_io = %p, .next = %p\n", &c->bdi.wb.b_more_io, c->bdi.wb.b_more_io.next);
>         mutex_unlock(&c->umount_mutex);
>  }
>
>
> ______________________________________________________
> Linux MTD discussion mailing list
> http://lists.infradead.org/mailman/listinfo/linux-mtd/
>



-- 
Thanks,
//richard

^ permalink raw reply

* [RFC PATCH V2 1/4] pci: APM X-Gene PCIe controller driver
From: Arnd Bergmann @ 2014-01-29 19:36 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CACoXjcnTnhWS-5yOgiHA5yVaBjdH1DVzmSsknEBJLB+CFR0EyA@mail.gmail.com>

On Monday 27 January 2014, Tanmay Inamdar wrote:
> On Sat, Jan 25, 2014 at 12:11 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Friday 24 January 2014 13:28:22 Tanmay Inamdar wrote:
> >> On Thu, Jan 16, 2014 at 5:10 PM, Tanmay Inamdar <tinamdar@apm.com> wrote:
> >> > On Wed, Jan 15, 2014 at 4:39 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> >> >> On Wednesday 15 January 2014, Tanmay Inamdar wrote:
> >> >>
> >> >> Maybe another msleep() in the loop? It seems weird to first do an
> >> >> unconditional sleep but then busy-wait for the result.
> >> >
> >> > ok.
> >>
> >> This loop can execute for maximum 4 msec. So putting msleep(1) won't
> >> get us much.
> >
> > 4 msec is still quite a long time for a busy loop that can be spent doing
> > useful work in another thread.
> >
> 
> Right. If 'msleep(1)' is used, then 'checkpatch' throws a warning
> saying that it can actually sleep for 20ms in some cases. I will check
> if 'usleep_range' is useful here.

Sound good. This is really a false positive from checkpatch though,
with the timeout handling in place, everything's fine even with
msleep(1).

> >> >>
> >> >> Another general note: Your "compatible" strings are rather unspecific.
> >> >> Do you have a version number for this IP block? I suppose that it's related
> >> >> to one that has been used in other chips before, or will be used in future
> >> >> chips, if it's not actually licensed from some other company.
> >> >
> >> > I will have to check this.
> >> >
> >>
> >> We have decided to stick with current compatible string for now.
> >
> > Can you elaborate on your reasoning? Does this mean X-Gene is a one-off
> > product and you won't be doing any new chips based on the same hardware
> > components?
> 
> The current convention is to key upon the family name - X-Gene. Future
> chips will also be a part of X-Gene family. Right now it is unclear if
> there are any obvious feature additions to be done in Linux PCIe
> driver. Until then same driver is expected to work as is in future
> chips.

This is not enough for me. Of course you hope that things keep working,
but experience shows that sometimes hardware has slight differences that
you need to work around later. It's better to always be specific and
at least as a secondary identifier list the exact model of the component,
or if that is not know, the model of the SoC. The driver can bind to
the most generic string, but in DT you should have a specific one as
well.

You could for instance have something like

	compatible = "apm,xgene-1234w78-pcie", "thirdparty,pcie-1.23", "apm,xgene-pcie", "thirdparty,pcie";

as an example where you licensed the pcie block version 1.23 from a company
named thirdparty and integrated it into the xgene variant with product
code 1234w78.

	Arnd

^ permalink raw reply

* [PATCH 2/2] ARM: dts: OMAP3+: add clock nodes for CPU
From: Robert Nelson @ 2014-01-29 19:29 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1391019557-22313-3-git-send-email-nm@ti.com>

On Wed, Jan 29, 2014 at 12:19 PM, Nishanth Menon <nm@ti.com> wrote:
> OMAP34xx, AM3517 and OMAP36xx platforms use dpll1 clock.
>
> OMAP443x, OMAP446x, OMAP447x, OMAP5, DRA7, AM43xx platforms use
> dpll_mpu clock.
>
> Latency used is the generic latency defined in omap-cpufreq
> driver.
>
> Signed-off-by: Nishanth Menon <nm@ti.com>

Hi Nishanth,

After this patch, do you see any limitation to finally enabling 1Ghz
operation on the beagle-xm by default? Or are we still missing a
dependicy somewhere?

cpufreq stats: 300 MHz:98.64%, 600 MHz:0.04%, 800 MHz:0.09%, 1000
MHz:1.23%  (11)
full cpufreq output: http://paste.debian.net/79073/

diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts
b/arch/arm/boot/dts/omap3-beagle-xm.dts
index bb5dad0..b0e5863 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -16,9 +16,36 @@
        cpus {
                cpu at 0 {
                        cpu0-supply = <&vcc>;
+                       operating-points = <
+                               /* kHz    uV */
+                               300000   1012500
+                               600000   1200000
+                               800000   1325000
+                               1000000  1380000
+                       >;
                };
        };

+       abb: regulator-abb {
+               compatible = "ti,abb-v1";
+               regulator-name = "abb";
+               #address-cell = <0>;
+               #size-cells = <0>;
+               reg = <0x483072f0 0x8>, <0x48306818 0x4>;
+               reg-names = "base-address", "int-address";
+               ti,tranxdone-status-mask = <0x4000000>;
+               clocks = <&dpll1_ck>;
+               ti,settling-time = <30>;
+               ti,clock-cycles = <8>;
+               ti,abb_info = <
+                       /* uV           ABB     efuse   rbb_m   fbb_m
 vset_m */
+                       1012500         0       0       0       0
 0 /* Bypass */
+                       1200000         3       0       0       0
 0 /* RBB mandatory */
+                       1320000         1       0       0       0
 0 /* FBB mandatory */
+                       1380000         1       0       0       0       0
+                       >;
+       };
+
        memory {
                device_type = "memory";
                reg = <0x80000000 0x20000000>; /* 512 MB */
-- 
1.8.5.3

Regards,

-- 
Robert Nelson
http://www.rcn-ee.com/

^ permalink raw reply related

* [PATCH v4 2/5] arm: add new asm macro update_sctlr
From: Will Deacon @ 2014-01-29 19:27 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129182805.GF11329@bivouac.eciton.net>

Hi Leif,

On Wed, Jan 29, 2014 at 06:28:05PM +0000, Leif Lindholm wrote:
> On Wed, Jan 22, 2014 at 11:20:55AM +0000, Will Deacon wrote:
> > > +#ifdef CONFIG_CPU_CP15
> > > +/* Macro for setting/clearing bits in sctlr */
> > > +	.macro update_sctlr, set:req, clear:req, tmp:req, tmp2:req
> > > +	mrc	p15, 0, \tmp, c1, c0, 0
> > > +	ldr	\tmp2, =\set
> > > +	orr	\tmp, \tmp, \tmp2
> > > +	ldr	\tmp2, =\clear
> > > +	mvn	\tmp2, \tmp2
> > > +	and	\tmp, \tmp, \tmp2
> > > +	mcr	p15, 0, \tmp, c1, c0, 0
> > 
> > I think this would be cleaner if you force the caller to put set and clear
> > into registers beforehand, rather than have to do the literal load every
> > time. Also, I don't think set and clear should be required (and then you can
> > lose tmp2 as well).
> 
> I can't figure out how to make register-parameters non-required
> (i.e. conditionalise on whether an optional parameter was provided),
> so my attempt of refactoring actually ends up using an additional
> register:
> 
> #ifdef CONFIG_CPU_CP15
> /* Macro for setting/clearing bits in sctlr */
> 	.macro	update_sctlr, set:req, clear:req, tmp:req
> 	mrc	p15, 0, \tmp, c1, c0, 0
> 	orr	\tmp, \set
> 	mvn	\clear, \clear
> 	and	\tmp, \tmp, \clear
> 	mcr	p15, 0, \tmp, c1, c0, 0
> 	.endm
> #endif
> 
> If you think that's an improvement I can do that, and I have (just)
> enough registers to spare.
> If I'm being daft with my macro issues, do point it out.

I was thinking along the lines of:

	.macro  update_sctlr, tmp:req, set=0, clear=0
	.if	\set
	orr ...
	.endif
	.if	\clear
	bic ...
	.endif
	.endm

however, that puts us back to the problem of having to pass immediates
instead of registers. Gas *does* seem to accept the following:

	.macro  update_sctlr, tmp:req, set=0, clear=0
	.if	\set != 0
	orr ...
	.endif
	.if	\clear != 0
	bic ...
	.endif
	.endm

which is filthy, so we'd need to see how beautiful the caller ends up being
to justify that!

Will

^ permalink raw reply

* [PATCH v3 02/11] iommu/arm-smmu: Introduce iommu_group notifier block
From: Varun Sethi @ 2014-01-29 19:19 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129141429.GD19326@alberich>



> -----Original Message-----
> From: iommu-bounces at lists.linux-foundation.org [mailto:iommu-
> bounces at lists.linux-foundation.org] On Behalf Of Andreas Herrmann
> Sent: Wednesday, January 29, 2014 7:44 PM
> To: Sethi Varun-B16395
> Cc: Andreas Herrmann; iommu at lists.linux-foundation.org; Will Deacon;
> linux-arm-kernel at lists.infradead.org
> Subject: Re: [PATCH v3 02/11] iommu/arm-smmu: Introduce iommu_group
> notifier block
> 
> On Tue, Jan 28, 2014 at 11:00:35AM +0000, Varun Sethi wrote:
> >
> >
> > > -----Original Message-----
> > > From: iommu-bounces at lists.linux-foundation.org [mailto:iommu-
> > > bounces at lists.linux-foundation.org] On Behalf Of Andreas Herrmann
> > > Sent: Friday, January 24, 2014 1:27 AM
> > > To: Sethi Varun-B16395
> > > Cc: Andreas Herrmann; iommu at lists.linux-foundation.org; Will Deacon;
> > > linux-arm-kernel at lists.infradead.org
> > > Subject: Re: [PATCH v3 02/11] iommu/arm-smmu: Introduce iommu_group
> > > notifier block
> > >
> > > On Wed, Jan 22, 2014 at 07:07:40PM +0000, Varun Sethi wrote:
> > > > > -----Original Message-----
> > > > > From: Will Deacon [mailto:will.deacon at arm.com]
> > > > > Sent: Wednesday, January 22, 2014 9:04 PM
> > > > > To: Sethi Varun-B16395
> > > > > Cc: Andreas Herrmann; iommu at lists.linux-foundation.org;
> > > > > linux-arm- kernel at lists.infradead.org; Andreas Herrmann
> > > > > Subject: Re: [PATCH v3 02/11] iommu/arm-smmu: Introduce
> > > > > iommu_group notifier block
> > > > >
> > > > > On Wed, Jan 22, 2014 at 01:54:11PM +0000, Varun Sethi wrote:
> > > > > > > > > Ok, so are you suggesting that we perform the isolation
> > > > > > > > > mapping in arm_smmu_add_device and drop the notifier
> > > altogether?
> > > > > > > > I think that should be fine, until we want to delay
> > > > > > > > mapping creation till driver bind time.
> > > > > > >
> > > > > > > Is there a hard dependency on that?
> > > > > > >
> > > > > > Not sure, may be Andreas can answer that.
> > > > >
> > > > > Ok. Andreas? I would have thought doing this *earlier* shouldn't
> > > > > be a problem (the DMA ops must be swizzled before the driver is
> probed).
> > > > >
> > > > > > > > But the "isolate device" property should dictate iommu
> > > > > > > > group
> > > > > creation.
> > > > > > >
> > > > > > > The reason we added automatic group creation (per-device) is
> > > > > > > for VFIO, which expects all devices to be in a group
> > > > > > > regardless of the device isolation configuration.
> > > > > > >
> > > > > > IIUC, with the "isolate devices" property we ensure that there
> > > > > > would be independent SMR and S2CR per device. Is that correct?
> > > > >
> > > > > Yes, as part of giving them independent sets of page tables.
> > > > > Initially these tables don't have any valid mappings, but the
> > > > > dma-mapping API will populate them in response to
> > > dma_map_*/dma_alloc/etc.
> > > > >
> > > > > Not sure what this has to do with us putting devices into their
> > > > > own groups...
> > >
> > > > [Sethi Varun-B16395] Devices in an iommu group would share the dma
> > > > mapping, so shouldn't they be sharing the SMR and context
> registers?
> > >
> > > I aggree with the context but SMRs won't be shared. I think you just
> > > can say that a certain subset of the available SMRs will be used to
> > > map all devices in an iommu group to the same context. Depending on
> > > what streamIDs each device has you might have to use separate SMRs
> > > for each device to map it to the same context.
> 
> > [Sethi Varun-B16395] IIUC the SMRs would unique per device, but there
> > is a possibility where the number of context registers are restricted?
> > In that case IOMMU groups should correspond to unique stream IDs (and
> > corresponding SMRs).
> 
> > > > In case of the "isolate devices" property, each device would have
> > > > its own SMR and context registers, thus allowing the devices to
> > > > have independent mappings and allowing them to fall in separate
> > > > iommu groups.
> > >
> > > I aggree with Varun's view here. (Now that I take iommu groups into
> > > consideration.)
> > >
> > > But my goal with the "isolate devices" thing was twofold:
> > >
> > > 1. actual make use of SMMU for address translation for all master
> > >   devices (instead of just bypassing the SMMU)
> 
> > [Sethi Varun-B16395] even if masters have to share translation? But
> > from the patch it seemed that we are creating mapping only if we find
> > the isolate devices property.
> 
> Yes, the patch creates mappings only if isolate-devices property is
> given. Currently there is no trigger to create a common mapping for all
> devices attached to the same SMMU.
> 
> > > plus
> > >
> > > 2. put each master into it's own domain to isolate it
> > >
> > > The latter matches usage of separate iommu groups for devices. If we
> > > now use the isolate devices property to just control whether master
> > > devices fall into the same or separate iommu groups it seems to me
> > > we would need to have another mechanism that triggers the creation
> > > of a mapping for a group.
> > >
> > > What do you think?
> > >
> 
> > [Sethi Varun-B16395] As mentioned before, we should be having iommu
> > group per device (having a unique stream id). Isolate devices property
> > just ensures that each device has a unique context. Now, for the
> > devices for which we don't have the isolate device property, can't we
> > have the multiple devices point to a common mapping.
> 
> Hmm, the isolate-devices option is per SMMU. So if this is set for an
> SMMU the driver tries to create a mapping per device. (And this is done
> for all master devices of this SMMU.)
> 
> Are you requesting to change the default behaviour of the driver to
> create a shared mapping in case the isolate-devices property is not
> specified in DT for an SMMU node?
> 
> Or maybe your concern is that you have x master devices for an SMMU which
> support y number of contexts and x > y? Which would imply that not all
> devices can be isolated and some need to share a context?
> 
[Sethi Varun-B16395] I am trying to understand the requirement for the "isolate devices" property. Currently no default in mapping is created in the SMMU driver. The new property allows default mapping to be created for all masters. Why can't we create default mappings for all masters without this property?

-Varun

^ permalink raw reply

* [BUG] reproducable ubifs reboot assert and corruption
From: Andrew Ruder @ 2014-01-29 19:13 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <52E92494.1090407@nod.at>

On Wed, Jan 29, 2014 at 04:56:04PM +0100, Richard Weinberger wrote:
> Does the issue also happen if you disable preemption?
> i.e. CONFIG_PREEMPT_NONE=y

CONFIG_PREEMPT_NONE=y still breaks.  I suspect that sync_filesystem
has some blocking behavior that allows other processes to schedule.

Another log is attached and the patch I used to create this log is
below.  I think the most interesting part of this patch is the last hunk
that modifies ubifs_remount_ro.  This clearly shows that after the mount
has been marked as read-only we have dirty inodes waiting for the
writeback to come in and hit all the asserts.

Here's some of the important parts of the log:

  170 pre sync_filesystem
# Notice that while we were running sync_filesystem
# a inode (0xd75ab658) snuck in with a sys_rename
  204 inode: d75ab658
  205 ------------[ cut here ]------------
  206 WARNING: CPU: 0 PID: 625 at fs/fs-writeback.c:1213 __mark_inode_dirty+0x2a4/0x2f4()
  207 Modules linked in:
  208 CPU: 0 PID: 625 Comm: fsstress Tainted: G        W    3.12.0-00041-g7f12d39-dirty #250
  209 [<c0014f54>] (unwind_backtrace+0x0/0x128) from [<c001235c>] (show_stack+0x20/0x24)
  210 [<c001235c>] (show_stack+0x20/0x24) from [<c0344574>] (dump_stack+0x20/0x28)
  211 [<c0344574>] (dump_stack+0x20/0x28) from [<c00206d4>] (warn_slowpath_common+0x78/0x98)
  212 [<c00206d4>] (warn_slowpath_common+0x78/0x98) from [<c00207b0>] (warn_slowpath_null+0x2c/0x34)
  213 [<c00207b0>] (warn_slowpath_null+0x2c/0x34) from [<c00f0e8c>] (__mark_inode_dirty+0x2a4/0x2f4)
  214 [<c00f0e8c>] (__mark_inode_dirty+0x2a4/0x2f4) from [<c0136428>] (ubifs_rename+0x4a4/0x600)
  215 [<c0136428>] (ubifs_rename+0x4a4/0x600) from [<c00d9f44>] (vfs_rename+0x280/0x3f4)
  216 [<c00d9f44>] (vfs_rename+0x280/0x3f4) from [<c00dabe4>] (SyS_renameat+0x18c/0x218)
  217 [<c00dabe4>] (SyS_renameat+0x18c/0x218) from [<c00dac9c>] (SyS_rename+0x2c/0x30)
  218 [<c00dac9c>] (SyS_rename+0x2c/0x30) from [<c000e820>] (ret_fast_syscall+0x0/0x2c)
  219 ---[ end trace 35ebec8569a53526 ]---
  754 post sync_filesystem
  755 pre prepare_remount
  756 post prepare_remount
  757 prepare_remount success
  758 UBIFS: background thread "ubifs_bgt0_0" stops
  759 we are now a read-only mount
  760 bdi.work_list = d7ac4110, .next = d7ac4110
# WE HAVE DIRTY I/O (notice the .next != &b_dirty)
  761 bdi.wb.b_dirty = d7ac40d8, .next = d75accd0
  762 bdi.wb.b_io = d7ac40e0, .next = d7ac40e0
  763 bdi.wb.b_more_io = d7ac40e8, .next = d7ac40e8
  764 do_remount_sb success
# And now our friend (inode 0xd75ab658) comes in with a writeback after
# we are read-only triggering the assert
  779 inode: d75ab658
  780 UBIFS assert failed in reserve_space at 125 (pid 11)
  781 CPU: 0 PID: 11 Comm: kworker/u2:1 Tainted: G        W    3.12.0-00041-g7f12d39-dirty #250
  782 Workqueue: writeback bdi_writeback_workfn (flush-ubifs_0_0)
  783 [<c0014f54>] (unwind_backtrace+0x0/0x128) from [<c001235c>] (show_stack+0x20/0x24)
  784 [<c001235c>] (show_stack+0x20/0x24) from [<c0344574>] (dump_stack+0x20/0x28)
  785 [<c0344574>] (dump_stack+0x20/0x28) from [<c012f90c>] (make_reservation+0x8c/0x560)
  786 [<c012f90c>] (make_reservation+0x8c/0x560) from [<c0130c60>] (ubifs_jnl_write_inode+0xbc/0x214)
  787 [<c0130c60>] (ubifs_jnl_write_inode+0xbc/0x214) from [<c0137ce0>] (ubifs_write_inode+0xec/0x17c)
  788 [<c0137ce0>] (ubifs_write_inode+0xec/0x17c) from [<c00f0a00>] (__writeback_single_inode+0x1fc/0x308)
  789 [<c00f0a00>] (__writeback_single_inode+0x1fc/0x308) from [<c00f1780>] (writeback_sb_inodes+0x1f8/0x3c4)
  790 [<c00f1780>] (writeback_sb_inodes+0x1f8/0x3c4) from [<c00f19cc>] (__writeback_inodes_wb+0x80/0xc0)
  791 [<c00f19cc>] (__writeback_inodes_wb+0x80/0xc0) from [<c00f1ba4>] (wb_writeback+0x198/0x310)
  792 [<c00f1ba4>] (wb_writeback+0x198/0x310) from [<c00f2328>] (bdi_writeback_workfn+0x15c/0x454)
  793 [<c00f2328>] (bdi_writeback_workfn+0x15c/0x454) from [<c0038348>] (process_one_work+0x280/0x420)
  794 [<c0038348>] (process_one_work+0x280/0x420) from [<c00389e4>] (worker_thread+0x240/0x380)
  795 [<c00389e4>] (worker_thread+0x240/0x380) from [<c003dff8>] (kthread+0xbc/0xc8)
  796 [<c003dff8>] (kthread+0xbc/0xc8) from [<c000e8b0>] (ret_from_fork+0x14/0x20)

- Andy




--- patch ---
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9f4935b..48e4272 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -93,6 +93,10 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 {
 	trace_writeback_queue(bdi, work);
 
+	do {
+		extern bool ubifs_enable_debug;
+		WARN_ON(ubifs_enable_debug);
+	} while (0);
 	spin_lock_bh(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
 	spin_unlock_bh(&bdi->wb_lock);
@@ -194,6 +198,11 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
+	do {
+		extern bool ubifs_enable_debug;
+		if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
+		WARN_ON(ubifs_enable_debug);
+	} while (0);
 	list_move(&inode->i_wb_list, &wb->b_dirty);
 }
 
@@ -204,6 +213,11 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 {
 	assert_spin_locked(&wb->list_lock);
 	list_move(&inode->i_wb_list, &wb->b_more_io);
+	do {
+		extern bool ubifs_enable_debug;
+		if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
+		WARN_ON(ubifs_enable_debug);
+	} while (0);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -437,6 +451,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	unsigned dirty;
 	int ret;
 
+	extern bool ubifs_enable_debug;
+	if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
 	WARN_ON(!(inode->i_state & I_SYNC));
 
 	trace_writeback_single_inode_start(inode, wbc, nr_to_write);
@@ -1191,6 +1207,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+	do {
+		extern bool ubifs_enable_debug;
+		if (ubifs_enable_debug) pr_info("inode: %p\n", inode);
+		WARN_ON(ubifs_enable_debug);
+	} while (0);
 			spin_unlock(&bdi->wb.list_lock);
 
 			if (wakeup_bdi)
diff --git a/fs/super.c b/fs/super.c
index 0225c20..29afc68 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -737,6 +737,12 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	int retval;
 	int remount_ro;
 
+	extern bool ubifs_enable_debug;
+	if (flags & MS_RDONLY) {
+		ubifs_enable_debug = true;
+	}
+	WARN_ON(ubifs_enable_debug);
+
 	if (sb->s_writers.frozen != SB_UNFROZEN)
 		return -EBUSY;
 
@@ -747,8 +753,11 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
+	pr_info("pre shrink_dcache_sb\n");
 	shrink_dcache_sb(sb);
+	pr_info("pre sync_filesystem\n");
 	sync_filesystem(sb);
+	pr_info("post sync_filesystem\n");
 
 	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
 
@@ -758,9 +767,12 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 		if (force) {
 			mark_files_ro(sb);
 		} else {
+			pr_info("pre prepare_remount\n");
 			retval = sb_prepare_remount_readonly(sb);
+			pr_info("post prepare_remount\n");
 			if (retval)
 				return retval;
+			pr_info("prepare_remount success\n");
 		}
 	}
 
@@ -789,6 +801,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	 */
 	if (remount_ro && sb->s_bdev)
 		invalidate_bdev(sb->s_bdev);
+	pr_info("do_remount_sb success\n");
 	return 0;
 
 cancel_readonly:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 123c79b..c9f2d5d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -902,6 +902,8 @@ static int do_writepage(struct page *page, int len)
 	struct inode *inode = page->mapping->host;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 
+	WARN_ON(c->ro_mount);
+
 #ifdef UBIFS_DEBUG
 	spin_lock(&ui->ui_lock);
 	ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3e4aa72..8cbb731 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -38,6 +38,8 @@
 #include <linux/writeback.h>
 #include "ubifs.h"
 
+bool ubifs_enable_debug;
+
 /*
  * Maximum amount of memory we may 'kmalloc()' without worrying that we are
  * allocating too much.
@@ -1756,6 +1758,11 @@ static void ubifs_remount_ro(struct ubifs_info *c)
 	err = dbg_check_space_info(c);
 	if (err)
 		ubifs_ro_mode(c, err);
+	pr_info("we are now a read-only mount\n");
+	pr_info("bdi.work_list = %p, .next = %p\n", &c->bdi.work_list, c->bdi.work_list.next);
+	pr_info("bdi.wb.b_dirty = %p, .next = %p\n", &c->bdi.wb.b_dirty, c->bdi.wb.b_dirty.next);
+	pr_info("bdi.wb.b_io = %p, .next = %p\n", &c->bdi.wb.b_io, c->bdi.wb.b_io.next);
+	pr_info("bdi.wb.b_more_io = %p, .next = %p\n", &c->bdi.wb.b_more_io, c->bdi.wb.b_more_io.next);
 	mutex_unlock(&c->umount_mutex);
 }
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: experiments11.small.txt.gz
Type: application/x-gunzip
Size: 5974 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20140129/fc352ff5/attachment-0001.bin>

^ permalink raw reply related

* [RFC PATCH v2 08/14] mtd: nand: add sunxi NAND flash controller support
From: Jason Gunthorpe @ 2014-01-29 19:10 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129184619.GC27143@localhost>

On Wed, Jan 29, 2014 at 03:46:20PM -0300, Ezequiel Garcia wrote:

> After CE# has been pulled high and then transitioned low again, the host
> should issue a Set Features to select the appropriate asynchronous timing mode.
> """

Oh, I had forgot you should do a set feature too

Boris, I think the core core should handle this dance and the driver
should just implement a call back to change the timing mode on the
interface..

If I ever get a moment I can work on support for timing setting in the
mvebu driver, I have boards here with ONFI NAND..

Regards,
Jason

^ permalink raw reply

* [RFC PATCH v2 08/14] mtd: nand: add sunxi NAND flash controller support
From: Boris BREZILLON @ 2014-01-29 19:02 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129175641.GF1427@obsidianresearch.com>

Le 29/01/2014 18:56, Jason Gunthorpe a ?crit :
> On Wed, Jan 29, 2014 at 03:34:18PM +0100, Boris BREZILLON wrote:
>
>> +static int sunxi_nand_chip_init_timings(struct sunxi_nand_chip *chip,
>> +					struct device_node *np)
>> +{
>> +	const struct nand_sdr_timings *timings;
>> +	u32 min_clk_period = 0;
>> +	int ret;
>> +
>> +	ret = onfi_get_async_timing_mode(&chip->nand);
>> +	if (ret == ONFI_TIMING_MODE_UNKNOWN) {
>> +		ret = of_get_nand_onfi_timing_mode(np);
>> +		if (ret < 0)
>> +			return ret;
>> +	}
> [..]
>
>> +static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
> [..]
>> +       ret = sunxi_nand_chip_init_timings(chip, np);
>> +       if (ret)
>> +               return ret;
> [..]
>> +       ret = nand_scan_ident(mtd, nsels, NULL);
> This ordering looks a bit problematic, will onfi_get_async_timing_mode
> ever return anything other than ONFI_TIMING_MODE_UNKNOWN if it is
> called before nand_scan_ident ?
Indeed. I haven't tested this part as I don't own any board with an ONFI 
compatible chip.
> What sets clk_rate to non-zero if there
> is no DT property?
It is set to 20 MHz by default, but it should definitely be set to the 
rate fulfilling mode 0.
I'll fix this.

>
> For a flow that uses onfi_get_async_timing_mode rather than DT the
> driver should set the interface to timing mode 0 (slowest) and then
> call nand_scan_ident, and then reset the interface to the detected
> timing mode.

Absolutely.

>
> Maybe this should be implemented in the core code through a new
> callback (nand->set_timing_mode ?)
>
> Regards,
> Jason

^ permalink raw reply

* [PATCH v2 5/6] X86: remove redundant cpuidle_idle_call()
From: Olof Johansson @ 2014-01-29 19:02 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1391017513-12995-6-git-send-email-nicolas.pitre@linaro.org>

Hi,

On Wed, Jan 29, 2014 at 9:45 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> The core idle loop now takes care of it.
>
> Signed-off-by: Nicolas Pitre <nico@linaro.org>
> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>  arch/x86/kernel/process.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 3fb8d95ab8..4505e2a950 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -298,10 +298,7 @@ void arch_cpu_idle_dead(void)
>   */
>  void arch_cpu_idle(void)
>  {
> -       if (cpuidle_idle_call())
> -               x86_idle();
> -       else
> -               local_irq_enable();
> +       x86_idle();

You're taking out the local_irq_enable() here but I don't see the
equivalent of adding it back in the 1/6 patch that moves the
cpuidle_idle_call() up to common code. It seems that one of the call
paths through cpuidle_idle_call() don't re-enable it on its own.

Even if this is the right thing to do, why it's OK to do so should
probably be documented in the patch description.


-Olof

^ permalink raw reply

* [RFC PATCH v2 08/14] mtd: nand: add sunxi NAND flash controller support
From: Ezequiel Garcia @ 2014-01-29 18:46 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129175641.GF1427@obsidianresearch.com>

On Wed, Jan 29, 2014 at 10:56:42AM -0700, Jason Gunthorpe wrote:
> On Wed, Jan 29, 2014 at 03:34:18PM +0100, Boris BREZILLON wrote:
> 
> [..]
> 
> > +static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
> [..]
> > +       ret = sunxi_nand_chip_init_timings(chip, np);
> > +       if (ret)
> > +               return ret;
> [..]
> > +       ret = nand_scan_ident(mtd, nsels, NULL);
> 
> This ordering looks a bit problematic, will onfi_get_async_timing_mode
> ever return anything other than ONFI_TIMING_MODE_UNKNOWN if it is
> called before nand_scan_ident ? What sets clk_rate to non-zero if there
> is no DT property?
> 
> For a flow that uses onfi_get_async_timing_mode rather than DT the
> driver should set the interface to timing mode 0 (slowest) and then
> call nand_scan_ident, and then reset the interface to the detected
> timing mode.
> 

Yes. And I believe this is a requirement from the ONFI 2.1 spec:

"""
4.1.4.3. Source Synchronous to Asynchronous
[..]

The host shall transition to the asynchronous data interface. Then the
host shall issue the Reset (FFh) command described in the previous paragraph
using asynchronous timing mode 0, thus the host transitions to the asynchronous
data interface prior to issuing the Reset (FFh). A device in any timing mode is
required to recognize a Reset (FFh) command issued in asynchronous timing
mode 0.

[..]

After CE# has been pulled high and then transitioned low again, the host
should issue a Set Features to select the appropriate asynchronous timing mode.
"""

-- 
Ezequiel Garc?a, Free Electrons
Embedded Linux, Kernel and Android Engineering
http://free-electrons.com

^ permalink raw reply

* [PATCH v3 1/2] init/do_mounts.c: ignore final \n in name_to_dev_t
From: Andrew Morton @ 2014-01-29 18:41 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129182956.14275.72264@capellas-linux>

On Wed, 29 Jan 2014 10:29:56 -0800 Sebastian Capella <sebastian.capella@linaro.org> wrote:

> Hi Andrew,
> 
> By the way, I do see a call (sysfs_streq) in use for this purpose
> other places.  Sorry, I didn't find it while looking at the original
> problem.  I'm not sure if this is preferable, but it appears to have
> been added specifically for the strings coming through sysfs.

Yes, I wrote it ;)

I didn't think sysfs_streq() is well suited to this problem.  And the
issue of possibly-null-terminated-strings coming in from userspace is a
common one, so it is desirable that we build up the suite of utilities
to handle this.

There are probably quite a lot of open-coded \n trimming loops which
can be cleaned up using such tools.

	grep -r "if .* == '\\\n'" .

> My preference is copying the string and cleaning it up before passing
> it to internal functions, even though we incur an allocation.

Yes.  Here on the kernel/userspace boundary we are typically running in
GFP_KERNEL context and the code is not performance critical - it is a
good fit.

^ permalink raw reply

* [RFC PATCH v2 03/14] of: mtd: add documentation for nand-ecc-level property
From: Boris BREZILLON @ 2014-01-29 18:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140129175331.GA27143@localhost>

Hello Ezequiel

Le 29/01/2014 18:53, Ezequiel Garcia a ?crit :
> On Wed, Jan 29, 2014 at 03:34:13PM +0100, Boris BREZILLON wrote:
>> nand-ecc-level property statically defines NAND chip's ECC requirements.
>>
>> Signed-off-by: Boris BREZILLON <b.brezillon.dev@gmail.com>
>> ---
>>   Documentation/devicetree/bindings/mtd/nand.txt |    3 +++
>>   1 file changed, 3 insertions(+)
>>
>> diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
>> index 03855c8..0c962296 100644
>> --- a/Documentation/devicetree/bindings/mtd/nand.txt
>> +++ b/Documentation/devicetree/bindings/mtd/nand.txt
>> @@ -3,5 +3,8 @@
>>   - nand-ecc-mode : String, operation mode of the NAND ecc mode.
>>     Supported values are: "none", "soft", "hw", "hw_syndrome", "hw_oob_first",
>>     "soft_bch".
>> +- nand-ecc-level : Two cells property defining the ECC level requirements.
>> +  The first cell represent the strength and the second cell the ECC block size.
>> +  E.g. : nand-ecc-level = <4 512>; /* 4 bits / 512 bytes */
>>   - nand-bus-width : 8 or 16 bus width if not present 8
>>   - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
> Hm.. when was this proposal agreed?
Never, this is a proposal based on my needs, and this was not present in the
1st version of this series :-).
> It seems I've missed the
> discussion...
>
> FWIW, we've already proposed an equivalent one, but it received no
> feedback from the devicetree maintainers:
>
> http://comments.gmane.org/gmane.linux.drivers.devicetree/58764
>
> Maybe we can discuss about it now?
>
>    nand-ecc-strength : integer ECC required strength.
>    nand-ecc-size : integer step size associated to the ECC strength.
>
>    vs.
>
>    nand-ecc-level : Two cells property defining the ECC level requirements.
>    The first cell represent the strength and the second cell the ECC block size.
>    E.g. : nand-ecc-level = <4 512>; /* 4 bits / 512 bytes */
>
> It's really the same proposal but with a different format, right?

Yes it is.

> IMHO, the former is more human-readable, but other than that I see no
> difference.

As I already said to Pekon, I won't complain if my proposal is not chosen,
as long as there is a proper way to define these ECC requirements ;-).

Best Regards,

Boris

>
> Brian? DT-guys?

^ permalink raw reply

* [RFC PATCH v2 09/14] mtd: nand: add sunxi NFC dt bindings doc
From: Gupta, Pekon @ 2014-01-29 18:36 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20980858CB6D3A4BAE95CA194937D5E73EA6C01D@DBDE04.ent.ti.com>

Dear Rob, and other DT maintainers,
(apologies, fixed typos in earlier mail)

>>From: Rob Herring
>[...]
>>> +- onfi,nand-timing-mode : mandatory if the chip does not support the ONFI
>>> +  standard.
>>
>>Add to generic nand binding.
>>
>>> +- allwinner,rb : shall contain the native Ready/Busy ids.
>>> + or
>>> +- rb-gpios : shall contain the gpios used as R/B pins.
>>
>>Isn't allwinner,rb implied by a lack of rb-gpios property. Or no R/B
>>pin is an option? If so, don't you need some fixed time delay
>>properties like max erase time?
>>
>>rb-gpios could be added to the generic nand binding as well.
>>
I do _not_ think this should go into generic nand binding, as this is controller specific.
Some controllers have dedicated R/B pin (Ready/Busy) while others may use
GPIO instead. It's the way a hardware controller is designed.

Request you to please consider Ack from MTD Maintainers 'at-least' for
generic NAND DT bindings. There is already a discussion going in
a separate thread for which there are still no replies [1].

[1] http://lists.infradead.org/pipermail/linux-mtd/2014-January/051625.html


with regards, pekon

^ permalink raw reply

* [RFC PATCH v2 09/14] mtd: nand: add sunxi NFC dt bindings doc
From: Boris BREZILLON @ 2014-01-29 18:33 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20980858CB6D3A4BAE95CA194937D5E73EA6C01D@DBDE04.ent.ti.com>

Le 29/01/2014 19:02, Gupta, Pekon a ?crit :
> Dear Rob, and other DT maintainers,
>
>> From: Rob Herring
> [...]
>>> +- onfi,nand-timing-mode : mandatory if the chip does not support the ONFI
>>> +  standard.
>> Add to generic nand binding.
>>
>>> +- allwinner,rb : shall contain the native Ready/Busy ids.
>>> + or
>>> +- rb-gpios : shall contain the gpios used as R/B pins.
>> Isn't allwinner,rb implied by a lack of rb-gpios property. Or no R/B
>> pin is an option? If so, don't you need some fixed time delay
>> properties like max erase time?
>>
>> rb-gpios could be added to the generic nand binding as well.
>>
> I do think this should go into generic nand binding, as this is controller specific.
> Some controllers have dedicated R/B pin (Ready/Busy) while others may use
> GPIO instead. It's the way a hardware controller is designed.

You meant "You do not think", right ?
If so, I think even if the retrieval and control of the GPIO is done is 
each NAND
controller, we could at least use a common property name for all drivers 
using
a GPIO to detect the R/B state.

> Request you to please consider Ack from MTD Maintainers 'at-least' for
> generic NAND DT bindings. There is already a discussion going in
> a separate thread for which is still not awaiting replies [1].
>
> [1]http://lists.infradead.org/pipermail/linux-mtd/2014-January/051625.html

I missed this thread, but I can definitely use the nand-ecc-strength and
nand-ecc-step-size instead of the one I defined (nand-ecc-level), as long
as there is a proper way to define these informations in the DT.

I'll let DT and MTD maintainers decide ;-).

Best Regards,

Boris
>
> with regards, pekon

^ permalink raw reply

* [PATCH v3 1/2] init/do_mounts.c: ignore final \n in name_to_dev_t
From: Sebastian Capella @ 2014-01-29 18:29 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140128205830.14275.80319@capellas-linux>

Hi Andrew,

By the way, I do see a call (sysfs_streq) in use for this purpose
other places.  Sorry, I didn't find it while looking at the original
problem.  I'm not sure if this is preferable, but it appears to have
been added specifically for the strings coming through sysfs.

My preference is copying the string and cleaning it up before passing
it to internal functions, even though we incur an allocation.

Thanks,

Sebastian

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox