linux-arm-kernel.lists.infradead.org archive mirror
* [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16
@ 2014-05-02 15:24 Will Deacon
  2014-05-02 15:24 ` [PATCH 1/8] ARM: cacheflush: use -st dsb option for ensuring completion Will Deacon
                   ` (7 more replies)
  0 siblings, 8 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hello,

Here is the current set of barrier cleanups and fixes I plan to queue
for merging in 3.16. Most of the arm64 patches have been posted before,
but there are also a few additions that I wrote recently.

All feedback welcome,

Will

Will Deacon (8):
  ARM: cacheflush: use -st dsb option for ensuring completion
  ARM: cache: remove redundant dsb instruction from
    v7_coherent_user_range
  arm64: barriers: make use of barrier options with explicit barriers
  arm64: barriers: wire up new barrier options
  arm64: barriers: use barrier() instead of smp_mb() when !SMP
  arm64: head: fix cache flushing and barriers in set_cpu_boot_mode_flag
  arm64: kvm: use inner-shareable barriers for inner-shareable
    maintenance
  arm64: mm: use inner-shareable barriers for inner-shareable
    maintenance

 arch/arm/include/asm/cacheflush.h   |  2 +-
 arch/arm/mm/cache-v7.S              | 13 ++++++-------
 arch/arm/mm/mmu.c                   |  2 +-
 arch/arm64/include/asm/barrier.h    | 20 ++++++++++----------
 arch/arm64/include/asm/cacheflush.h |  4 ++--
 arch/arm64/include/asm/pgtable.h    |  4 ++--
 arch/arm64/include/asm/tlbflush.h   | 14 +++++++-------
 arch/arm64/kernel/head.S            |  8 +++-----
 arch/arm64/kernel/process.c         |  2 +-
 arch/arm64/kvm/hyp.S                | 12 +++++++++---
 arch/arm64/kvm/sys_regs.c           |  4 ++--
 arch/arm64/mm/cache.S               |  6 +++---
 arch/arm64/mm/proc.S                |  2 +-
 arch/arm64/mm/tlb.S                 |  8 ++++----
 14 files changed, 52 insertions(+), 49 deletions(-)

-- 
1.9.2

* [PATCH 1/8] ARM: cacheflush: use -st dsb option for ensuring completion
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  2014-05-02 15:24 ` [PATCH 2/8] ARM: cache: remove redundant dsb instruction from v7_coherent_user_range Will Deacon
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

dsb st can be used to ensure completion of pending cache maintenance
operations, so use it for the v7 cache maintenance operations.
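
For illustration, a minimal sketch of the pattern (hypothetical helper
using GCC-style inline asm, not part of this patch): the maintenance
only has to complete with respect to writes, so the store-only option
is sufficient and we avoid waiting for outstanding loads.

	static inline void clean_dcache_line_sketch(unsigned long va)
	{
		/* DCCMVAC: clean data cache line by MVA to PoC */
		asm volatile("mcr p15, 0, %0, c7, c10, 1" : : "r" (va) : "memory");
		/* dsb st: wait for the clean (and any other stores) to complete */
		asm volatile("dsb st" : : : "memory");
	}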

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/cacheflush.h |  2 +-
 arch/arm/mm/cache-v7.S            | 12 ++++++------
 arch/arm/mm/mmu.c                 |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 8b8b61685a34..00af9fe435e6 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -212,7 +212,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 static inline void __flush_icache_all(void)
 {
 	__flush_icache_preferred();
-	dsb();
+	dsb(ishst);
 }
 
 /*
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 778bcf88ee79..615c99e38ba1 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -59,7 +59,7 @@ ENTRY(v7_invalidate_l1)
        bgt     2b
        cmp     r2, #0
        bgt     1b
-       dsb
+       dsb     st
        isb
        mov     pc, lr
 ENDPROC(v7_invalidate_l1)
@@ -166,7 +166,7 @@ skip:
 finished:
 	mov	r10, #0				@ swith back to cache level 0
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
-	dsb
+	dsb	st
 	isb
 	mov	pc, lr
 ENDPROC(v7_flush_dcache_all)
@@ -335,7 +335,7 @@ ENTRY(v7_flush_kern_dcache_area)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_flush_kern_dcache_area)
 
@@ -368,7 +368,7 @@ v7_dma_inv_range:
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_dma_inv_range)
 
@@ -390,7 +390,7 @@ v7_dma_clean_range:
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_dma_clean_range)
 
@@ -412,7 +412,7 @@ ENTRY(v7_dma_flush_range)
 	add	r0, r0, r2
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	st
 	mov	pc, lr
 ENDPROC(v7_dma_flush_range)
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index b68c6b22e1c8..2b001e5dce87 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1461,7 +1461,7 @@ void __init early_paging_init(const struct machine_desc *mdesc,
 	 * just complicate the code.
 	 */
 	flush_cache_louis();
-	dsb();
+	dsb(ishst);
 	isb();
 
 	/* remap level 1 table */
-- 
1.9.2

* [PATCH 2/8] ARM: cache: remove redundant dsb instruction from v7_coherent_user_range
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
  2014-05-02 15:24 ` [PATCH 1/8] ARM: cacheflush: use -st dsb option for ensuring completion Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  2014-05-09 16:16   ` Catalin Marinas
  2014-05-02 15:24 ` [PATCH 3/8] arm64: barriers: make use of barrier options with explicit barriers Will Deacon
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

v7_coherent_user_range takes a virtual address range, cleans the D-side
to PoU and then invalidates the I-side so that subsequent instruction
fetches can see any new data written to the range in question.

Since cache maintenance by MVA is architected to execute in program
order with respect to other cache maintenance operations specifying
the same virtual address, we do not require a barrier between the
D-side clean and the I-side invalidation.

This patch removes the redundant dsb.
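
As an illustration of the ordering being relied upon, a hedged C sketch
(hypothetical helper with GCC-style inline asm, not taken from the
kernel): both by-MVA operations target the same VA, so they execute in
program order without an intervening dsb, and a single barrier after
the pair is enough.

	static inline void sync_icache_line_sketch(unsigned long va)
	{
		/* DCCMVAU: clean D-cache line by MVA to PoU */
		asm volatile("mcr p15, 0, %0, c7, c11, 1" : : "r" (va) : "memory");
		/* ICIMVAU: invalidate I-cache line by MVA to PoU
		 * (no dsb needed between the two by-MVA operations) */
		asm volatile("mcr p15, 0, %0, c7, c5, 1" : : "r" (va) : "memory");
		asm volatile("dsb ish" : : : "memory");	/* complete the maintenance */
		asm volatile("isb" : : : "memory");	/* resample the instruction stream */
	}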

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/mm/cache-v7.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 615c99e38ba1..b040d3ca20ac 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -282,7 +282,6 @@ ENTRY(v7_coherent_user_range)
 	add	r12, r12, r2
 	cmp	r12, r1
 	blo	1b
-	dsb	ishst
 	icache_line_size r2, r3
 	sub	r3, r2, #1
 	bic	r12, r0, r3
-- 
1.9.2

* [PATCH 3/8] arm64: barriers: make use of barrier options with explicit barriers
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
  2014-05-02 15:24 ` [PATCH 1/8] ARM: cacheflush: use -st dsb option for ensuring completion Will Deacon
  2014-05-02 15:24 ` [PATCH 2/8] ARM: cache: remove redundant dsb instruction from v7_coherent_user_range Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  2014-05-02 15:24 ` [PATCH 4/8] arm64: barriers: wire up new barrier options Will Deacon
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

When calling our low-level barrier macros directly, we can often make do
with more relaxed behaviour than the default "all accesses, full system"
option.

This patch updates the users of dsb() to specify the option which they
actually require.
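
As a sketch of the intent at a typical call site (hypothetical helper;
the real call sites are in the hunks below, and the macro is shown
inline only so the snippet stands alone):

	#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")

	static unsigned long example_pmd_entry;

	static inline void example_set_pmd(unsigned long pmdval)
	{
		example_pmd_entry = pmdval;
		/* A store-only barrier to the inner-shareable domain is
		 * enough to make the page-table write visible to the
		 * hardware walkers; a full-system "dsb sy" is overkill. */
		dsb(ishst);
	}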

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/barrier.h    |  2 +-
 arch/arm64/include/asm/cacheflush.h |  4 ++--
 arch/arm64/include/asm/pgtable.h    |  4 ++--
 arch/arm64/include/asm/tlbflush.h   | 14 +++++++-------
 arch/arm64/kernel/process.c         |  2 +-
 arch/arm64/kvm/sys_regs.c           |  4 ++--
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 66eb7648043b..5d69eddbe39e 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -28,7 +28,7 @@
 #define dmb(opt)	asm volatile("dmb sy" : : : "memory")
 #define dsb(opt)	asm volatile("dsb sy" : : : "memory")
 
-#define mb()		dsb()
+#define mb()		dsb(sy)
 #define rmb()		asm volatile("dsb ld" : : : "memory")
 #define wmb()		asm volatile("dsb st" : : : "memory")
 
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 4c60e64a801c..a5176cf32dad 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -123,7 +123,7 @@ extern void flush_dcache_page(struct page *);
 static inline void __flush_icache_all(void)
 {
 	asm("ic	ialluis");
-	dsb();
+	dsb(ish);
 }
 
 #define flush_dcache_mmap_lock(mapping) \
@@ -150,7 +150,7 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
 	 * set_pte_at() called from vmap_pte_range() does not
 	 * have a DSB after cleaning the cache line.
 	 */
-	dsb();
+	dsb(ish);
 }
 
 static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 90c811f05a2e..bd55dd9fabd0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -299,7 +299,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	*pmdp = pmd;
-	dsb();
+	dsb(ishst);
 }
 
 static inline void pmd_clear(pmd_t *pmdp)
@@ -329,7 +329,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 static inline void set_pud(pud_t *pudp, pud_t pud)
 {
 	*pudp = pud;
-	dsb();
+	dsb(ishst);
 }
 
 static inline void pud_clear(pud_t *pudp)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 8b482035cfc2..3083a08f9622 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb;
  */
 static inline void flush_tlb_all(void)
 {
-	dsb();
+	dsb(ishst);
 	asm("tlbi	vmalle1is");
-	dsb();
+	dsb(ish);
 	isb();
 }
 
@@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned long asid = (unsigned long)ASID(mm) << 48;
 
-	dsb();
+	dsb(ishst);
 	asm("tlbi	aside1is, %0" : : "r" (asid));
-	dsb();
+	dsb(ish);
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -93,9 +93,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 	unsigned long addr = uaddr >> 12 |
 		((unsigned long)ASID(vma->vm_mm) << 48);
 
-	dsb();
+	dsb(ishst);
 	asm("tlbi	vae1is, %0" : : "r" (addr));
-	dsb();
+	dsb(ish);
 }
 
 /*
@@ -114,7 +114,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	 * set_pte() does not have a DSB, so make sure that the page table
 	 * write is visible.
 	 */
-	dsb();
+	dsb(ishst);
 }
 
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6391485f342d..f7c446a5e97b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -300,7 +300,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	 * Complete any pending TLB or cache maintenance on this CPU in case
 	 * the thread migrates to a different CPU.
 	 */
-	dsb();
+	dsb(ish);
 
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 03244582bc55..c59a1bdab5eb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -71,13 +71,13 @@ static u32 get_ccsidr(u32 csselr)
 static void do_dc_cisw(u32 val)
 {
 	asm volatile("dc cisw, %x0" : : "r" (val));
-	dsb();
+	dsb(ish);
 }
 
 static void do_dc_csw(u32 val)
 {
 	asm volatile("dc csw, %x0" : : "r" (val));
-	dsb();
+	dsb(ish);
 }
 
 /* See note at ARM ARM B1.14.4 */
-- 
1.9.2

* [PATCH 4/8] arm64: barriers: wire up new barrier options
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
                   ` (2 preceding siblings ...)
  2014-05-02 15:24 ` [PATCH 3/8] arm64: barriers: make use of barrier options with explicit barriers Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  2014-05-02 15:24 ` [PATCH 5/8] arm64: barriers: use barrier() instead of smp_mb() when !SMP Will Deacon
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

Now that all callers of the barrier macros are updated to pass the
mandatory options, update the macros so the option is actually used.
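
A standalone sketch of the stringizing behaviour being relied upon
(assumes a GCC-style toolchain; the example function is hypothetical):

	#define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
	#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")

	void example_barriers(void)
	{
		dmb(ishst);	/* emits "dmb ishst" */
		dsb(ish);	/* emits "dsb ish"   */
		/* A misspelt option now fails at assembly time instead of
		 * silently degrading to the full-system "sy" variant. */
	}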

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/barrier.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 5d69eddbe39e..71a42d6599fb 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,12 +25,12 @@
 #define wfi()		asm volatile("wfi" : : : "memory")
 
 #define isb()		asm volatile("isb" : : : "memory")
-#define dmb(opt)	asm volatile("dmb sy" : : : "memory")
-#define dsb(opt)	asm volatile("dsb sy" : : : "memory")
+#define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
+#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
 
 #define mb()		dsb(sy)
-#define rmb()		asm volatile("dsb ld" : : : "memory")
-#define wmb()		asm volatile("dsb st" : : : "memory")
+#define rmb()		dsb(ld)
+#define wmb()		dsb(st)
 
 #ifndef CONFIG_SMP
 #define smp_mb()	barrier()
@@ -54,9 +54,9 @@ do {									\
 
 #else
 
-#define smp_mb()	asm volatile("dmb ish" : : : "memory")
-#define smp_rmb()	asm volatile("dmb ishld" : : : "memory")
-#define smp_wmb()	asm volatile("dmb ishst" : : : "memory")
+#define smp_mb()	dmb(ish)
+#define smp_rmb()	dmb(ishld)
+#define smp_wmb()	dmb(ishst)
 
 #define smp_store_release(p, v)						\
 do {									\
-- 
1.9.2

* [PATCH 5/8] arm64: barriers: use barrier() instead of smp_mb() when !SMP
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
                   ` (3 preceding siblings ...)
  2014-05-02 15:24 ` [PATCH 4/8] arm64: barriers: wire up new barrier options Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  2014-05-02 15:24 ` [PATCH 6/8] arm64: head: fix cache flushing and barriers in set_cpu_boot_mode_flag Will Deacon
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

The recently introduced acquire/release accessors refer to smp_mb()
in the !CONFIG_SMP case. This is confusing when reading the code, so use
barrier() directly when we know we're UP.
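
For reference, barrier() is the usual compiler-only barrier, roughly
(sketch; the real definition lives in the kernel's compiler headers):

	#define barrier()	asm volatile("" : : : "memory")

On a UP kernel there is no other observer to order against, so the
acquire/release accessors only need to stop the compiler from moving
accesses around; no dmb or dsb instruction is emitted.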

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/barrier.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 71a42d6599fb..709f1f6d6bbd 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -40,7 +40,7 @@
 #define smp_store_release(p, v)						\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
-	smp_mb();							\
+	barrier();							\
 	ACCESS_ONCE(*p) = (v);						\
 } while (0)
 
@@ -48,7 +48,7 @@ do {									\
 ({									\
 	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
-	smp_mb();							\
+	barrier();							\
 	___p1;								\
 })
 
-- 
1.9.2

* [PATCH 6/8] arm64: head: fix cache flushing and barriers in set_cpu_boot_mode_flag
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
                   ` (4 preceding siblings ...)
  2014-05-02 15:24 ` [PATCH 5/8] arm64: barriers: use barrier() instead of smp_mb() when !SMP Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  2014-05-02 15:24 ` [PATCH 7/8] arm64: kvm: use inner-shareable barriers for inner-shareable maintenance Will Deacon
  2014-05-02 15:24 ` [PATCH 8/8] arm64: mm: " Will Deacon
  7 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

set_cpu_boot_mode_flag is used to identify which exception levels are
encountered across the system by CPUs trying to enter the kernel. The
basic algorithm is: if a CPU is booting at EL2, it will set a flag at
an offset of #4 from __boot_cpu_mode, a cacheline-aligned variable.
Otherwise, a flag is set at an offset of zero into the same cacheline.
This enables us to check that all CPUs booted at the same exception
level.

This cacheline is written with the stage-1 MMU off (that is, via a
strongly-ordered mapping) and will bypass any clean lines in the cache,
leading to potential coherence problems when the variable is later
checked via the normal, cacheable mapping of the kernel image.

This patch reworks the broken flushing code so that we:

  (1) Use a DMB to order the strongly-ordered write of the cacheline
      against the subsequent cache-maintenance operation (by-VA
      operations only hazard against normal, cacheable accesses).

  (2) Use a single dc ivac instruction to invalidate any clean lines
      containing a stale copy of the line after it has been updated.
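
Written as an illustrative C sketch (hypothetical helper; the real code
is the MMU-off assembly below), the repaired sequence is:

	static void set_boot_mode_flag_sketch(unsigned int *boot_cpu_mode,
					      unsigned int mode, int booted_at_el2)
	{
		unsigned int *flag = booted_at_el2 ? boot_cpu_mode + 1
						   : boot_cpu_mode;

		*flag = mode;				/* strongly-ordered write, bypasses the cache */
		asm volatile("dmb sy" : : : "memory");	/* (1) order the write before the by-VA CMO */
		asm volatile("dc ivac, %0"		/* (2) invalidate any stale clean line */
			     : : "r" (flag) : "memory");
	}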

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/head.S | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0fd565000772..b96a732e4859 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -230,11 +230,9 @@ ENTRY(set_cpu_boot_mode_flag)
 	cmp	w20, #BOOT_CPU_MODE_EL2
 	b.ne	1f
 	add	x1, x1, #4
-1:	dc	cvac, x1			// Clean potentially dirty cache line
-	dsb	sy
-	str	w20, [x1]			// This CPU has booted in EL1
-	dc	civac, x1			// Clean&invalidate potentially stale cache line
-	dsb	sy
+1:	str	w20, [x1]			// This CPU has booted in EL1
+	dmb	sy
+	dc	ivac, x1			// Invalidate potentially stale cache line
 	ret
 ENDPROC(set_cpu_boot_mode_flag)
 
-- 
1.9.2

* [PATCH 7/8] arm64: kvm: use inner-shareable barriers for inner-shareable maintenance
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
                   ` (5 preceding siblings ...)
  2014-05-02 15:24 ` [PATCH 6/8] arm64: head: fix cache flushing and barriers in set_cpu_boot_mode_flag Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  2014-05-02 15:24 ` [PATCH 8/8] arm64: mm: " Will Deacon
  7 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

In order to ensure completion of inner-shareable maintenance instructions
(cache and TLB) on AArch64, we can use the -ish suffix to the dsb
instruction.

This patch relaxes our dsb sy instructions to dsb ish where possible.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kvm/hyp.S | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 2c56012cb2d2..b0d1512acf08 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -630,9 +630,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	 * whole of Stage-1. Weep...
 	 */
 	tlbi	ipas2e1is, x1
-	dsb	sy
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb	ish
 	tlbi	vmalle1is
-	dsb	sy
+	dsb	ish
 	isb
 
 	msr	vttbr_el2, xzr
@@ -643,7 +649,7 @@ ENTRY(__kvm_flush_vm_context)
 	dsb	ishst
 	tlbi	alle1is
 	ic	ialluis
-	dsb	sy
+	dsb	ish
 	ret
 ENDPROC(__kvm_flush_vm_context)
 
-- 
1.9.2

* [PATCH 8/8] arm64: mm: use inner-shareable barriers for inner-shareable maintenance
  2014-05-02 15:24 [PATCH 0/8] ARM/arm64 Barrier cleanups and fixes for 3.16 Will Deacon
                   ` (6 preceding siblings ...)
  2014-05-02 15:24 ` [PATCH 7/8] arm64: kvm: use inner-shareable barriers for inner-shareable maintenance Will Deacon
@ 2014-05-02 15:24 ` Will Deacon
  7 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-02 15:24 UTC (permalink / raw)
  To: linux-arm-kernel

In order to ensure ordering and completion of inner-shareable maintenance
instructions (cache and TLB) on AArch64, we can use the -ish suffix to
the dmb and dsb instructions respectively.

This patch updates our low-level cache and tlb maintenance routines to
use the inner-shareable barrier variants where appropriate.
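
The resulting shape of a broadcast TLB invalidate, as a C sketch
(hypothetical helper; the real routines are the assembly below):

	static inline void flush_kern_tlb_page_sketch(unsigned long va)
	{
		/* make prior PTE writes visible to all table walkers */
		asm volatile("dsb ishst" : : : "memory");
		asm volatile("tlbi vaae1is, %0" : : "r" (va >> 12) : "memory");
		/* wait for the broadcast invalidate to complete everywhere
		 * in the inner-shareable domain */
		asm volatile("dsb ish" : : : "memory");
		asm volatile("isb" : : : "memory");
	}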

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/cache.S | 6 +++---
 arch/arm64/mm/proc.S  | 2 +-
 arch/arm64/mm/tlb.S   | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index fda756875fa6..23663837acff 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -31,7 +31,7 @@
  *	Corrupted registers: x0-x7, x9-x11
  */
 __flush_dcache_all:
-	dsb	sy				// ensure ordering with previous memory accesses
+	dmb	sy				// ensure ordering with previous memory accesses
 	mrs	x0, clidr_el1			// read clidr
 	and	x3, x0, #0x7000000		// extract loc from clidr
 	lsr	x3, x3, #23			// left align loc bit field
@@ -128,7 +128,7 @@ USER(9f, dc	cvau, x4	)		// clean D line to PoU
 	add	x4, x4, x2
 	cmp	x4, x1
 	b.lo	1b
-	dsb	sy
+	dsb	ish
 
 	icache_line_size x2, x3
 	sub	x3, x2, #1
@@ -139,7 +139,7 @@ USER(9f, ic	ivau, x4	)		// invalidate I line PoU
 	cmp	x4, x1
 	b.lo	1b
 9:						// ignore any faulting cache operation
-	dsb	sy
+	dsb	ish
 	isb
 	ret
 ENDPROC(flush_icache_range)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 9042aff5e9e3..7736779c9809 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -182,7 +182,7 @@ ENDPROC(cpu_do_switch_mm)
 ENTRY(__cpu_setup)
 	ic	iallu				// I+BTB cache invalidate
 	tlbi	vmalle1is			// invalidate I + D TLBs
-	dsb	sy
+	dsb	ish
 
 	mov	x0, #3 << 20
 	msr	cpacr_el1, x0			// Enable FP/ASIMD
diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S
index 19da91e0cd27..114033617dcc 100644
--- a/arch/arm64/mm/tlb.S
+++ b/arch/arm64/mm/tlb.S
@@ -36,7 +36,7 @@
 ENTRY(__cpu_flush_user_tlb_range)
 	vma_vm_mm x3, x2			// get vma->vm_mm
 	mmid	w3, x3				// get vm_mm->context.id
-	dsb	sy
+	dsb	ishst
 	lsr	x0, x0, #12			// align address
 	lsr	x1, x1, #12
 	bfi	x0, x3, #48, #16		// start VA and ASID
@@ -45,7 +45,7 @@ ENTRY(__cpu_flush_user_tlb_range)
 	add	x0, x0, #1
 	cmp	x0, x1
 	b.lo	1b
-	dsb	sy
+	dsb	ish
 	ret
 ENDPROC(__cpu_flush_user_tlb_range)
 
@@ -58,14 +58,14 @@ ENDPROC(__cpu_flush_user_tlb_range)
  *	- end   - end address (exclusive, may not be aligned)
  */
 ENTRY(__cpu_flush_kern_tlb_range)
-	dsb	sy
+	dsb	ishst
 	lsr	x0, x0, #12			// align address
 	lsr	x1, x1, #12
 1:	tlbi	vaae1is, x0			// TLB invalidate by address
 	add	x0, x0, #1
 	cmp	x0, x1
 	b.lo	1b
-	dsb	sy
+	dsb	ish
 	isb
 	ret
 ENDPROC(__cpu_flush_kern_tlb_range)
-- 
1.9.2

* [PATCH 2/8] ARM: cache: remove redundant dsb instruction from v7_coherent_user_range
  2014-05-02 15:24 ` [PATCH 2/8] ARM: cache: remove redundant dsb instruction from v7_coherent_user_range Will Deacon
@ 2014-05-09 16:16   ` Catalin Marinas
  2014-05-09 18:25     ` Will Deacon
  0 siblings, 1 reply; 11+ messages in thread
From: Catalin Marinas @ 2014-05-09 16:16 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, May 02, 2014 at 04:24:09PM +0100, Will Deacon wrote:
> v7_coherent_user_range takes a virtual address range, cleans the D-side
> to PoU and then invalidates the I-side so that subsequent instruction
> fetches can see any new data written to the range in question.
> 
> Since cache maintenance by MVA is architected to execute in program
> order with respect to other cache maintenance operations specifying
> the same virtual address, we do not require a barrier between the
> D-side clean and the I-side invalidation.
> 
> This patch removes the redundant dsb.
> 
> Signed-off-by: Will Deacon <will.deacon@arm.com>
> ---
>  arch/arm/mm/cache-v7.S | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
> index 615c99e38ba1..b040d3ca20ac 100644
> --- a/arch/arm/mm/cache-v7.S
> +++ b/arch/arm/mm/cache-v7.S
> @@ -282,7 +282,6 @@ ENTRY(v7_coherent_user_range)
>  	add	r12, r12, r2
>  	cmp	r12, r1
>  	blo	1b
> -	dsb	ishst
>  	icache_line_size r2, r3
>  	sub	r3, r2, #1
>  	bic	r12, r0, r3

The original implementation follows the ARMv7 ARM example for self
modifying code which has a DSB. I agree with you that the section B2.2.9
(ARMv7 ARM - Ordering of cache and branch predictor maintenance
operations) states that ops by MVA would be ordered with each other.

Time for clarification higher up? ;)

-- 
Catalin

* [PATCH 2/8] ARM: cache: remove redundant dsb instruction from v7_coherent_user_range
  2014-05-09 16:16   ` Catalin Marinas
@ 2014-05-09 18:25     ` Will Deacon
  0 siblings, 0 replies; 11+ messages in thread
From: Will Deacon @ 2014-05-09 18:25 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, May 09, 2014 at 05:16:09PM +0100, Catalin Marinas wrote:
> On Fri, May 02, 2014 at 04:24:09PM +0100, Will Deacon wrote:
> > v7_coherent_user_range takes a virtual address range, cleans the D-side
> > to PoU and then invalidates the I-side so that subsequent instruction
> > fetches can see any new data written to the range in question.
> > 
> > Since cache maintenance by MVA is architected to execute in program
> > order with respect to other cache maintenance operations specifying
> > the same virtual address, we do not require a barrier between the
> > D-side clean and the I-side invalidation.
> > 
> > This patch removes the redundant dsb.
> > 
> > Signed-off-by: Will Deacon <will.deacon@arm.com>
> > ---
> >  arch/arm/mm/cache-v7.S | 1 -
> >  1 file changed, 1 deletion(-)
> > 
> > diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
> > index 615c99e38ba1..b040d3ca20ac 100644
> > --- a/arch/arm/mm/cache-v7.S
> > +++ b/arch/arm/mm/cache-v7.S
> > @@ -282,7 +282,6 @@ ENTRY(v7_coherent_user_range)
> >  	add	r12, r12, r2
> >  	cmp	r12, r1
> >  	blo	1b
> > -	dsb	ishst
> >  	icache_line_size r2, r3
> >  	sub	r3, r2, #1
> >  	bic	r12, r0, r3
> 
> The original implementation follows the ARMv7 ARM example for self
> modifying code which has a DSB. I agree with you that the section B2.2.9
> (ARMv7 ARM - Ordering of cache and branch predictor maintenance
> operations) states that ops by MVA would be ordered with each other.

Those examples also don't make use of barrier options, so I wouldn't pay too
much attention to them :)

Still, I'll chase this up internally since the ARM ARM needs fixing.

Will
