* [PATCH 0/2] Last level (leaf) TLBI support
@ 2015-07-24  8:59 Catalin Marinas
  2015-07-24  8:59 ` [PATCH 1/2] arm64: Clean up __flush_tlb(_kernel)_range functions Catalin Marinas
  2015-07-24  8:59 ` [PATCH 2/2] arm64: Use last level TLBI for user pte changes Catalin Marinas
  0 siblings, 2 replies; 5+ messages in thread
From: Catalin Marinas @ 2015-07-24  8:59 UTC (permalink / raw)
  To: linux-arm-kernel

AArch64 introduces TLBI operations that invalidate only the TLB entries
corresponding to the last level of the page table walk (the leaf block or
page entry). The first patch in the series is just a minor clean-up to
avoid defining too many functions. The second patch introduces last level
TLBI support for flush_tlb_page() and tlb_flush(). The flush_tlb_range()
function cannot be changed since it is used by the THP code for
collapsing multiple small pages into a huge page.
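
As a rough sketch of what the distinction boils down to per page
(tlbi_user_page() below is just an illustrative helper, not something
added by these patches; it assumes the address has already been encoded
with the ASID and uses the dsb() barrier macros as in the existing code):

	static inline void tlbi_user_page(unsigned long addr, bool leaf)
	{
		dsb(ishst);
		if (leaf)
			asm("tlbi	vale1is, %0" : : "r" (addr));	/* last level (leaf) entry only */
		else
			asm("tlbi	vae1is, %0" : : "r" (addr));	/* all levels for this VA */
		dsb(ish);
	}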

Catalin Marinas (2):
  arm64: Clean up __flush_tlb(_kernel)_range functions
  arm64: Use last level TLBI for user pte changes

 arch/arm64/include/asm/tlb.h      |  7 +++-
 arch/arm64/include/asm/tlbflush.h | 69 ++++++++++++++++++++++++---------------
 2 files changed, 48 insertions(+), 28 deletions(-)


* [PATCH 1/2] arm64: Clean up __flush_tlb(_kernel)_range functions
  2015-07-24  8:59 [PATCH 0/2] Last level (leaf) TLBI support Catalin Marinas
@ 2015-07-24  8:59 ` Catalin Marinas
  2015-07-24  8:59 ` [PATCH 2/2] arm64: Use last level TLBI for user pte changes Catalin Marinas
  1 sibling, 0 replies; 5+ messages in thread
From: Catalin Marinas @ 2015-07-24  8:59 UTC (permalink / raw)
  To: linux-arm-kernel

This patch moves the MAX_TLB_RANGE check directly into the
flush_tlb(_kernel)_range functions to avoid the underscore-prefixed
definitions (and for consistency with a subsequent patch).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 47 +++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 934815d45eda..1643908eb5f3 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -91,11 +91,23 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 	dsb(ish);
 }
 
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
-				     unsigned long start, unsigned long end)
+/*
+ * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
+ * necessarily a performance improvement.
+ */
+#define MAX_TLB_RANGE	(1024UL << PAGE_SHIFT)
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
 {
 	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
 	unsigned long addr;
+
+	if ((end - start) > MAX_TLB_RANGE) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
 	start = asid | (start >> 12);
 	end = asid | (end >> 12);
 
@@ -105,9 +117,15 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	dsb(ish);
 }
 
-static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end)
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
+
+	if ((end - start) > MAX_TLB_RANGE) {
+		flush_tlb_all();
+		return;
+	}
+
 	start >>= 12;
 	end >>= 12;
 
@@ -119,29 +137,6 @@ static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long e
 }
 
 /*
- * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
- * necessarily a performance improvement.
- */
-#define MAX_TLB_RANGE	(1024UL << PAGE_SHIFT)
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	if ((end - start) <= MAX_TLB_RANGE)
-		__flush_tlb_range(vma, start, end);
-	else
-		flush_tlb_mm(vma->vm_mm);
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	if ((end - start) <= MAX_TLB_RANGE)
-		__flush_tlb_kernel_range(start, end);
-	else
-		flush_tlb_all();
-}
-
-/*
  * Used to invalidate the TLB (walk caches) corresponding to intermediate page
  * table levels (pgd/pud/pmd).
  */
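
For illustration only (assuming 4K pages, so MAX_TLB_RANGE is 1024 pages,
i.e. 4MB, and using SZ_1M/SZ_8M from <linux/sizes.h>; start is a
placeholder), the fallback behaviour after this patch looks like this:

	flush_tlb_kernel_range(start, start + SZ_1M);	/* per-page TLBI loop */
	flush_tlb_kernel_range(start, start + SZ_8M);	/* > MAX_TLB_RANGE: flush_tlb_all() */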


* [PATCH 2/2] arm64: Use last level TLBI for user pte changes
  2015-07-24  8:59 [PATCH 0/2] Last level (leaf) TLBI support Catalin Marinas
  2015-07-24  8:59 ` [PATCH 1/2] arm64: Clean up __flush_tlb(_kernel)_range functions Catalin Marinas
@ 2015-07-24  8:59 ` Catalin Marinas
  2015-07-27 17:49   ` Will Deacon
  1 sibling, 1 reply; 5+ messages in thread
From: Catalin Marinas @ 2015-07-24  8:59 UTC (permalink / raw)
  To: linux-arm-kernel

The flush_tlb_page() function is used on user address ranges when PTEs
(or PMDs/PUDs for huge pages) were changed (attributes or clearing). For
such cases, it is more efficient to invalidate only the last level of
the TLB with the "tlbi vale1is" instruction.

In the TLB shoot-down case, the TLB caching of the intermediate page
table levels (pmd, pud, pgd) is handled by __flush_tlb_pgtable() via the
__(pte|pmd|pud)_free_tlb() functions and it is not deferred to
tlb_finish_mmu() (as of commit 285994a62c80 - "arm64: Invalidate the TLB
corresponding to intermediate page table levels"). The tlb_flush()
function only needs to invalidate the TLB for the last level of page
tables; a new arm64-specific __flush_tlb_page_range() function performs
only the last level TLBI.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/tlb.h      |  7 ++++++-
 arch/arm64/include/asm/tlbflush.h | 22 +++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 3a0242c7eb8d..93c70e99a1f2 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -41,7 +41,12 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 		flush_tlb_mm(tlb->mm);
 	} else {
 		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
-		flush_tlb_range(&vma, tlb->start, tlb->end);
+		/*
+		 * The intermediate page table levels are already handled by
+		 * the __(pte|pmd|pud)_free_tlb() functions, so leaf TLBI is
+		 * sufficient here.
+		 */
+		__flush_tlb_page_range(&vma, tlb->start, tlb->end);
 	}
 }
 
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 1643908eb5f3..48794ab79cc0 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -87,7 +87,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 		((unsigned long)ASID(vma->vm_mm) << 48);
 
 	dsb(ishst);
-	asm("tlbi	vae1is, %0" : : "r" (addr));
+	asm("tlbi	vale1is, %0" : : "r" (addr));
 	dsb(ish);
 }
 
@@ -97,6 +97,26 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLB_RANGE	(1024UL << PAGE_SHIFT)
 
+static inline void __flush_tlb_page_range(struct vm_area_struct *vma,
+					  unsigned long start, unsigned long end)
+{
+	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+	unsigned long addr;
+
+	if ((end - start) > MAX_TLB_RANGE) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	start = asid | (start >> 12);
+	end = asid | (end >> 12);
+
+	dsb(ishst);
+	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+		asm("tlbi vale1is, %0" : : "r"(addr));
+	dsb(ish);
+}
+
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
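
(Side note, purely illustrative: tlbi_addr() below is a hypothetical
helper, not part of the patch. The register passed to TLBI carries the
ASID in the top 16 bits and the page number, VA >> 12, in the low bits,
which is what the asid | (addr >> 12) arithmetic above constructs.)

	static inline unsigned long tlbi_addr(struct mm_struct *mm, unsigned long va)
	{
		/* ASID in the top 16 bits, page number in the low bits */
		return ((unsigned long)ASID(mm) << 48) | (va >> 12);
	}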


* [PATCH 2/2] arm64: Use last level TLBI for user pte changes
  2015-07-24  8:59 ` [PATCH 2/2] arm64: Use last level TLBI for user pte changes Catalin Marinas
@ 2015-07-27 17:49   ` Will Deacon
  2015-07-28  9:45     ` Catalin Marinas
  0 siblings, 1 reply; 5+ messages in thread
From: Will Deacon @ 2015-07-27 17:49 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 24, 2015 at 09:59:56AM +0100, Catalin Marinas wrote:
> The flush_tlb_page() function is used on user address ranges when PTEs
> (or PMDs/PUDs for huge pages) were changed (attributes or clearing). For
> such cases, it is more efficient to invalidate only the last level of
> the TLB with the "tlbi vale1is" instruction.
> 
> In the TLB shoot-down case, the TLB caching of the intermediate page
> table levels (pmd, pud, pgd) is handled by __flush_tlb_pgtable() via the
> __(pte|pmd|pud)_free_tlb() functions and it is not deferred to
> tlb_finish_mmu() (as of commit 285994a62c80 - "arm64: Invalidate the TLB
> corresponding to intermediate page table levels"). The tlb_flush()
> function only needs to invalidate the TLB for the last level of page
> tables; a new arm64-specific __flush_tlb_page_range() function performs
> only the last level TLBI.

[...]

> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index 1643908eb5f3..48794ab79cc0 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -87,7 +87,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
>  		((unsigned long)ASID(vma->vm_mm) << 48);
>  
>  	dsb(ishst);
> -	asm("tlbi	vae1is, %0" : : "r" (addr));
> +	asm("tlbi	vale1is, %0" : : "r" (addr));
>  	dsb(ish);
>  }
>  
> @@ -97,6 +97,26 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
>   */
>  #define MAX_TLB_RANGE	(1024UL << PAGE_SHIFT)
>  
> +static inline void __flush_tlb_page_range(struct vm_area_struct *vma,
> +					  unsigned long start, unsigned long end)
> +{
> +	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
> +	unsigned long addr;
> +
> +	if ((end - start) > MAX_TLB_RANGE) {
> +		flush_tlb_mm(vma->vm_mm);
> +		return;
> +	}
> +
> +	start = asid | (start >> 12);
> +	end = asid | (end >> 12);
> +
> +	dsb(ishst);
> +	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
> +		asm("tlbi vale1is, %0" : : "r"(addr));
> +	dsb(ish);
> +}

This is identical to __flush_tlb_range apart from the asm op.

What happens if you add a "const bool leaf" parameter and stick a
conditional inside the loop?

Will


* [PATCH 2/2] arm64: Use last level TLBI for user pte changes
  2015-07-27 17:49   ` Will Deacon
@ 2015-07-28  9:45     ` Catalin Marinas
  0 siblings, 0 replies; 5+ messages in thread
From: Catalin Marinas @ 2015-07-28  9:45 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jul 27, 2015 at 06:49:15PM +0100, Will Deacon wrote:
> On Fri, Jul 24, 2015 at 09:59:56AM +0100, Catalin Marinas wrote:
> > The flush_tlb_page() function is used on user address ranges when PTEs
> > (or PMDs/PUDs for huge pages) were changed (attributes or clearing). For
> > such cases, it is more efficient to invalidate only the last level of
> > the TLB with the "tlbi vale1is" instruction.
> > 
> > In the TLB shoot-down case, the TLB caching of the intermediate page
> > table levels (pmd, pud, pgd) is handled by __flush_tlb_pgtable() via the
> > __(pte|pmd|pud)_free_tlb() functions and it is not deferred to
> > tlb_finish_mmu() (as of commit 285994a62c80 - "arm64: Invalidate the TLB
> > corresponding to intermediate page table levels"). The tlb_flush()
> > function only needs to invalidate the TLB for the last level of page
> > tables; a new arm64-specific __flush_tlb_page_range() function performs
> > only the last level TLBI.
> 
> [...]
> 
> > diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> > index 1643908eb5f3..48794ab79cc0 100644
> > --- a/arch/arm64/include/asm/tlbflush.h
> > +++ b/arch/arm64/include/asm/tlbflush.h
> > @@ -87,7 +87,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
> >  		((unsigned long)ASID(vma->vm_mm) << 48);
> >  
> >  	dsb(ishst);
> > -	asm("tlbi	vae1is, %0" : : "r" (addr));
> > +	asm("tlbi	vale1is, %0" : : "r" (addr));
> >  	dsb(ish);
> >  }
> >  
> > @@ -97,6 +97,26 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
> >   */
> >  #define MAX_TLB_RANGE	(1024UL << PAGE_SHIFT)
> >  
> > +static inline void __flush_tlb_page_range(struct vm_area_struct *vma,
> > +					  unsigned long start, unsigned long end)
> > +{
> > +	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
> > +	unsigned long addr;
> > +
> > +	if ((end - start) > MAX_TLB_RANGE) {
> > +		flush_tlb_mm(vma->vm_mm);
> > +		return;
> > +	}
> > +
> > +	start = asid | (start >> 12);
> > +	end = asid | (end >> 12);
> > +
> > +	dsb(ishst);
> > +	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
> > +		asm("tlbi vale1is, %0" : : "r"(addr));
> > +	dsb(ish);
> > +}
> 
> This is identical to __flush_tlb_range apart from the asm op.
> 
> What happens if you add a "const bool leaf" parameter and stick a
> conditional inside the loop?

It looks better indeed (and the generated code is the same). The first
patch remains unchanged and the second patch is below ("git am -c" should
be able to detect the scissors line):

-----8<--------------
From 8c95d0be9f25e2df6f53c16071494c4aee6aee87 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 18 Mar 2015 11:28:06 +0000
Subject: [PATCH v2] arm64: Use last level TLBI for user pte changes

The flush_tlb_page() function is used on user address ranges when PTEs
(or PMDs/PUDs for huge pages) were changed (attributes or clearing). For
such cases, it is more efficient to invalidate only the last level of
the TLB with the "tlbi vale1is" instruction.

In the TLB shoot-down case, the TLB caching of the intermediate page
table levels (pmd, pud, pgd) is handled by __flush_tlb_pgtable() via the
__(pte|pmd|pud)_free_tlb() functions and it is not deferred to
tlb_finish_mmu() (as of commit 285994a62c80 - "arm64: Invalidate the TLB
corresponding to intermediate page table levels"). The tlb_flush()
function only needs to invalidate the TLB for the last level of page
tables; the __flush_tlb_range() function gains a fourth argument for
last level TLBI.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/tlb.h      |  7 ++++++-
 arch/arm64/include/asm/tlbflush.h | 21 ++++++++++++++++-----
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 3a0242c7eb8d..d6e6b6660380 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -41,7 +41,12 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 		flush_tlb_mm(tlb->mm);
 	} else {
 		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
-		flush_tlb_range(&vma, tlb->start, tlb->end);
+		/*
+		 * The intermediate page table levels are already handled by
+		 * the __(pte|pmd|pud)_free_tlb() functions, so last level
+		 * TLBI is sufficient here.
+		 */
+		__flush_tlb_range(&vma, tlb->start, tlb->end, true);
 	}
 }
 
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 1643908eb5f3..c3a5f83e1276 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -87,7 +87,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 		((unsigned long)ASID(vma->vm_mm) << 48);
 
 	dsb(ishst);
-	asm("tlbi	vae1is, %0" : : "r" (addr));
+	asm("tlbi	vale1is, %0" : : "r" (addr));
 	dsb(ish);
 }
 
@@ -97,8 +97,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLB_RANGE	(1024UL << PAGE_SHIFT)
 
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end,
+				     bool last_level)
 {
 	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
 	unsigned long addr;
@@ -112,11 +113,21 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	end = asid | (end >> 12);
 
 	dsb(ishst);
-	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
-		asm("tlbi vae1is, %0" : : "r"(addr));
+	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
+		if (last_level)
+			asm("tlbi vale1is, %0" : : "r"(addr));
+		else
+			asm("tlbi vae1is, %0" : : "r"(addr));
+	}
 	dsb(ish);
 }
 
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	__flush_tlb_range(vma, start, end, false);
+}
+
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
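
For completeness, a purely illustrative sketch (vma, start and end are
placeholders) of how the two flavours are picked with this version:

	flush_tlb_range(vma, start, end);		/* all levels: tlbi vae1is */
	__flush_tlb_range(vma, start, end, true);	/* leaf only:  tlbi vale1is */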
