public inbox for linux-arm-kernel@lists.infradead.org
 help / color / mirror / Atom feed
From: Ryan Roberts <ryan.roberts@arm.com>
To: Will Deacon <will@kernel.org>, Ard Biesheuvel <ardb@kernel.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Oliver Upton <oliver.upton@linux.dev>,
	Marc Zyngier <maz@kernel.org>, Dev Jain <dev.jain@arm.com>,
	Linu Cherian <Linu.Cherian@arm.com>,
	Jonathan Cameron <jonathan.cameron@huawei.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v3 11/13] arm64: mm: More flags for __flush_tlb_range()
Date: Mon,  2 Mar 2026 13:55:58 +0000	[thread overview]
Message-ID: <20260302135602.3716920-12-ryan.roberts@arm.com> (raw)
In-Reply-To: <20260302135602.3716920-1-ryan.roberts@arm.com>

Refactor function variants with "_nosync", "_local" and "_nonotify" into
a single __always_inline implementation that takes flags and rely on
constant folding to select the parts that are actually needed at any
given callsite, based on the provided flags.

Flags all live in the tlbf_t (TLB flags) type; TLBF_NONE (0) continues
to provide the strongest semantics (i.e. evict from walk cache,
broadcast, synchronise and notify). Each flag reduces the strength in
some way; TLBF_NONOTIFY, TLBF_NOSYNC and TLBF_NOBROADCAST are added to
complement the existing TLBF_NOWALKCACHE.

There are no users that require TLBF_NOBROADCAST without
TLBF_NOWALKCACHE so implement that as BUILD_BUG() to avoid needing to
introduce dead code for vae1 invalidations.

The result is a clearer, simpler, more powerful API.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 95 ++++++++++++++++++-------------
 arch/arm64/mm/contpte.c           |  9 ++-
 2 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index d134824ea5daa..5509927e45b93 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -295,7 +295,10 @@ static inline void __tlbi_sync_s1ish_hyp(void)
  *		no invalidation may take place. In the case where the level
  *		cannot be easily determined, the value TLBI_TTL_UNKNOWN will
  *		perform a non-hinted invalidation. flags may be TLBF_NONE (0) or
- *		TLBF_NOWALKCACHE (elide eviction of walk cache entries).
+ *		any combination of TLBF_NOWALKCACHE (elide eviction of walk
+ *		cache entries), TLBF_NONOTIFY (don't call mmu notifiers),
+ *		TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST
+ *		(only perform the invalidation for the local cpu).
  *
  *	local_flush_tlb_page(vma, addr)
  *		Local variant of flush_tlb_page().  Stale TLB entries may
@@ -305,12 +308,6 @@ static inline void __tlbi_sync_s1ish_hyp(void)
  *		Same as local_flush_tlb_page() except MMU notifier will not be
  *		called.
  *
- *	local_flush_tlb_contpte(vma, addr)
- *		Invalidate the virtual-address range
- *		'[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU
- *		for the user address space corresponding to 'vma->mm'.  Stale
- *		TLB entries may remain in remote CPUs.
- *
  *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
  *	on top of these routines, since that is our interface to the mmu_gather
  *	API as used by munmap() and friends.
@@ -552,15 +549,23 @@ typedef unsigned __bitwise tlbf_t;
 /* Invalidate tlb entries only, leaving the page table walk cache intact. */
 #define TLBF_NOWALKCACHE	((__force tlbf_t)BIT(0))
 
-static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
-				     unsigned long start, unsigned long end,
-				     unsigned long stride, int tlb_level,
-				     tlbf_t flags)
+/* Skip the trailing dsb after issuing tlbi. */
+#define TLBF_NOSYNC		((__force tlbf_t)BIT(1))
+
+/* Suppress tlb notifier callbacks for this flush operation. */
+#define TLBF_NONOTIFY		((__force tlbf_t)BIT(2))
+
+/* Perform the tlbi locally without broadcasting to other CPUs. */
+#define TLBF_NOBROADCAST	((__force tlbf_t)BIT(3))
+
+static __always_inline void __do_flush_tlb_range(struct vm_area_struct *vma,
+					unsigned long start, unsigned long end,
+					unsigned long stride, int tlb_level,
+					tlbf_t flags)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	unsigned long asid, pages;
 
-	start = round_down(start, stride);
-	end = round_up(end, stride);
 	pages = (end - start) >> PAGE_SHIFT;
 
 	if (__flush_tlb_range_limit_excess(pages, stride)) {
@@ -568,17 +573,41 @@ static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
 		return;
 	}
 
-	dsb(ishst);
+	if (!(flags & TLBF_NOBROADCAST))
+		dsb(ishst);
+	else
+		dsb(nshst);
+
 	asid = ASID(mm);
 
-	if (flags & TLBF_NOWALKCACHE)
-		__flush_s1_tlb_range_op(vale1is, start, pages, stride,
-				     asid, tlb_level);
-	else
+	switch (flags & (TLBF_NOWALKCACHE | TLBF_NOBROADCAST)) {
+	case TLBF_NONE:
 		__flush_s1_tlb_range_op(vae1is, start, pages, stride,
-				     asid, tlb_level);
+					asid, tlb_level);
+		break;
+	case TLBF_NOWALKCACHE:
+		__flush_s1_tlb_range_op(vale1is, start, pages, stride,
+					asid, tlb_level);
+		break;
+	case TLBF_NOBROADCAST:
+		/* Combination unused; fails the build if a caller requests it. */
+		BUILD_BUG();
+		break;
+	case TLBF_NOWALKCACHE | TLBF_NOBROADCAST:
+		__flush_s1_tlb_range_op(vale1, start, pages, stride,
+					asid, tlb_level);
+		break;
+	}
+
+	if (!(flags & TLBF_NONOTIFY))
+		mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
 
-	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
+	if (!(flags & TLBF_NOSYNC)) {
+		if (!(flags & TLBF_NOBROADCAST))
+			__tlbi_sync_s1ish();
+		else
+			dsb(nsh);
+	}
 }
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
@@ -586,24 +615,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long stride, int tlb_level,
 				     tlbf_t flags)
 {
-	__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
-				 tlb_level, flags);
-	__tlbi_sync_s1ish();
-}
-
-static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
-					   unsigned long addr)
-{
-	unsigned long asid;
-
-	addr = round_down(addr, CONT_PTE_SIZE);
-
-	dsb(nshst);
-	asid = ASID(vma->vm_mm);
-	__flush_s1_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid, 3);
-	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr,
-						    addr + CONT_PTE_SIZE);
-	dsb(nsh);
+	start = round_down(start, stride);
+	end = round_up(end, stride);
+	__do_flush_tlb_range(vma, start, end, stride, tlb_level, flags);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
@@ -656,7 +670,10 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
 static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
 		struct mm_struct *mm, unsigned long start, unsigned long end)
 {
-	__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, 3, TLBF_NOWALKCACHE);
+	struct vm_area_struct vma = { .vm_mm = mm, .vm_flags = 0 };
+
+	__flush_tlb_range(&vma, start, end, PAGE_SIZE, 3,
+			  TLBF_NOWALKCACHE | TLBF_NOSYNC);
 }
 
 static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 681f22fac52a1..3f1a3e86353de 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -552,8 +552,8 @@ int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
 		 * See comment in __ptep_clear_flush_young(); same rationale for
 		 * eliding the trailing DSB applies here.
 		 */
-		__flush_tlb_range_nosync(vma->vm_mm, addr, end,
-					 PAGE_SIZE, 3, TLBF_NOWALKCACHE);
+		__flush_tlb_range(vma, addr, end, PAGE_SIZE, 3,
+				  TLBF_NOWALKCACHE | TLBF_NOSYNC);
 	}
 
 	return young;
@@ -641,7 +641,10 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
 			__ptep_set_access_flags(vma, addr, ptep, entry, 0);
 
 		if (dirty)
-			local_flush_tlb_contpte(vma, start_addr);
+			__flush_tlb_range(vma, start_addr,
+					  start_addr + CONT_PTE_SIZE,
+					  PAGE_SIZE, 3,
+					  TLBF_NOWALKCACHE | TLBF_NOBROADCAST);
 	} else {
 		__contpte_try_unfold(vma->vm_mm, addr, ptep, orig_pte);
 		__ptep_set_access_flags(vma, addr, ptep, entry, dirty);
-- 
2.43.0



  parent reply	other threads:[~2026-03-02 13:56 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-02 13:55 [PATCH v3 00/13] arm64: Refactor TLB invalidation API and implementation Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 01/13] arm64: mm: Re-implement the __tlbi_level macro as a C function Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 02/13] arm64: mm: Introduce a C wrapper for by-range TLB invalidation Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 03/13] arm64: mm: Implicitly invalidate user ASID based on TLBI operation Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 04/13] arm64: mm: Push __TLBI_VADDR() into __tlbi_level() Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 05/13] arm64: mm: Inline __TLBI_VADDR_RANGE() into __tlbi_range() Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 06/13] arm64: mm: Re-implement the __flush_tlb_range_op macro in C Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 07/13] arm64: mm: Simplify __TLBI_RANGE_NUM() macro Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 08/13] arm64: mm: Simplify __flush_tlb_range_limit_excess() Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 09/13] arm64: mm: Refactor flush_tlb_page() to use __tlbi_level_asid() Ryan Roberts
2026-03-02 13:55 ` [PATCH v3 10/13] arm64: mm: Refactor __flush_tlb_range() to take flags Ryan Roberts
2026-03-02 13:55 ` Ryan Roberts [this message]
2026-03-03  9:57   ` [PATCH v3 11/13] arm64: mm: More flags for __flush_tlb_range() Jonathan Cameron
2026-03-03 13:54     ` Ryan Roberts
2026-03-03 17:34       ` Jonathan Cameron
2026-03-02 13:55 ` [PATCH v3 12/13] arm64: mm: Wrap flush_tlb_page() around __do_flush_tlb_range() Ryan Roberts
2026-03-03  9:59   ` Jonathan Cameron
2026-03-02 13:56 ` [PATCH v3 13/13] arm64: mm: Provide level hint for flush_tlb_page() Ryan Roberts
2026-03-02 14:42   ` Mark Rutland
2026-03-02 17:39     ` Ryan Roberts
2026-03-02 17:56       ` Mark Rutland
2026-03-13 19:43 ` [PATCH v3 00/13] arm64: Refactor TLB invalidation API and implementation Catalin Marinas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260302135602.3716920-12-ryan.roberts@arm.com \
    --to=ryan.roberts@arm.com \
    --cc=Linu.Cherian@arm.com \
    --cc=ardb@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=dev.jain@arm.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=oliver.upton@linux.dev \
    --cc=torvalds@linux-foundation.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox