[PATCH 08/10] x86,tlb: do targeted broadcast flushing from tlbbatch code

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Rik van Riel <riel@surriel.com>
To: x86@kernel.org
Cc: linux-kernel@vger.kernel.org, kernel-team@meta.com,
	dave.hansen@linux.intel.com, luto@kernel.org,
	peterz@infradead.org, tglx@linutronix.de, mingo@redhat.com,
	bp@alien8.de, hpa@zytor.com, akpm@linux-foundation.org,
	Rik van Riel <riel@surriel.com>
Subject: [PATCH 08/10] x86,tlb: do targeted broadcast flushing from tlbbatch code
Date: Sat, 21 Dec 2024 23:06:40 -0500	[thread overview]
Message-ID: <20241222040717.3096835-9-riel@surriel.com> (raw)
In-Reply-To: <20241222040717.3096835-1-riel@surriel.com>

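Instead of doing a system-wide TLB flush from arch_tlbbatch_flush(),
queue up asynchronous, targeted INVLPGB flushes from
arch_tlbbatch_add_pending() for processes that have a broadcast ASID,
and wait for them with a TLBSYNC in arch_tlbbatch_flush().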

This also allows us to avoid adding the CPUs of processes using broadcast
flushing to the batch->cpumask, and will hopefully further reduce TLB
flushing from the reclaim and compaction paths.

Signed-off-by: Rik van Riel <riel@surriel.com>
---
 arch/x86/include/asm/tlbbatch.h |  1 +
 arch/x86/include/asm/tlbflush.h | 12 +++---------
 arch/x86/mm/tlb.c               | 33 ++++++++++++++++++++++++++++++---
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h
index 1ad56eb3e8a8..f9a17edf63ad 100644
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,7 @@ struct arch_tlbflush_unmap_batch {
 	 * the PFNs being flushed..
 	 */
 	struct cpumask cpumask;
+	bool used_invlpgb;
 };
 
 #endif /* _ARCH_X86_TLBBATCH_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index a59f56c9b355..87f9a3725d95 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -294,21 +294,15 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 	return atomic64_inc_return(&mm->context.tlb_gen);
 }
 
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
-					     struct mm_struct *mm,
-					     unsigned long uaddr)
-{
-	inc_mm_tlb_gen(mm);
-	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
-	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
-}
-
 static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
 {
 	flush_tlb_mm(mm);
 }
 
 extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+extern void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					     struct mm_struct *mm,
+					     unsigned long uaddr);
 
 static inline bool pte_flags_need_flush(unsigned long oldflags,
 					unsigned long newflags,
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 11ecffa26567..0482042e011c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1614,12 +1614,13 @@ EXPORT_SYMBOL_GPL(__flush_tlb_all);
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
 	struct flush_tlb_info *info;
-
 	int cpu = get_cpu();
 
-	if (static_cpu_has(X86_FEATURE_INVLPGB)) {
-		invlpgb_flush_all_nonglobals();
+	/* If we issued (asynchronous) INVLPGB flushes, wait for them here. */
+	if (static_cpu_has(X86_FEATURE_INVLPGB) && batch->used_invlpgb) {
 		tlbsync();
+		migrate_enable();
+		batch->used_invlpgb = false;
 		goto out_put_cpu;
 	}
 
@@ -1646,6 +1647,32 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 	put_cpu();
 }
 
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					     struct mm_struct *mm,
+					     unsigned long uaddr)
+{
+	if (static_cpu_has(X86_FEATURE_INVLPGB) && mm->context.broadcast_asid) {
+		u16 asid = mm->context.broadcast_asid;
+		/*
+		 * Queue an asynchronous INVLPGB. The matching TLBSYNC runs in
+		 * arch_tlbbatch_flush() and must execute on this same CPU, so
+		 * migration stays disabled from the first queued flush on.
+		 */
+		if (!batch->used_invlpgb) {
+			batch->used_invlpgb = true;
+			migrate_disable();
+		}
+		invlpgb_flush_user_nr(kern_pcid(asid), uaddr, 1, 0);
+		/* PTI: flush the user PCID too. Do any INVLPGB CPUs need PTI? */
+		if (static_cpu_has(X86_FEATURE_PTI))
+			invlpgb_flush_user_nr(user_pcid(asid), uaddr, 1, 0);
+	} else {
+		inc_mm_tlb_gen(mm);
+		cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+	}
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
 /*
  * Blindly accessing user memory from NMI context can be dangerous
  * if we're in the middle of switching the current user task or
-- 
2.47.1

next prev parent reply	other threads:[~2024-12-22  4:07 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-22  4:06 [RFC PATCH 00/10] AMD broadcast TLB invalidation Rik van Riel
2024-12-22  4:06 ` [PATCH 01/10] Add X86_FEATURE_INVLPGB definition Rik van Riel
2024-12-22  4:06 ` [PATCH 02/10] x86,tlb: get INVLPGB count max from CPUID Rik van Riel
2024-12-22 11:46   ` Borislav Petkov
2024-12-22 14:39     ` Rik van Riel
2024-12-22  4:06 ` [PATCH 03/10] x86,mm: add INVLPGB support code Rik van Riel
2024-12-22 11:05   ` Peter Zijlstra
2024-12-22 21:24     ` Rik van Riel
2024-12-22  4:06 ` [PATCH 04/10] x86,mm: use INVLPGB for kernel TLB flushes Rik van Riel
2024-12-22 11:16   ` Peter Zijlstra
2024-12-22 15:12     ` Rik van Riel
2024-12-24 18:13       ` Peter Zijlstra
2024-12-22 11:47   ` Borislav Petkov
2024-12-22  4:06 ` [PATCH 05/10] x86,tlb: use INVLPGB in flush_tlb_all Rik van Riel
2024-12-22 11:17   ` Peter Zijlstra
2024-12-22  4:06 ` [PATCH 06/10] x86,mm: use broadcast TLB flushing for page reclaim TLB flushing Rik van Riel
2024-12-22 11:18   ` Peter Zijlstra
2024-12-22  4:06 ` [PATCH 07/10] x86,mm: enable broadcast TLB invalidation for multi-threaded processes Rik van Riel
2024-12-22 11:36   ` Peter Zijlstra
2024-12-22 22:45     ` Rik van Riel
2024-12-22  4:06 ` Rik van Riel [this message]
2024-12-22  4:06 ` [PATCH 09/10] x86/mm: enable AMD translation cache extensions Rik van Riel
2024-12-22 11:38   ` Peter Zijlstra
2024-12-22 15:37     ` Rik van Riel
2024-12-24 18:25       ` Peter Zijlstra
2024-12-22  4:06 ` [PATCH 10/10] x86/mm: only invalidate final translations with INVLPGB Rik van Riel
2024-12-22 11:11 ` [RFC PATCH 00/10] AMD broadcast TLB invalidation Peter Zijlstra
2024-12-22 15:06   ` Rik van Riel

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:1ad56eb3e8a dfblob:f9a17edf63a dfblob:a59f56c9b35
dfblob:87f9a3725d9 dfblob:11ecffa2656 dfblob:0482042e011 )
 OR (
bs:"[PATCH 08/10] x86,tlb: do targeted broadcast flushing from tlbbatch code" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241222040717.3096835-9-riel@surriel.com \
    --to=riel@surriel.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.