All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rik van Riel <riel@surriel.com>
To: x86@kernel.org
Cc: linux-kernel@vger.kernel.org, bp@alien8.de, peterz@infradead.org,
	dave.hansen@linux.intel.com, zhengqi.arch@bytedance.com,
	nadav.amit@gmail.com, thomas.lendacky@amd.com,
	kernel-team@meta.com, linux-mm@kvack.org,
	akpm@linux-foundation.org, jackmanb@google.com, jannh@google.com,
	mhklinux@outlook.com, andrew.cooper3@citrix.com,
	Manali.Shukla@amd.com, Rik van Riel <riel@surriel.com>,
	Dave Hansen <dave.hansen@intel.com>
Subject: [PATCH v12 05/16] x86/mm: add INVLPGB support code
Date: Thu, 20 Feb 2025 19:53:04 -0500	[thread overview]
Message-ID: <20250221005345.2156760-6-riel@surriel.com> (raw)
In-Reply-To: <20250221005345.2156760-1-riel@surriel.com>

Add invlpgb.h with the helper functions and definitions needed to use
broadcast TLB invalidation on AMD EPYC 3 and newer CPUs.

All the functions defined in invlpgb.h are used later in the series.

Compile time disabling X86_FEATURE_INVLPGB when the config
option is not set allows the compiler to omit unnecessary code.

Signed-off-by: Rik van Riel <riel@surriel.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/disabled-features.h |  9 ++-
 arch/x86/include/asm/tlb.h               | 92 ++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index c492bdc97b05..95997caf0935 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -129,6 +129,13 @@
 #define DISABLE_SEV_SNP		(1 << (X86_FEATURE_SEV_SNP & 31))
 #endif
 
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+#define DISABLE_INVLPGB		0
+#else
+/* Keep 32 bit kernels smaller by compiling out the INVLPGB code. */
+#define DISABLE_INVLPGB		(1 << (X86_FEATURE_INVLPGB & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -146,7 +153,7 @@
 #define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
 			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
 #define DISABLED_MASK12	(DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13	0
+#define DISABLED_MASK13	(DISABLE_INVLPGB)
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 77f52bc1578a..b3cd521e5e2f 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
 static inline void tlb_flush(struct mmu_gather *tlb);
 
 #include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
@@ -25,4 +28,93 @@ static inline void invlpg(unsigned long addr)
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 }
 
+
+/*
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond
+ * the first page, while __invlpgb gets the more human readable number of
+ * pages to invalidate.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
+ */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+			     unsigned long addr, u16 nr_pages,
+			     bool pmd_stride, u8 flags)
+{
+	u32 edx = (pcid << 16) | asid;
+	u32 ecx = (pmd_stride << 31) | (nr_pages - 1);
+	u64 rax = addr | flags;
+
+	/* The low bits in rax are for flags. Verify addr is clean. */
+	VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+	/* INVLPGB; supported in binutils >= 2.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xfe" : : "a" (rax), "c" (ecx), "d" (edx));
+}
+
+/* Wait for INVLPGB originated by this CPU to complete. */
+static inline void __tlbsync(void)
+{
+	cant_migrate();
+	/* TLBSYNC: supported in binutils >= 0.36. */
+	asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - INVLPGB_PCID:			  invalidate all TLB entries matching the PCID
+ *
+ * The first can be used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_VA			BIT(0)
+#define INVLPGB_PCID			BIT(1)
+#define INVLPGB_ASID			BIT(2)
+#define INVLPGB_INCLUDE_GLOBAL		BIT(3)
+#define INVLPGB_FINAL_ONLY		BIT(4)
+#define INVLPGB_INCLUDE_NESTED		BIT(5)
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+						unsigned long addr,
+						u16 nr,
+						bool pmd_stride)
+{
+	__invlpgb(0, pcid, addr, nr, pmd_stride, INVLPGB_PCID | INVLPGB_VA);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+	__invlpgb(0, pcid, 0, 1, 0, INVLPGB_PCID);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+	__invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL);
+	__tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+	__invlpgb(0, 0, addr, nr, 0, INVLPGB_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+	__invlpgb(0, 0, 0, 1, 0, 0);
+	__tlbsync();
+}
+
 #endif /* _ASM_X86_TLB_H */
-- 
2.47.1



  parent reply	other threads:[~2025-02-21  0:55 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-21  0:52 [PATCH v12 00/16] AMD broadcast TLB invalidation Rik van Riel
2025-02-21  0:53 ` [PATCH v12 01/16] x86/mm: make MMU_GATHER_RCU_TABLE_FREE unconditional Rik van Riel
2025-02-21  0:53 ` [PATCH v12 02/16] x86/mm: remove pv_ops.mmu.tlb_remove_table call Rik van Riel
2025-02-21  0:53 ` [PATCH v12 03/16] x86/mm: consolidate full flush threshold decision Rik van Riel
2025-02-21  0:53 ` [PATCH v12 04/16] x86/mm: get INVLPGB count max from CPUID Rik van Riel
2025-02-21  1:58   ` Borislav Petkov
2025-02-21  2:25     ` Rik van Riel
2025-02-21 18:03     ` [PATCH v12.1 " Rik van Riel
2025-02-21 18:41       ` Tom Lendacky
2025-02-22  3:35         ` [PATCH v12.2 " Rik van Riel
2025-02-21  0:53 ` Rik van Riel [this message]
2025-02-21  0:53 ` [PATCH v12 06/16] x86/mm: use INVLPGB for kernel TLB flushes Rik van Riel
2025-02-21  0:53 ` [PATCH v12 07/16] x86/mm: use INVLPGB in flush_tlb_all Rik van Riel
2025-02-21  0:53 ` [PATCH v12 08/16] x86/mm: use broadcast TLB flushing for page reclaim TLB flushing Rik van Riel
2025-02-21  0:53 ` [PATCH v12 09/16] x86/mm: global ASID allocation helper functions Rik van Riel
2025-02-21  0:53 ` [PATCH v12 10/16] x86/mm: global ASID context switch & TLB flush handling Rik van Riel
2025-02-21  0:53 ` [PATCH v12 11/16] x86/mm: global ASID process exit helpers Rik van Riel
2025-02-21  0:53 ` [PATCH v12 12/16] x86/mm: enable broadcast TLB invalidation for multi-threaded processes Rik van Riel
2025-02-21  0:53 ` [PATCH v12 13/16] x86/mm: do targeted broadcast flushing from tlbbatch code Rik van Riel
2025-02-21  0:53 ` [PATCH v12 14/16] x86/mm: enable AMD translation cache extensions Rik van Riel
2025-02-21  0:53 ` [PATCH v12 15/16] x86/mm: only invalidate final translations with INVLPGB Rik van Riel
2025-02-21  0:53 ` [PATCH v12 16/16] x86/mm: add noinvlpgb commandline option Rik van Riel
2025-02-22 11:29 ` [PATCH v12 00/16] AMD broadcast TLB invalidation Oleksandr Natalenko
2025-02-22 11:36   ` Oleksandr Natalenko
2025-02-22 16:05   ` Rik van Riel
2025-02-22 16:19     ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250221005345.2156760-6-riel@surriel.com \
    --to=riel@surriel.com \
    --cc=Manali.Shukla@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrew.cooper3@citrix.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=jackmanb@google.com \
    --cc=jannh@google.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhklinux@outlook.com \
    --cc=nadav.amit@gmail.com \
    --cc=peterz@infradead.org \
    --cc=thomas.lendacky@amd.com \
    --cc=x86@kernel.org \
    --cc=zhengqi.arch@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.