--- c83449680170170f55a0ab2eb498b92ce97c0624.patch 2025-03-03 10:35:47.422277335 -0800 +++ 11ce4b22643be.patch 2025-03-03 10:38:05.692509993 -0800 @@ -1,8 +1,8 @@ -commit c83449680170170f55a0ab2eb498b92ce97c0624 +commit 11ce4b22643be54b2c70cf6b4743e6b73b461814 Author: Rik van Riel Date: Fri Feb 28 20:32:30 2025 +0100 - x86/mm: Add INVLPGB support code + x86/mm: Add INVLPGB support code Add helper functions and definitions needed to use broadcast TLB invalidation on AMD CPUs. @@ -17,7 +17,7 @@ Link: https://lore.kernel.org/r/20250226030129.530345-4-riel@surriel.com diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h -index 77f52bc1578a7..5375145eb9596 100644 +index 77f52bc1578a7..3bd617c204346 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -6,6 +6,9 @@ @@ -30,10 +30,15 @@ static inline void tlb_flush(struct mmu_gather *tlb) { -@@ -25,4 +28,110 @@ static inline void invlpg(unsigned long addr) +@@ -25,4 +28,119 @@ static inline void invlpg(unsigned long addr) asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); } ++enum invlpgb_stride { ++ NO_STRIDE = 0, ++ PTE_STRIDE = 0, ++ PMD_STRIDE = 1 ++}; + +/* + * INVLPGB does broadcast TLB invalidation across all the CPUs in the system. @@ -54,10 +59,10 @@ + */ +static inline void __invlpgb(unsigned long asid, unsigned long pcid, + unsigned long addr, u16 nr_pages, -+ bool pmd_stride, u8 flags) ++ enum invlpgb_stride stride, u8 flags) +{ + u32 edx = (pcid << 16) | asid; -+ u32 ecx = (pmd_stride << 31) | (nr_pages - 1); ++ u32 ecx = (stride << 31) | (nr_pages - 1); + u64 rax = addr | flags; + + /* The low bits in rax are for flags. Verify addr is clean. */ @@ -84,33 +89,37 @@ +/* + * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination + * of the three. For example: -+ * - INVLPGB_VA | INVLPGB_INCLUDE_GLOBAL: invalidate all TLB entries at the address -+ * - INVLPGB_PCID: invalidate all TLB entries matching the PCID ++ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address ++ * - FLAG_PCID: invalidate all TLB entries matching the PCID + * -+ * The first can be used to invalidate (kernel) mappings at a particular ++ * The first is used to invalidate (kernel) mappings at a particular + * address across all processes. + * + * The latter invalidates all TLB entries matching a PCID. + */ -+#define INVLPGB_VA BIT(0) -+#define INVLPGB_PCID BIT(1) -+#define INVLPGB_ASID BIT(2) -+#define INVLPGB_INCLUDE_GLOBAL BIT(3) -+#define INVLPGB_FINAL_ONLY BIT(4) -+#define INVLPGB_INCLUDE_NESTED BIT(5) ++#define INVLPGB_FLAG_VA BIT(0) ++#define INVLPGB_FLAG_PCID BIT(1) ++#define INVLPGB_FLAG_ASID BIT(2) ++#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3) ++#define INVLPGB_FLAG_FINAL_ONLY BIT(4) ++#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5) ++ ++/* The implied mode when all bits are clear: */ ++#define INVLPGB_MODE_ALL_NONGLOBALS 0UL + +static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid, + unsigned long addr, + u16 nr, + bool pmd_stride) +{ -+ __invlpgb(0, pcid, addr, nr, pmd_stride, INVLPGB_PCID | INVLPGB_VA); ++ __invlpgb(0, pcid, addr, nr, pmd_stride, INVLPGB_FLAG_PCID | ++ INVLPGB_FLAG_VA); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid) +{ -+ __invlpgb(0, pcid, 0, 1, 0, INVLPGB_PCID); ++ __invlpgb(0, pcid, 0, 1, NO_STRIDE, INVLPGB_FLAG_PCID); +} + +/* Flush all mappings, including globals, for all PCIDs. */ @@ -123,21 +132,21 @@ + * as it is cheaper. + */ + guard(preempt)(); -+ __invlpgb(0, 0, 0, 1, 0, INVLPGB_INCLUDE_GLOBAL); ++ __invlpgb(0, 0, 0, 1, NO_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL); + __tlbsync(); +} + +/* Flush addr, including globals, for all PCIDs. */ +static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr) +{ -+ __invlpgb(0, 0, addr, nr, 0, INVLPGB_INCLUDE_GLOBAL); ++ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invlpgb_flush_all_nonglobals(void) +{ + guard(preempt)(); -+ __invlpgb(0, 0, 0, 1, 0, 0); ++ __invlpgb(0, 0, 0, 1, NO_STRIDE, INVLPGB_MODE_ALL_NONGLOBALS); + __tlbsync(); +} #endif /* _ASM_X86_TLB_H */