All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Linus Torvalds <torvalds@linux-foundation.org>,
	Andy Lutomirsky <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Borislav Petkov <bpetkov@suse.de>,
	Greg KH <gregkh@linuxfoundation.org>,
	keescook@google.com, hughd@google.com,
	Brian Gerst <brgerst@gmail.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Denys Vlasenko <dvlasenk@redhat.com>,
	Rik van Riel <riel@redhat.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>,
	David Laight <David.Laight@aculab.com>,
	Eduardo Valentin <eduval@amazon.com>,
	aliguori@amazon.com, Will Deacon <will.deacon@arm.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	daniel.gruss@iaik.tugraz.at,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Ingo Molnar <mingo@kernel.org>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: [patch V181 47/54] x86/mm: Use INVPCID for __native_flush_tlb_single()
Date: Wed, 20 Dec 2017 22:35:50 +0100	[thread overview]
Message-ID: <20171220215444.468813756@linutronix.de> (raw)
In-Reply-To: 20171220213503.672610178@linutronix.de

[-- Attachment #1: 0054-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch --]
[-- Type: text/plain, Size: 5739 bytes --]

From: Dave Hansen <dave.hansen@linux.intel.com>

This uses INVPCID to shoot down individual lines of the user mapping
instead of marking the entire user map as invalid. This
could/might/possibly be faster.

This for sure needs tlb_single_page_flush_ceiling to be redetermined;
esp. since INVPCID is _slow_.

A detailed performance analysis is available here:

  https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com

[ Peterz: Split out from big combo patch ]

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
---
 arch/x86/include/asm/cpufeatures.h |    1 
 arch/x86/include/asm/tlbflush.h    |   23 ++++++++++++-
 arch/x86/mm/init.c                 |   64 +++++++++++++++++++++----------------
 3 files changed, 60 insertions(+), 28 deletions(-)

--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,6 +197,7 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -85,6 +85,18 @@ static inline u16 kern_pcid(u16 asid)
 	return asid + 1;
 }
 
+/*
+ * The user PCID is just the kernel one, plus the "switch bit".
+ */
+static inline u16 user_pcid(u16 asid)
+{
+	u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+#endif
+	return ret;
+}
+
 struct pgd_t;
 static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
@@ -335,6 +347,8 @@ static inline void __native_flush_tlb_gl
 		/*
 		 * Using INVPCID is considerably faster than a pair of writes
 		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 *
+		 * Note, this works with CR4.PCIDE=0 or 1.
 		 */
 		invpcid_flush_all();
 		return;
@@ -368,7 +382,14 @@ static inline void __native_flush_tlb_si
 	if (!static_cpu_has(X86_FEATURE_PTI))
 		return;
 
-	invalidate_user_asid(loaded_mm_asid);
+	/*
+	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+	 * Just use invalidate_user_asid() in case we are called early.
+	 */
+	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+		invalidate_user_asid(loaded_mm_asid);
+	else
+		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
 
 /*
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(
 
 static void setup_pcid(void)
 {
-#ifdef CONFIG_X86_64
-	if (boot_cpu_has(X86_FEATURE_PCID)) {
-		if (boot_cpu_has(X86_FEATURE_PGE)) {
-			/*
-			 * This can't be cr4_set_bits_and_update_boot() --
-			 * the trampoline code can't handle CR4.PCIDE and
-			 * it wouldn't do any good anyway.  Despite the name,
-			 * cr4_set_bits_and_update_boot() doesn't actually
-			 * cause the bits in question to remain set all the
-			 * way through the secondary boot asm.
-			 *
-			 * Instead, we brute-force it and set CR4.PCIDE
-			 * manually in start_secondary().
-			 */
-			cr4_set_bits(X86_CR4_PCIDE);
-		} else {
-			/*
-			 * flush_tlb_all(), as currently implemented, won't
-			 * work if PCID is on but PGE is not.  Since that
-			 * combination doesn't exist on real hardware, there's
-			 * no reason to try to fully support it, but it's
-			 * polite to avoid corrupting data if we're on
-			 * an improperly configured VM.
-			 */
-			setup_clear_cpu_cap(X86_FEATURE_PCID);
-		}
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return;
+
+	if (!boot_cpu_has(X86_FEATURE_PCID))
+		return;
+
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
+		/*
+		 * This can't be cr4_set_bits_and_update_boot() -- the
+		 * trampoline code can't handle CR4.PCIDE and it wouldn't
+		 * do any good anyway.  Despite the name,
+		 * cr4_set_bits_and_update_boot() doesn't actually cause
+		 * the bits in question to remain set all the way through
+		 * the secondary boot asm.
+		 *
+		 * Instead, we brute-force it and set CR4.PCIDE manually in
+		 * start_secondary().
+		 */
+		cr4_set_bits(X86_CR4_PCIDE);
+
+		/*
+		 * INVPCID's single-context modes (2/3) only work if we set
+		 * X86_CR4_PCIDE, *and* we INVPCID support.  It's unusable
+		 * on systems that have X86_CR4_PCIDE clear, or that have
+		 * no INVPCID support at all.
+		 */
+		if (boot_cpu_has(X86_FEATURE_INVPCID))
+			setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+	} else {
+		/*
+		 * flush_tlb_all(), as currently implemented, won't work if
+		 * PCID is on but PGE is not.  Since that combination
+		 * doesn't exist on real hardware, there's no reason to try
+		 * to fully support it, but it's polite to avoid corrupting
+		 * data if we're on an improperly configured VM.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_PCID);
 	}
-#endif
 }
 
 #ifdef CONFIG_X86_32

  parent reply	other threads:[~2017-12-20 22:00 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-20 21:35 [patch V181 00/54] x86/pti: Final XMAS release Thomas Gleixner
2017-12-20 21:35 ` [patch V181 01/54] x86/Kconfig: Limit NR_CPUS on 32bit to a sane amount Thomas Gleixner
2017-12-20 21:35 ` [patch V181 02/54] x86/mm/dump_pagetables: Check PAGE_PRESENT for real Thomas Gleixner
2017-12-20 21:35 ` [patch V181 03/54] x86/mm/dump_pagetables: Make the address hints correct and readable Thomas Gleixner
2017-12-20 21:35 ` [patch V181 04/54] x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy Thomas Gleixner
2017-12-20 21:35 ` [patch V181 05/54] x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode Thomas Gleixner
2017-12-20 21:35 ` [patch V181 06/54] arch: Allow arch_dup_mmap() to fail Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 07/54] x86/ldt: Rework locking Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 08/54] x86/ldt: Prevent ldt inheritance on exec Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 09/54] x86/mm/64: Improve the memory map documentation Thomas Gleixner
2017-12-20 21:35 ` [patch V181 10/54] x86/doc: Remove obvious weirdness Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 11/54] x86/entry: Remove SYSENTER_stack naming Thomas Gleixner
2017-12-20 21:35 ` [patch V181 12/54] x86/uv: Use the right tlbflush API Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 13/54] x86/microcode: Dont abuse the tlbflush interface Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 14/54] x86/mm: Use __flush_tlb_one() for kernel memory Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 15/54] x86/mm: Remove superfluous barriers Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 16/54] x86/mm: Clarify which functions are supposed to flush what Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 17/54] x86/mm: Move the CR3 construction functions to tlbflush.h Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 18/54] x86/mm: Remove hard-coded ASID limit checks Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 19/54] x86/mm: Put MMU to hardware ASID translation in one place Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 20/54] x86/mm: Create asm/invpcid.h Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 21/54] x86/cpu_entry_area: Move it to a separate unit Thomas Gleixner
2017-12-20 22:29   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 22/54] x86/cpu_entry_area: Move it out of fixmap Thomas Gleixner
2017-12-22  2:46   ` [V181,22/54] " Andrei Vagin
2017-12-22 13:05     ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 23/54] init: Invoke init_espfix_bsp() from mm_init() Thomas Gleixner
2017-12-20 21:35 ` [patch V181 24/54] x86/cpufeatures: Add X86_BUG_CPU_INSECURE Thomas Gleixner
2017-12-20 21:35 ` [patch V181 25/54] x86/mm/pti: Disable global pages if PAGE_TABLE_ISOLATION=y Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 26/54] x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 27/54] x86/mm/pti: Add infrastructure for page table isolation Thomas Gleixner
2017-12-20 21:35 ` [patch V181 28/54] x86/mm/pti: Add mapping helper functions Thomas Gleixner
2017-12-20 21:35 ` [patch V181 29/54] x86/mm/pti: Allow NX poison to be set in p4d/pgd Thomas Gleixner
2017-12-20 21:35 ` [patch V181 30/54] x86/mm/pti: Allocate a separate user PGD Thomas Gleixner
2017-12-20 21:35 ` [patch V181 31/54] x86/mm/pti: Populate " Thomas Gleixner
2017-12-20 21:35 ` [patch V181 32/54] x86/mm/pti: Add functions to clone kernel PMDs Thomas Gleixner
2017-12-20 21:35 ` [patch V181 33/54] x86/mm/pti: Force entry through trampoline when PTI active Thomas Gleixner
2017-12-20 21:35 ` [patch V181 34/54] x86/mm/pti: Share cpu_entry_area with user space page tables Thomas Gleixner
2017-12-20 21:35 ` [patch V181 35/54] x86/entry: Align entry text section to PMD boundary Thomas Gleixner
2018-05-17 15:58   ` Josh Poimboeuf
2018-05-18 10:38     ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 36/54] x86/mm/pti: Share entry text PMD Thomas Gleixner
2017-12-20 21:35 ` [patch V181 37/54] x86/mm/pti: Map ESPFIX into user space Thomas Gleixner
2017-12-20 21:35 ` [patch V181 38/54] x86/cpu_entry_area: Add debugstore entries to cpu_entry_area Thomas Gleixner
2017-12-20 21:35 ` [patch V181 39/54] x86/events/intel/ds: Map debug buffers in cpu_entry_area Thomas Gleixner
2017-12-20 21:35 ` [patch V181 40/54] x86/mm/64: Make a full PGD-entry size hole in the memory map Thomas Gleixner
2017-12-20 21:35 ` [patch V181 41/54] x86/pti: Put the LDT in its own PGD if PTI is on Thomas Gleixner
2017-12-20 21:35 ` [patch V181 42/54] x86/pti: Map the vsyscall page if needed Thomas Gleixner
2017-12-20 21:35 ` [patch V181 43/54] x86/mm: Allow flushing for future ASID switches Thomas Gleixner
2017-12-20 21:35 ` [patch V181 44/54] x86/mm: Abstract switching CR3 Thomas Gleixner
2017-12-20 21:35 ` [patch V181 45/54] x86/mm: Use/Fix PCID to optimize user/kernel switches Thomas Gleixner
2017-12-20 21:35 ` [patch V181 46/54] x86/mm: Optimize RESTORE_CR3 Thomas Gleixner
2017-12-20 21:35 ` Thomas Gleixner [this message]
2017-12-20 21:35 ` [patch V181 48/54] x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 49/54] x86/dumpstack: Indicate in Oops whether pti is configured and enabled Thomas Gleixner
2017-12-20 22:03   ` Jiri Kosina
2017-12-20 21:35 ` [patch V181 50/54] x86/mm/pti: Add Kconfig Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 51/54] x86/mm/dump_pagetables: Add page table directory Thomas Gleixner
2017-12-20 21:35 ` [patch V181 52/54] x86/mm/dump_pagetables: Check user space page table for WX pages Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 53/54] x86/mm/dump_pagetables: Allow dumping current pagetables Thomas Gleixner
2017-12-20 21:35   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 54/54] x86/ldt: Make the LDT mapping RO Thomas Gleixner
2017-12-20 23:48 ` [patch V181 00/54] x86/pti: Final XMAS release Thomas Gleixner
2017-12-21 12:57 ` Kirill A. Shutemov
2017-12-21 16:26   ` Kirill A. Shutemov
2017-12-21 18:39     ` Thomas Gleixner
2017-12-21 15:57 ` Boris Ostrovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171220215444.468813756@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=David.Laight@aculab.com \
    --cc=aliguori@amazon.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=bpetkov@suse.de \
    --cc=brgerst@gmail.com \
    --cc=daniel.gruss@iaik.tugraz.at \
    --cc=dave.hansen@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dvlasenk@redhat.com \
    --cc=eduval@amazon.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jgross@suse.com \
    --cc=jpoimboe@redhat.com \
    --cc=keescook@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=vbabka@suse.cz \
    --cc=will.deacon@arm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.