public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] x86/entry: Trade cycles
@ 2023-11-20 14:33 Peter Zijlstra
  2023-11-20 14:33 ` [PATCH 1/2] x86/entry: Optimize common_interrupt_return() Peter Zijlstra
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Peter Zijlstra @ 2023-11-20 14:33 UTC (permalink / raw)
  To: x86; +Cc: linux-kernel, peterz

Hi,

Two little patches that trade a little performance. First patch optimizes
(although I couldn't get definite numbers showing it makes a difference) the
return to user path by avoiding some PTI specifics in the generic path.

Second patch then steals some of the won cycles by making a debug check
unconditional.

This came forth from a discussion with amluto who lamented we only had the
debug check conditional on DEBUG_ENTRY.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] x86/entry: Optimize common_interrupt_return()
  2023-11-20 14:33 [PATCH 0/2] x86/entry: Trade cycles Peter Zijlstra
@ 2023-11-20 14:33 ` Peter Zijlstra
  2023-11-21 13:04   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
  2023-11-20 14:33 ` [PATCH 2/2] x86/entry: Harden return-to-user Peter Zijlstra
  2023-11-21 15:26 ` [PATCH 0/2] x86/entry: Trade cycles Thomas Gleixner
  2 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2023-11-20 14:33 UTC (permalink / raw)
  To: x86; +Cc: linux-kernel, peterz

The code in common_interrupt_return() does a bunch of unconditional
work that is really only needed on PTI kernels. Specifically it
unconditionally copies the IRET frame back onto the entry stack,
swizzles onto the entry stack and does IRET from there.

However, without PTI we can simply IRET from whatever stack we're on.

ivb-ep, mitigations=off, gettid-1m:

PRE:
       140,118,538      cycles:k                                                      ( +-  0.01% )
       236,692,878      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

POST:
       140,026,608      cycles:k                                                      ( +-  0.01% )
       236,696,176      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

(this is with --repeat 100 and the run-to-run variance is bigger than
the difference shown)

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/calling.h  |   12 +++++++++---
 arch/x86/entry/entry_64.S |   17 +++++++++++++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -175,8 +175,7 @@ For 32-bit we have the following convent
 #define THIS_CPU_user_pcid_flush_mask   \
 	PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
 
-.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
-	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+.macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req
 	mov	%cr3, \scratch_reg
 
 	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
@@ -206,13 +205,20 @@ For 32-bit we have the following convent
 	/* Flip the PGD to the user version */
 	orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
 	mov	\scratch_reg, %cr3
+.endm
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	SWITCH_TO_USER_CR3 \scratch_reg \scratch_reg2
 .Lend_\@:
 .endm
 
 .macro SWITCH_TO_USER_CR3_STACK	scratch_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 	pushq	%rax
-	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+	SWITCH_TO_USER_CR3 scratch_reg=\scratch_reg scratch_reg2=%rax
 	popq	%rax
+.Lend_\@:
 .endm
 
 .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -569,7 +569,18 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_
 #ifdef CONFIG_XEN_PV
 	ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
 #endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI
+#endif
+
+	STACKLEAK_ERASE
+	POP_REGS
+	add	$8, %rsp	/* orig_ax */
+	swapgs
+	jmp	.Lnative_iret
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+.Lpti_restore_regs_and_return_to_usermode:
 	POP_REGS pop_rdi=0
 
 	/*
@@ -596,13 +607,15 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_
 	 */
 	STACKLEAK_ERASE_NOCLOBBER
 
-	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+	push	%rax
+	SWITCH_TO_USER_CR3 scratch_reg=%rdi scratch_reg2=%rax
+	pop	%rax
 
 	/* Restore RDI. */
 	popq	%rdi
 	swapgs
 	jmp	.Lnative_iret
-
+#endif
 
 SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
 #ifdef CONFIG_DEBUG_ENTRY



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/2] x86/entry: Harden return-to-user
  2023-11-20 14:33 [PATCH 0/2] x86/entry: Trade cycles Peter Zijlstra
  2023-11-20 14:33 ` [PATCH 1/2] x86/entry: Optimize common_interrupt_return() Peter Zijlstra
@ 2023-11-20 14:33 ` Peter Zijlstra
  2023-11-21 13:04   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
  2023-11-21 15:26 ` [PATCH 0/2] x86/entry: Trade cycles Thomas Gleixner
  2 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2023-11-20 14:33 UTC (permalink / raw)
  To: x86; +Cc: linux-kernel, peterz

Make the DEBUG_ENTRY check that validates CS is a user segment
unconditional and move it nearer to IRET.

PRE:
       140,026,608      cycles:k                                                      ( +-  0.01% )
       236,696,176      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

POST:
       139,957,681      cycles:k                                                      ( +-  0.01% )
       236,681,819      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

(this is with --repeat 100 and the run-to-run variance is bigger than
the difference shown)

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/entry_64.S |   18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -559,13 +559,6 @@ SYM_CODE_END(\asmsym)
 SYM_CODE_START_LOCAL(common_interrupt_return)
 SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 	IBRS_EXIT
-#ifdef CONFIG_DEBUG_ENTRY
-	/* Assert that pt_regs indicates user mode. */
-	testb	$3, CS(%rsp)
-	jnz	1f
-	ud2
-1:
-#endif
 #ifdef CONFIG_XEN_PV
 	ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
 #endif
@@ -576,8 +569,14 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_
 	STACKLEAK_ERASE
 	POP_REGS
 	add	$8, %rsp	/* orig_ax */
+	UNWIND_HINT_IRET_REGS
+
+.Lswapgs_and_iret:
 	swapgs
-	jmp	.Lnative_iret
+	/* Assert that the IRET frame indicates user mode. */
+	testb	$3, 8(%rsp)
+	jnz	.Lnative_iret
+	ud2
 
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 .Lpti_restore_regs_and_return_to_usermode:
@@ -613,8 +612,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_
 
 	/* Restore RDI. */
 	popq	%rdi
-	swapgs
-	jmp	.Lnative_iret
+	jmp	.Lswapgs_and_iret
 #endif
 
 SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip: x86/entry] x86/entry: Harden return-to-user
  2023-11-20 14:33 ` [PATCH 2/2] x86/entry: Harden return-to-user Peter Zijlstra
@ 2023-11-21 13:04   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 6+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-11-21 13:04 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel), Ingo Molnar, Linus Torvalds, x86,
	linux-kernel

The following commit has been merged into the x86/entry branch of tip:

Commit-ID:     1e4d3001f59fb7a9917cb746544b65e616b5f809
Gitweb:        https://git.kernel.org/tip/1e4d3001f59fb7a9917cb746544b65e616b5f809
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Mon, 20 Nov 2023 15:33:46 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Tue, 21 Nov 2023 13:57:31 +01:00

x86/entry: Harden return-to-user

Make the CONFIG_DEBUG_ENTRY=y check that validates CS is a user segment
unconditional and move it nearer to IRET.

  PRE:
       140,026,608      cycles:k                                                      ( +-  0.01% )
       236,696,176      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

  POST:
       139,957,681      cycles:k                                                      ( +-  0.01% )
       236,681,819      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

(this is with --repeat 100 and the run-to-run variance is bigger than
the difference shown)

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20231120143626.753200755@infradead.org
---
 arch/x86/entry/entry_64.S | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index dfbf799..c40f89a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -559,13 +559,6 @@ __irqentry_text_end:
 SYM_CODE_START_LOCAL(common_interrupt_return)
 SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 	IBRS_EXIT
-#ifdef CONFIG_DEBUG_ENTRY
-	/* Assert that pt_regs indicates user mode. */
-	testb	$3, CS(%rsp)
-	jnz	1f
-	ud2
-1:
-#endif
 #ifdef CONFIG_XEN_PV
 	ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
 #endif
@@ -576,8 +569,14 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 	STACKLEAK_ERASE
 	POP_REGS
 	add	$8, %rsp	/* orig_ax */
+	UNWIND_HINT_IRET_REGS
+
+.Lswapgs_and_iret:
 	swapgs
-	jmp	.Lnative_iret
+	/* Assert that the IRET frame indicates user mode. */
+	testb	$3, 8(%rsp)
+	jnz	.Lnative_iret
+	ud2
 
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 .Lpti_restore_regs_and_return_to_usermode:
@@ -613,8 +612,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 
 	/* Restore RDI. */
 	popq	%rdi
-	swapgs
-	jmp	.Lnative_iret
+	jmp	.Lswapgs_and_iret
 #endif
 
 SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [tip: x86/entry] x86/entry: Optimize common_interrupt_return()
  2023-11-20 14:33 ` [PATCH 1/2] x86/entry: Optimize common_interrupt_return() Peter Zijlstra
@ 2023-11-21 13:04   ` tip-bot2 for Peter Zijlstra
  0 siblings, 0 replies; 6+ messages in thread
From: tip-bot2 for Peter Zijlstra @ 2023-11-21 13:04 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Peter Zijlstra (Intel), Ingo Molnar, Linus Torvalds, x86,
	linux-kernel

The following commit has been merged into the x86/entry branch of tip:

Commit-ID:     c516213726fb572700cce4a5909aa8d82b77192a
Gitweb:        https://git.kernel.org/tip/c516213726fb572700cce4a5909aa8d82b77192a
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Mon, 20 Nov 2023 15:33:45 +01:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Tue, 21 Nov 2023 13:57:30 +01:00

x86/entry: Optimize common_interrupt_return()

The code in common_interrupt_return() does a bunch of unconditional
work that is really only needed on PTI kernels. Specifically it
unconditionally copies the IRET frame back onto the entry stack,
swizzles onto the entry stack and does IRET from there.

However, without PTI we can simply IRET from whatever stack we're on.

  ivb-ep, mitigations=off, gettid-1m:

  PRE:
       140,118,538      cycles:k                                                      ( +-  0.01% )
       236,692,878      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

  POST:
       140,026,608      cycles:k                                                      ( +-  0.01% )
       236,696,176      instructions:k            #    1.69  insn per cycle           ( +-  0.00% )

(this is with --repeat 100 and the run-to-run variance is bigger than
the difference shown)

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20231120143626.638107480@infradead.org
---
 arch/x86/entry/calling.h  | 12 +++++++++---
 arch/x86/entry/entry_64.S | 17 +++++++++++++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index f690762..9f1d947 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -175,8 +175,7 @@ For 32-bit we have the following conventions - kernel is built with
 #define THIS_CPU_user_pcid_flush_mask   \
 	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
 
-.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
-	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+.macro SWITCH_TO_USER_CR3 scratch_reg:req scratch_reg2:req
 	mov	%cr3, \scratch_reg
 
 	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
@@ -206,13 +205,20 @@ For 32-bit we have the following conventions - kernel is built with
 	/* Flip the PGD to the user version */
 	orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
 	mov	\scratch_reg, %cr3
+.endm
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	SWITCH_TO_USER_CR3 \scratch_reg \scratch_reg2
 .Lend_\@:
 .endm
 
 .macro SWITCH_TO_USER_CR3_STACK	scratch_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 	pushq	%rax
-	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+	SWITCH_TO_USER_CR3 scratch_reg=\scratch_reg scratch_reg2=%rax
 	popq	%rax
+.Lend_\@:
 .endm
 
 .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index de6469d..dfbf799 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -569,7 +569,18 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 #ifdef CONFIG_XEN_PV
 	ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
 #endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ALTERNATIVE "", "jmp .Lpti_restore_regs_and_return_to_usermode", X86_FEATURE_PTI
+#endif
+
+	STACKLEAK_ERASE
+	POP_REGS
+	add	$8, %rsp	/* orig_ax */
+	swapgs
+	jmp	.Lnative_iret
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+.Lpti_restore_regs_and_return_to_usermode:
 	POP_REGS pop_rdi=0
 
 	/*
@@ -596,13 +607,15 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 	 */
 	STACKLEAK_ERASE_NOCLOBBER
 
-	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+	push	%rax
+	SWITCH_TO_USER_CR3 scratch_reg=%rdi scratch_reg2=%rax
+	pop	%rax
 
 	/* Restore RDI. */
 	popq	%rdi
 	swapgs
 	jmp	.Lnative_iret
-
+#endif
 
 SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
 #ifdef CONFIG_DEBUG_ENTRY

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/2] x86/entry: Trade cycles
  2023-11-20 14:33 [PATCH 0/2] x86/entry: Trade cycles Peter Zijlstra
  2023-11-20 14:33 ` [PATCH 1/2] x86/entry: Optimize common_interrupt_return() Peter Zijlstra
  2023-11-20 14:33 ` [PATCH 2/2] x86/entry: Harden return-to-user Peter Zijlstra
@ 2023-11-21 15:26 ` Thomas Gleixner
  2 siblings, 0 replies; 6+ messages in thread
From: Thomas Gleixner @ 2023-11-21 15:26 UTC (permalink / raw)
  To: Peter Zijlstra, x86; +Cc: linux-kernel, peterz

On Mon, Nov 20 2023 at 15:33, Peter Zijlstra wrote:
> Two little patches that trade a little performance. First patch optimizes
> (although I couldn't get definite numbers showing it makes a difference) the
> return to user path by avoiding some PTI specifics in the generic path.
>
> Second patch then steals some of the won cycles by making a debug check
> unconditional.
>
> This came forth from a discussion with amluto who lamented we only had the
> debug check conditional on DEBUG_ENTRY.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-11-21 15:26 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-11-20 14:33 [PATCH 0/2] x86/entry: Trade cycles Peter Zijlstra
2023-11-20 14:33 ` [PATCH 1/2] x86/entry: Optimize common_interrupt_return() Peter Zijlstra
2023-11-21 13:04   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2023-11-20 14:33 ` [PATCH 2/2] x86/entry: Harden return-to-user Peter Zijlstra
2023-11-21 13:04   ` [tip: x86/entry] " tip-bot2 for Peter Zijlstra
2023-11-21 15:26 ` [PATCH 0/2] x86/entry: Trade cycles Thomas Gleixner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox