* [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling
@ 2015-07-26 23:29 Andy Lutomirski
2015-07-26 23:29 ` [PATCH 1/2] x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit Andy Lutomirski
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: Andy Lutomirski @ 2015-07-26 23:29 UTC (permalink / raw)
To: X86 ML, xen-devel@lists.xen.org, Andrew Cooper
Cc: Boris Ostrovsky, linux-kernel@vger.kernel.org, Borislav Petkov,
Steven Rostedt, Andy Lutomirski
Xen's SYSCALL hooks are overcomplicated and add unnecessary places
where RSP is user controlled. Simplify them and get rid of those user
RSP code paths under Xen.
Tested as an Intel KVM guest. Not tested under Xen or on AMD, both
of which are important.
Andy Lutomirski (2):
x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit
x86/xen/64: Rearrange the SYSCALL entries
arch/x86/entry/entry_64.S | 29 +++++++++++++++--------------
arch/x86/entry/entry_64_compat.S | 12 +++++++-----
arch/x86/xen/xen-asm_64.S | 24 ++++++++++--------------
3 files changed, 32 insertions(+), 33 deletions(-)
--
2.4.3
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling
@ 2015-07-26 23:29 Andy Lutomirski
0 siblings, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2015-07-26 23:29 UTC (permalink / raw)
To: X86 ML, xen-devel@lists.xen.org, Andrew Cooper
Cc: Boris Ostrovsky, Andy Lutomirski, Borislav Petkov,
linux-kernel@vger.kernel.org, Steven Rostedt
Xen's SYSCALL hooks are overcomplicated and add unnecessary places
where RSP is user controlled. Simplify them and get rid of those user
RSP code paths under Xen.
Tested as an Intel KVM guest. Not tested under Xen or on AMD, both
of which are important.
Andy Lutomirski (2):
x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit
x86/xen/64: Rearrange the SYSCALL entries
arch/x86/entry/entry_64.S | 29 +++++++++++++++--------------
arch/x86/entry/entry_64_compat.S | 12 +++++++-----
arch/x86/xen/xen-asm_64.S | 24 ++++++++++--------------
3 files changed, 32 insertions(+), 33 deletions(-)
--
2.4.3
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 1/2] x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit
2015-07-26 23:29 [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling Andy Lutomirski
@ 2015-07-26 23:29 ` Andy Lutomirski
2015-07-26 23:29 ` Andy Lutomirski
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2015-07-26 23:29 UTC (permalink / raw)
To: X86 ML, xen-devel@lists.xen.org, Andrew Cooper
Cc: Boris Ostrovsky, linux-kernel@vger.kernel.org, Borislav Petkov,
Steven Rostedt, Andy Lutomirski
Xen currently fudges RSP on SYSCALL to be compatible with the native
entries. This has the unfortunate side effect that there are extra
poorly-controlled places with user RSP. Add better entry points for
Xen to use instead.
This will add a couple of cycles of IRQ latency, but it avoids an
annoying corner case in which an IRQ can be delivered with a
hardware frame that overlaps current_pt_regs.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
arch/x86/entry/entry_64.S | 20 ++++++++++++++------
arch/x86/entry/entry_64_compat.S | 10 ++++++----
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 041a37a643e1..f47996910331 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -148,17 +148,19 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
+
/*
- * Re-enable interrupts.
- * We use 'rsp_scratch' as a scratch space, hence irq-off block above
- * must execute atomically in the face of possible interrupt-driven
- * task preemption. We must enable interrupts only after we're done
- * with using rsp_scratch:
+ * Before this point, interrupts MUST be off to prevent rsp_scratch
+ * from getting corrupted due to preemption. Nonetheless, we keep
+ * interrupts off a little longer. We eventually want to make it
+ * all the way to C code with interrupts off, which will considerably
+ * simplify context tracking.
*/
- ENABLE_INTERRUPTS(CLBR_NONE)
+
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -171,6 +173,12 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ /*
+ * Re-enable interrupts. IRQ tracing already thinks they're
+ * on because user mode is traced as IRQs-on.
+ */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz tracesys
entry_SYSCALL_64_fastpath:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d7571532e7ce..d39495d6446e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -312,10 +312,6 @@ ENTRY(entry_SYSCALL_compat)
SWAPGS_UNSAFE_STACK
movl %esp, %r8d
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- ENABLE_INTERRUPTS(CLBR_NONE)
-
- /* Zero-extending 32-bit regs, do not remove */
- movl %eax, %eax
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
@@ -323,6 +319,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %r11 /* pt_regs->flags */
pushq $__USER32_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe_esp_in_r8d)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -332,6 +329,11 @@ ENTRY(entry_SYSCALL_compat)
pushq $-ENOSYS /* pt_regs->ax */
sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %eax, %eax
+
/*
* No need to do an access_ok check here because r8 has been
* 32-bit zero extended:
--
2.4.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 1/2] x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit
2015-07-26 23:29 [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling Andy Lutomirski
2015-07-26 23:29 ` [PATCH 1/2] x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit Andy Lutomirski
@ 2015-07-26 23:29 ` Andy Lutomirski
2015-07-26 23:29 ` [PATCH 2/2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
2015-07-26 23:29 ` Andy Lutomirski
3 siblings, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2015-07-26 23:29 UTC (permalink / raw)
To: X86 ML, xen-devel@lists.xen.org, Andrew Cooper
Cc: Boris Ostrovsky, Andy Lutomirski, Borislav Petkov,
linux-kernel@vger.kernel.org, Steven Rostedt
Xen currently fudges RSP on SYSCALL to be compatible with the native
entries. This has the unfortunate side effect that there are extra
poorly-controlled places with user RSP. Add better entry points for
Xen to use instead.
This will add a couple of cycles of IRQ latency, but it avoids an
annoying corner case in which an IRQ can be delivered with a
hardware frame that overlaps current_pt_regs.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
arch/x86/entry/entry_64.S | 20 ++++++++++++++------
arch/x86/entry/entry_64_compat.S | 10 ++++++----
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 041a37a643e1..f47996910331 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -148,17 +148,19 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
+
/*
- * Re-enable interrupts.
- * We use 'rsp_scratch' as a scratch space, hence irq-off block above
- * must execute atomically in the face of possible interrupt-driven
- * task preemption. We must enable interrupts only after we're done
- * with using rsp_scratch:
+ * Before this point, interrupts MUST be off to prevent rsp_scratch
+ * from getting corrupted due to preemption. Nonetheless, we keep
+ * interrupts off a little longer. We eventually want to make it
+ * all the way to C code with interrupts off, which will considerably
+ * simplify context tracking.
*/
- ENABLE_INTERRUPTS(CLBR_NONE)
+
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -171,6 +173,12 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ /*
+ * Re-enable interrupts. IRQ tracing already thinks they're
+ * on because user mode is traced as IRQs-on.
+ */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz tracesys
entry_SYSCALL_64_fastpath:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d7571532e7ce..d39495d6446e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -312,10 +312,6 @@ ENTRY(entry_SYSCALL_compat)
SWAPGS_UNSAFE_STACK
movl %esp, %r8d
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- ENABLE_INTERRUPTS(CLBR_NONE)
-
- /* Zero-extending 32-bit regs, do not remove */
- movl %eax, %eax
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
@@ -323,6 +319,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %r11 /* pt_regs->flags */
pushq $__USER32_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe_esp_in_r8d)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -332,6 +329,11 @@ ENTRY(entry_SYSCALL_compat)
pushq $-ENOSYS /* pt_regs->ax */
sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %eax, %eax
+
/*
* No need to do an access_ok check here because r8 has been
* 32-bit zero extended:
--
2.4.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] x86/xen/64: Rearrange the SYSCALL entries
2015-07-26 23:29 [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling Andy Lutomirski
2015-07-26 23:29 ` [PATCH 1/2] x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit Andy Lutomirski
2015-07-26 23:29 ` Andy Lutomirski
@ 2015-07-26 23:29 ` Andy Lutomirski
2015-07-26 23:29 ` Andy Lutomirski
3 siblings, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2015-07-26 23:29 UTC (permalink / raw)
To: X86 ML, xen-devel@lists.xen.org, Andrew Cooper
Cc: Boris Ostrovsky, linux-kernel@vger.kernel.org, Borislav Petkov,
Steven Rostedt, Andy Lutomirski
Xen's raw SYSCALL entries are much less weird than native. Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.
This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses
of the SWAPGS_UNSAFE_STACK paravirt hook.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
arch/x86/entry/entry_64.S | 9 +--------
arch/x86/entry/entry_64_compat.S | 2 +-
arch/x86/xen/xen-asm_64.S | 24 ++++++++++--------------
3 files changed, 12 insertions(+), 23 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f47996910331..ebe7e2ca19fe 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -134,14 +134,7 @@ ENTRY(entry_SYSCALL_64)
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
- SWAPGS_UNSAFE_STACK
- /*
- * A hypervisor implementation might want to use a label
- * after the swapgs, so that it can do the swapgs
- * for the guest and jump here on syscall.
- */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
-
+ swapgs
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d39495d6446e..beae2b75b588 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -309,7 +309,7 @@ ENTRY(entry_SYSCALL_compat)
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
- SWAPGS_UNSAFE_STACK
+ swapgs
movl %esp, %r8d
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index f22667abf7b9..c2b5868a9967 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -100,34 +100,30 @@ RELOC(xen_sysret32, 1b+1)
* rip
* r11
* rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
*/
-.macro undo_xen_syscall
- mov 0*8(%rsp), %rcx
- mov 1*8(%rsp), %r11
- mov 5*8(%rsp), %rsp
-.endm
-
/* Normal 64-bit system call target */
ENTRY(xen_syscall_target)
- undo_xen_syscall
- jmp entry_SYSCALL_64_after_swapgs
+ popq %rcx
+ popq %r11
+ jmp entry_SYSCALL_64_after_hwframe
ENDPROC(xen_syscall_target)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
ENTRY(xen_syscall32_target)
- undo_xen_syscall
- jmp entry_SYSCALL_compat
+ popq %rcx
+ popq %r11
+ movl (3*8)(%rsp), %r8d /* r8d = user esp */
+ jmp entry_SYSCALL_compat_after_hwframe_esp_in_r8d
ENDPROC(xen_syscall32_target)
/* 32-bit compat sysenter target */
ENTRY(xen_sysenter_target)
- undo_xen_syscall
+ mov 0*8(%rsp), %rcx
+ mov 1*8(%rsp), %r11
+ mov 5*8(%rsp), %rsp
jmp entry_SYSENTER_compat
ENDPROC(xen_sysenter_target)
--
2.4.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] x86/xen/64: Rearrange the SYSCALL entries
2015-07-26 23:29 [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling Andy Lutomirski
` (2 preceding siblings ...)
2015-07-26 23:29 ` [PATCH 2/2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
@ 2015-07-26 23:29 ` Andy Lutomirski
3 siblings, 0 replies; 6+ messages in thread
From: Andy Lutomirski @ 2015-07-26 23:29 UTC (permalink / raw)
To: X86 ML, xen-devel@lists.xen.org, Andrew Cooper
Cc: Boris Ostrovsky, Andy Lutomirski, Borislav Petkov,
linux-kernel@vger.kernel.org, Steven Rostedt
Xen's raw SYSCALL entries are much less weird than native. Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.
This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses
of the SWAPGS_UNSAFE_STACK paravirt hook.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
arch/x86/entry/entry_64.S | 9 +--------
arch/x86/entry/entry_64_compat.S | 2 +-
arch/x86/xen/xen-asm_64.S | 24 ++++++++++--------------
3 files changed, 12 insertions(+), 23 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f47996910331..ebe7e2ca19fe 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -134,14 +134,7 @@ ENTRY(entry_SYSCALL_64)
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
- SWAPGS_UNSAFE_STACK
- /*
- * A hypervisor implementation might want to use a label
- * after the swapgs, so that it can do the swapgs
- * for the guest and jump here on syscall.
- */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
-
+ swapgs
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index d39495d6446e..beae2b75b588 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -309,7 +309,7 @@ ENTRY(entry_SYSCALL_compat)
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
- SWAPGS_UNSAFE_STACK
+ swapgs
movl %esp, %r8d
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index f22667abf7b9..c2b5868a9967 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -100,34 +100,30 @@ RELOC(xen_sysret32, 1b+1)
* rip
* r11
* rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
*/
-.macro undo_xen_syscall
- mov 0*8(%rsp), %rcx
- mov 1*8(%rsp), %r11
- mov 5*8(%rsp), %rsp
-.endm
-
/* Normal 64-bit system call target */
ENTRY(xen_syscall_target)
- undo_xen_syscall
- jmp entry_SYSCALL_64_after_swapgs
+ popq %rcx
+ popq %r11
+ jmp entry_SYSCALL_64_after_hwframe
ENDPROC(xen_syscall_target)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
ENTRY(xen_syscall32_target)
- undo_xen_syscall
- jmp entry_SYSCALL_compat
+ popq %rcx
+ popq %r11
+ movl (3*8)(%rsp), %r8d /* r8d = user esp */
+ jmp entry_SYSCALL_compat_after_hwframe_esp_in_r8d
ENDPROC(xen_syscall32_target)
/* 32-bit compat sysenter target */
ENTRY(xen_sysenter_target)
- undo_xen_syscall
+ mov 0*8(%rsp), %rcx
+ mov 1*8(%rsp), %r11
+ mov 5*8(%rsp), %rsp
jmp entry_SYSENTER_compat
ENDPROC(xen_sysenter_target)
--
2.4.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2015-07-26 23:29 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-26 23:29 [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling Andy Lutomirski
2015-07-26 23:29 ` [PATCH 1/2] x86/entry/64: Rearrange SYSCALL entry points for Xen's benefit Andy Lutomirski
2015-07-26 23:29 ` Andy Lutomirski
2015-07-26 23:29 ` [PATCH 2/2] x86/xen/64: Rearrange the SYSCALL entries Andy Lutomirski
2015-07-26 23:29 ` Andy Lutomirski
-- strict thread matches above, loose matches on Subject: below --
2015-07-26 23:29 [PATCH 0/2] x86_64, xen: Simplify Xen SYSCALL handling Andy Lutomirski
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.