From: Glauber de Oliveira Costa <gcosta@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: akpm@linux-foundation.org, glommer@gmail.com, tglx@linutronix.de,
mingo@elte.hu, ehabkost@redhat.com, jeremy@goop.org,
avi@qumranet.com, anthony@codemonkey.ws,
virtualization@lists.linux-foundation.org, rusty@rustcorp.com.au,
ak@suse.de, chrisw@sous-sol.org, rostedt@goodmis.org,
hpa@zytor.com, zach@vmware.com, roland@redhat.com,
Glauber de Oliveira Costa <gcosta@redhat.com>
Subject: [PATCH 10/15] replace privileged instructions with paravirt macros
Date: Thu, 20 Dec 2007 18:04:05 -0200 [thread overview]
Message-ID: <11981813292926-git-send-email-gcosta@redhat.com> (raw)
In-Reply-To: <11981813192341-git-send-email-gcosta@redhat.com>
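The assembly code in entry_64.S issues a number of privileged instructions,
such as cli, sti, swapgs and iretq. Paravirt guests are not allowed to execute
them directly, so replace them with paravirt macros that do the right thing
in each environment. Also add the native_irq_enable_syscall_ret and native_iret
entry points, and a system_call_after_swapgs label that a hypervisor can jump to.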
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
---
arch/x86/kernel/entry_64.S | 101 +++++++++++++++++++++++++------------------
1 files changed, 59 insertions(+), 42 deletions(-)
Index: linux-2.6-x86/arch/x86/kernel/entry_64.S
===================================================================
--- linux-2.6-x86.orig/arch/x86/kernel/entry_64.S 2007-12-20 19:06:59.000000000 -0800
+++ linux-2.6-x86/arch/x86/kernel/entry_64.S 2007-12-20 19:08:08.000000000 -0800
@@ -50,6 +50,7 @@
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
+#include <asm/paravirt.h>
.code64
@@ -57,6 +58,13 @@
#define retint_kernel retint_restore_args
#endif
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_irq_enable_syscall_ret)
+ movq %gs:pda_oldrsp,%rsp
+ swapgs
+ sysretq
+#endif /* CONFIG_PARAVIRT */
+
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -216,14 +224,21 @@
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
- swapgs
+ SWAPGS_UNSAFE_STACK
+ /*
+ * A hypervisor implementation might want to use a label
+ * after the swapgs, so that it can do the swapgs
+ * for the guest and jump here on syscall.
+ */
+ENTRY(system_call_after_swapgs)
+
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
*/
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
@@ -246,7 +261,7 @@
sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
@@ -260,9 +275,7 @@
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
- movq %gs:pda_oldrsp,%rsp
- swapgs
- sysretq
+ ENABLE_INTERRUPTS_SYSCALL_RET
CFI_RESTORE_STATE
/* Handle reschedules */
@@ -271,7 +284,7 @@
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
@@ -282,7 +295,7 @@
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz 1f
@@ -295,7 +308,7 @@
1: movl $_TIF_NEED_RESCHED,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
@@ -327,7 +340,7 @@
*/
.globl int_ret_from_sys_call
int_ret_from_sys_call:
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
@@ -349,20 +362,20 @@
bt $TIF_NEED_RESCHED,%edx
jnc int_very_careful
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
/* Check for syscall exit trace */
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
@@ -385,7 +398,7 @@
1: movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
RESTORE_REST
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
@@ -506,7 +519,7 @@
CFI_DEF_CFA_REGISTER rbp
testl $3,CS(%rdi)
je 1f
- swapgs
+ SWAPGS
/* irqcount is used to check if a CPU is already on an interrupt
stack or not. While this is essentially redundant with preempt_count
it is a little cheaper to use a separate counter in the PDA
@@ -527,7 +540,7 @@
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
decl %gs:pda_irqcount
leaveq
@@ -556,13 +569,13 @@
/*
* The iretq could re-enable interrupts:
*/
- cli
+ DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
- swapgs
+ SWAPGS
jmp restore_args
retint_restore_args: /* return to kernel space */
- cli
+ DISABLE_INTERRUPTS(CLBR_ANY)
/*
* The iretq could re-enable interrupts:
*/
@@ -570,10 +583,14 @@
restore_args:
RESTORE_ARGS 0,8,0
iret_label:
+#ifdef CONFIG_PARAVIRT
+ INTERRUPT_RETURN
+#endif
+ENTRY(native_iret)
iretq
.section __ex_table,"a"
- .quad iret_label,bad_iret
+ .quad native_iret, bad_iret
.previous
.section .fixup,"ax"
/* force a signal here? this matches i386 behaviour */
@@ -581,24 +598,24 @@
bad_iret:
movq $11,%rdi /* SIGSEGV */
TRACE_IRQS_ON
- sti
- jmp do_exit
- .previous
-
+ ENABLE_INTERRUPTS(CLBR_ANY & ~(CLBR_RDI)) /* any reg but %rdi: it holds do_exit's argument */
+ jmp do_exit
+ .previous
+
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
@@ -606,14 +623,14 @@
testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz retint_swapgs
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl $_TIF_NEED_RESCHED,%edi
GET_THREAD_INFO(%rcx)
@@ -731,7 +748,7 @@
rdmsr
testl %edx,%edx
js 1f
- swapgs
+ SWAPGS
xorl %ebx,%ebx
1:
.if \ist
@@ -747,7 +764,7 @@
.if \ist
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
.if \irqtrace
TRACE_IRQS_OFF
.endif
@@ -776,10 +793,10 @@
.if \trace
TRACE_IRQS_IRETQ 0
.endif
- swapgs
+ SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
- iretq
+ INTERRUPT_RETURN
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
@@ -794,11 +811,11 @@
.if \trace
TRACE_IRQS_ON
.endif
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
.if \trace
TRACE_IRQS_OFF
.endif
@@ -807,9 +824,9 @@
.if \trace
TRACE_IRQS_ON
.endif
- sti
+ ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
- cli
+ DISABLE_INTERRUPTS(CLBR_ANY)
.if \trace
TRACE_IRQS_OFF
.endif
@@ -862,7 +879,7 @@
testl $3,CS(%rsp)
je error_kernelspace
error_swapgs:
- swapgs
+ SWAPGS
error_sti:
movq %rdi,RDI(%rsp)
CFI_REL_OFFSET rdi,RDI
@@ -874,7 +891,7 @@
error_exit:
movl %ebx,%eax
RESTORE_REST
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
@@ -911,12 +928,12 @@
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
- cli
- swapgs
+ DISABLE_INTERRUPTS(CLBR_ANY & ~(CLBR_RDI)) /* any reg but %rdi: %edi is loaded into %gs below */
+ SWAPGS
gs_change:
movl %edi,%gs
2: mfence /* workaround */
- swapgs
+ SWAPGS
popf
CFI_ADJUST_CFA_OFFSET -8
ret
@@ -930,7 +947,7 @@
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
- swapgs /* switch back to user gs */
+ SWAPGS /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
next prev parent reply other threads:[~2007-12-20 20:11 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-20 20:03 [PATCH 0/15] adjust pvops to accomodate its x86_64 variant Glauber de Oliveira Costa
2007-12-20 20:03 ` [PATCH 01/15] change paravirt_32.c name Glauber de Oliveira Costa
2007-12-20 20:03 ` [PATCH 02/15] adjust PVOP_CALL/VCALL macros for x86_64 Glauber de Oliveira Costa
2007-12-20 20:03 ` [PATCH 03/15] cleanup write_tsc Glauber de Oliveira Costa
2007-12-20 20:03 ` [PATCH 04/15] provide paravirtualized hook for rdtscp Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 05/15] change assembly definition of paravirt_patch_site Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 06/15] adjust assembly macros to x86_64 as well Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 07/15] change irq functions to accomodate x86_64 Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 08/15] add macro for privileged x86_64 operation Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 09/15] adds paravirt hook for swapgs Glauber de Oliveira Costa
2007-12-20 20:04 ` Glauber de Oliveira Costa [this message]
2007-12-20 20:04 ` [PATCH 11/15] cleanup CLI_STRING, STI_STRING and friends Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 12/15] add CLBR_ defines for x86_64 Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 13/15] move patching code to arch-specific file Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 14/15] x86_64 patching functions Glauber de Oliveira Costa
2007-12-20 20:04 ` [PATCH 15/15] replace x86_read/write_per_cpu with a common function Glauber de Oliveira Costa
2007-12-20 21:16 ` H. Peter Anvin
2007-12-20 20:31 ` [PATCH 13/15] move patching code to arch-specific file Ingo Molnar
2007-12-20 20:33 ` Ingo Molnar
2007-12-20 21:22 ` Glauber de Oliveira Costa
2007-12-20 21:26 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=11981813292926-git-send-email-gcosta@redhat.com \
--to=gcosta@redhat.com \
--cc=ak@suse.de \
--cc=akpm@linux-foundation.org \
--cc=anthony@codemonkey.ws \
--cc=avi@qumranet.com \
--cc=chrisw@sous-sol.org \
--cc=ehabkost@redhat.com \
--cc=glommer@gmail.com \
--cc=hpa@zytor.com \
--cc=jeremy@goop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=roland@redhat.com \
--cc=rostedt@goodmis.org \
--cc=rusty@rustcorp.com.au \
--cc=tglx@linutronix.de \
--cc=virtualization@lists.linux-foundation.org \
--cc=zach@vmware.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox