Subject: xen: support sysenter/sysexit if hypervisor does

64-bit Xen supports sysenter for 32-bit guests, so support its use. (sysenter is faster than int $0x80 in 32-on-64.) sysexit is still not supported, so we fake it up using iret. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/entry_32.S | 22 +++++++++++++++++- arch/x86/xen/enlighten.c | 3 -- arch/x86/xen/setup.c | 21 +++++++++++++++++ arch/x86/xen/smp.c | 1 arch/x86/xen/xen-asm.S | 52 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-ops.h | 3 ++ 6 files changed, 99 insertions(+), 3 deletions(-) =================================================================== --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -311,6 +311,7 @@ ENTRY(sysenter_past_esp) * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ +sysenter_stack_setup: pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 @@ -1025,6 +1026,16 @@ ENDPROC(kernel_thread_helper) ENDPROC(kernel_thread_helper) #ifdef CONFIG_XEN +/* Xen doesn't set %esp to be precisely what the normal sysenter + entrypoint expects, so fix it up before using the normal path. 
*/ +ENTRY(xen_sysenter_target) + RING0_INT_FRAME + movl $__USER_DS,16(%esp) + movl %ebp,12(%esp) + movl $__USER_CS,4(%esp) + addl $4,%esp + jmp sysenter_stack_setup + ENTRY(xen_hypervisor_callback) CFI_STARTPROC pushl $0 @@ -1044,8 +1055,17 @@ ENTRY(xen_hypervisor_callback) jae 1f call xen_iret_crit_fixup + jmp 2f -1: mov %esp, %eax +1: cmpl $xen_sysexit_start_crit,%eax + jb 2f + cmpl $xen_sysexit_end_crit,%eax + jae 2f + + jmp xen_sysexit_crit_fixup + +ENTRY(xen_do_upcall) +2: mov %esp, %eax call xen_evtchn_do_upcall jmp ret_from_intr CFI_ENDPROC =================================================================== --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -153,7 +153,6 @@ static void xen_cpuid(unsigned int *ax, if (*ax == 1) maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_SEP) | /* disable SEP */ (1 << X86_FEATURE_ACC)); /* thermal monitoring */ asm(XEN_EMULATE_PREFIX "cpuid" @@ -969,7 +968,7 @@ static const struct pv_cpu_ops xen_cpu_o .read_pmc = native_read_pmc, .iret = xen_iret, - .irq_enable_syscall_ret = NULL, /* never called */ + .irq_enable_syscall_ret = xen_sysexit, .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, =================================================================== --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -67,6 +68,24 @@ static void __init fiddle_vdso(void) *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; } +void xen_enable_sysenter(void) +{ + int cpu = smp_processor_id(); + extern void xen_sysenter_target(void); + /* Mask events on entry, even though they get enabled immediately */ + static struct callback_register sysenter = { + .type = CALLBACKTYPE_sysenter, + .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target }, + .flags = CALLBACKF_mask_events, + }; + + if (!boot_cpu_has(X86_FEATURE_SEP) || + HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) { + 
clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); + } +} + void __init xen_arch_setup(void) { struct physdev_set_iopl set_iopl; @@ -80,6 +99,8 @@ void __init xen_arch_setup(void) HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, __KERNEL_CS, (unsigned long)xen_failsafe_callback); + + xen_enable_sysenter(); set_iopl.iopl = 1; rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); =================================================================== --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_id int cpu = smp_processor_id(); cpu_init(); + xen_enable_sysenter(); preempt_disable(); per_cpu(cpu_state, cpu) = CPU_ONLINE; =================================================================== --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -275,6 +275,58 @@ 2: ret 2: ret +ENTRY(xen_sysexit) + /* Store vcpu_info pointer for easy access. Do it this + way to avoid having to reload %fs */ +#ifdef CONFIG_SMP + GET_THREAD_INFO(%eax) + movl TI_cpu(%eax),%eax + movl __per_cpu_offset(,%eax,4),%eax + mov per_cpu__xen_vcpu(%eax),%eax +#else + movl per_cpu__xen_vcpu, %eax +#endif + + /* We can't actually use sysexit in a pv guest, + so fake it up with iret */ + pushl $__USER_DS /* user stack segment */ + pushl %ecx /* user esp */ + pushl PT_EFLAGS+2*4(%esp) /* user eflags */ + pushl $__USER_CS /* user code segment */ + pushl %edx /* user eip */ + + /* Unconditionally unmask events and test for pending */ + andw $0x00ff, XEN_vcpu_info_pending(%eax) + +xen_sysexit_start_crit: + /* If there's something pending, mask events again so we + can directly inject it back into the kernel. 
*/ + jnz 1f + + movl PT_EAX+5*4(%esp),%eax +2: iret +1: movb $1, XEN_vcpu_info_mask(%eax) +xen_sysexit_end_crit: + addl $5*4, %esp /* remove iret frame */ + /* no need to re-save regs, but need to restore kernel %fs */ + mov $__KERNEL_PERCPU, %eax + mov %eax, %fs + jmp xen_do_upcall +.section __ex_table,"a" + .align 4 + .long 2b,iret_exc +.previous + + .globl xen_sysexit_start_crit, xen_sysexit_end_crit +/* + sysexit fixup is easy, since the old frame is still sitting there + on the stack. We just need to remove the new recursive + interrupt and return. + */ +ENTRY(xen_sysexit_crit_fixup) + addl $PT_OLDESP+5*4, %esp /* remove frame+iret */ + jmp xen_do_upcall + /* Force an event check by making a hypercall, but preserve regs before making the call. =================================================================== --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -19,6 +19,7 @@ char * __init xen_memory_setup(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); +void xen_enable_sysenter(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); @@ -64,4 +65,6 @@ DECL_ASM(void, xen_restore_fl_direct, un DECL_ASM(void, xen_restore_fl_direct, unsigned long); void xen_iret(void); +void xen_sysexit(void); + #endif /* XEN_OPS_H */