From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755350AbbJGSIa (ORCPT ); Wed, 7 Oct 2015 14:08:30 -0400 Received: from mx1.redhat.com ([209.132.183.28]:40892 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754544AbbJGSI3 (ORCPT ); Wed, 7 Oct 2015 14:08:29 -0400 Message-ID: <56155F9A.2070209@redhat.com> Date: Wed, 07 Oct 2015 20:08:26 +0200 From: Denys Vlasenko User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.2.0 MIME-Version: 1.0 To: Andy Lutomirski , x86@kernel.org, linux-kernel@vger.kernel.org CC: Brian Gerst , Linus Torvalds , Borislav Petkov Subject: Re: [PATCH v2 27/36] x86/entry/32: Re-implement SYSENTER using the new C path References: <5b99659e8be70f3dd10cd8970a5c90293d9ad9a7.1444091585.git.luto@kernel.org> In-Reply-To: <5b99659e8be70f3dd10cd8970a5c90293d9ad9a7.1444091585.git.luto@kernel.org> Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 10/06/2015 02:48 AM, Andy Lutomirski wrote: > # SYSENTER call handler stub > ENTRY(entry_SYSENTER_32) > movl TSS_sysenter_sp0(%esp), %esp > sysenter_past_esp: > + pushl $__USER_DS /* pt_regs->ss */ > + pushl %ecx /* pt_regs->cx */ > + pushfl /* pt_regs->flags (except IF = 0) */ > + orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ > + pushl $__USER_CS /* pt_regs->cs */ > + pushl $0 /* pt_regs->ip = 0 (placeholder) */ > + pushl %eax /* pt_regs->orig_ax */ > + SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ > + > /* > - * Interrupts are disabled here, but we can't trace it until > - * enough kernel state to call TRACE_IRQS_OFF can be called - but > - * we immediately enable interrupts at that point anyway. 
> - */ > - pushl $__USER_DS > - pushl %ebp > - pushfl > - orl $X86_EFLAGS_IF, (%esp) > - pushl $__USER_CS > - /* > - * Push current_thread_info()->sysenter_return to the stack. > - * A tiny bit of offset fixup is necessary: TI_sysenter_return > - * is relative to thread_info, which is at the bottom of the > - * kernel stack page. 4*4 means the 4 words pushed above; > - * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; > - * and THREAD_SIZE takes us to the bottom. > + * User mode is traced as though IRQs are on, and SYSENTER > + * turned them off. > */ > - pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) > - > - pushl %eax > - SAVE_ALL > - ENABLE_INTERRUPTS(CLBR_NONE) > - > -/* > - * Load the potential sixth argument from user stack. > - * Careful about security. > - */ > - cmpl $__PAGE_OFFSET-3, %ebp > - jae syscall_fault > - ASM_STAC > -1: movl (%ebp), %ebp > - ASM_CLAC > - movl %ebp, PT_EBP(%esp) > - _ASM_EXTABLE(1b, syscall_fault) > - > - GET_THREAD_INFO(%ebp) > - > - testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) > - jnz syscall_trace_entry > -sysenter_do_call: > - cmpl $(NR_syscalls), %eax > - jae sysenter_badsys > - call *sys_call_table(, %eax, 4) > -sysenter_after_call: > - movl %eax, PT_EAX(%esp) > - LOCKDEP_SYS_EXIT > - DISABLE_INTERRUPTS(CLBR_ANY) > TRACE_IRQS_OFF > - movl TI_flags(%ebp), %ecx > - testl $_TIF_ALLWORK_MASK, %ecx > - jnz syscall_exit_work_irqs_off > -sysenter_exit: > -/* if something modifies registers it must also disable sysexit */ > - movl PT_EIP(%esp), %edx > - movl PT_OLDESP(%esp), %ecx > - xorl %ebp, %ebp > - TRACE_IRQS_ON > + > + movl %esp, %eax > + call do_fast_syscall_32 > + testl %eax, %eax > + jz .Lsyscall_32_done > + > +/* Opportunistic SYSEXIT */ > + TRACE_IRQS_ON /* User mode traces as IRQs on. 
*/ > + movl PT_EIP(%esp), %edx /* pt_regs->ip */ > + movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */ > + popl %ebx /* pt_regs->bx */ > + addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */ Here the stack engine (which tracks ESP for the POPs) and the ALU (which executes the explicit ADD on ESP) conflict, potentially adding a stall both before and after the ADD. It might be faster to just pop twice into an unused register, say %eax, which is reloaded by the later "popl %eax" anyway: popl %eax popl %eax > + popl %esi /* pt_regs->si */ > + popl %edi /* pt_regs->di */ > + popl %ebp /* pt_regs->bp */ > + popl %eax /* pt_regs->ax */ > 1: mov PT_FS(%esp), %fs > PTGS_TO_GS