From: tip-bot for Dominik Brodowski <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: torvalds@linux-foundation.org, dvlasenk@redhat.com,
arjan@linux.intel.com, luto@kernel.org, bp@alien8.de,
peterz@infradead.org, dave.hansen@linux.intel.com,
dan.j.williams@intel.com, mingo@kernel.org, jpoimboe@redhat.com,
hpa@zytor.com, dwmw2@infradead.org, linux-kernel@vger.kernel.org,
linux@dominikbrodowski.net, tglx@linutronix.de,
gregkh@linuxfoundation.org
Subject: [tip:x86/pti] x86/entry/64: Use 'xorl' for faster register clearing
Date: Sat, 17 Feb 2018 03:40:42 -0800 [thread overview]
Message-ID: <tip-ced5d0bf603fa0baee8ea889e1d70971fd210894@git.kernel.org> (raw)
In-Reply-To: <20180214175924.23065-3-linux@dominikbrodowski.net>
Commit-ID: ced5d0bf603fa0baee8ea889e1d70971fd210894
Gitweb: https://git.kernel.org/tip/ced5d0bf603fa0baee8ea889e1d70971fd210894
Author: Dominik Brodowski <linux@dominikbrodowski.net>
AuthorDate: Wed, 14 Feb 2018 18:59:24 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Sat, 17 Feb 2018 11:14:33 +0100
x86/entry/64: Use 'xorl' for faster register clearing
On some x86 CPU microarchitectures using 'xorq' to clear general-purpose
registers is slower than 'xorl'. As 'xorl' is sufficient to clear all
64 bits of these registers due to zero-extension [*], switch the x86
64-bit entry code to use 'xorl'.
No change in functionality and no change in code size.
[*] According to Intel 64 and IA-32 Architecture Software Developer's
Manual, section 3.4.1.1, the result of 32-bit operands are "zero-
extended to a 64-bit result in the destination general-purpose
register." The AMD64 Architecture Programmer’s Manual Volume 3,
Appendix B.1, describes the same behaviour.
Suggested-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180214175924.23065-3-linux@dominikbrodowski.net
[ Improved on the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/entry/calling.h | 16 ++++++------
arch/x86/entry/entry_64_compat.S | 54 ++++++++++++++++++++--------------------
2 files changed, 35 insertions(+), 35 deletions(-)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 196b610..5d10b7a 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
- xorq %r11, %r11 /* nospec r11*/
+ xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
- xorq %r12, %r12 /* nospec r12*/
+ xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
- xorq %r13, %r13 /* nospec r13*/
+ xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
- xorq %r14, %r14 /* nospec r14*/
+ xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
- xorq %r15, %r15 /* nospec r15*/
+ xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
.if \save_ret
pushq %rsi /* return address on top of stack */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e01..364ea4a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
*/
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r10
+ xorl %r8d, %r8d
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
swapgs
sysretl
END(entry_SYSCALL_compat)
@@ -358,25 +358,25 @@ ENTRY(entry_INT80_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
prev parent reply other threads:[~2018-02-17 11:40 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-02-14 17:59 [PATCH 0/2] x86/entry: xorq->xorl; idtentry size reduction Dominik Brodowski
2018-02-14 17:59 ` [PATCH 1/2] x86/entry: reduce static footprint of idtentry Dominik Brodowski
2018-02-17 11:40 ` [tip:x86/pti] x86/entry: Reduce the code footprint of the 'idtentry' macro tip-bot for Dominik Brodowski
2018-02-17 12:28 ` Dominik Brodowski
2018-02-14 17:59 ` [PATCH 2/2] x86/entry/64: use xorl for register clearing Dominik Brodowski
2018-02-17 11:40 ` tip-bot for Dominik Brodowski [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-ced5d0bf603fa0baee8ea889e1d70971fd210894@git.kernel.org \
--to=tipbot@zytor.com \
--cc=arjan@linux.intel.com \
--cc=bp@alien8.de \
--cc=dan.j.williams@intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=dvlasenk@redhat.com \
--cc=dwmw2@infradead.org \
--cc=gregkh@linuxfoundation.org \
--cc=hpa@zytor.com \
--cc=jpoimboe@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=linux@dominikbrodowski.net \
--cc=luto@kernel.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.